347 lines
12 KiB
Python
347 lines
12 KiB
Python
"""
Async storage abstraction for handling large files efficiently.

Extends the existing storage abstraction with async capabilities
for better performance with large files.
"""
|
|
import asyncio
|
|
import aiofiles
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Optional, AsyncGenerator, Callable, Tuple
|
|
from app.config import settings
|
|
from app.utils.logging import get_logger
|
|
|
|
# Module-level logger for the storage code in this file.
logger = get_logger("async_storage")
|
|
|
|
CHUNK_SIZE = 64 * 1024  # bytes moved per chunked read/write (64 KiB)
|
|
|
|
|
|
class AsyncStorageAdapter:
    """Base interface for async storage adapters.

    Every operation except ``public_url`` raises ``NotImplementedError``
    here; concrete adapters override the operations they support.
    """

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Persist ``content`` and return the storage path of the new object.

        Args:
            content: Raw bytes to store.
            filename_hint: Suggested name for the stored object.
            subdir: Optional sub-location to store the object under.
            content_type: Optional content type of the payload.
            progress_callback: Optional callable taking two ints.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Persist chunks from ``content_stream`` and return the storage path.

        Args:
            content_stream: Async generator yielding byte chunks.
            filename_hint: Suggested name for the stored object.
            subdir: Optional sub-location to store the object under.
            content_type: Optional content type of the payload.
            progress_callback: Optional callable taking two ints.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Return the full content stored at ``storage_path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Yield the content stored at ``storage_path`` in chunks.

        This is an async generator, so it is consumed with ``async for``
        and the error below surfaces on the first iteration.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
        # Unreachable on purpose: the presence of ``yield`` makes this an
        # async generator, matching the annotation and how overrides are used.
        yield b""  # pragma: no cover

    async def delete_async(self, storage_path: str) -> bool:
        """Delete the object at ``storage_path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def exists_async(self, storage_path: str) -> bool:
        """Report whether an object exists at ``storage_path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Return the size of the object at ``storage_path``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def public_url(self, storage_path: str) -> Optional[str]:
        """Return a public URL for ``storage_path``; ``None`` in this base class."""
        return None
|
|
|
|
|
|
class AsyncLocalStorageAdapter(AsyncStorageAdapter):
    """Async adapter that stores files on the local filesystem.

    All files live beneath ``base_dir``. Paths handed back by the save
    methods are relative to that directory, and every path passed in is
    checked so that it cannot point outside of it. File content is read and
    written through ``aiofiles``; other blocking filesystem calls are run in
    the event loop's default executor.
    """

    def __init__(self, base_dir: Optional[str] = None) -> None:
        """Resolve the base directory and create it if it is missing.

        Args:
            base_dir: Root directory for stored files. Falls back to
                ``settings.upload_dir`` when omitted or empty.
        """
        self.base_dir = Path(base_dir or settings.upload_dir).resolve()
        self.base_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    async def _run_blocking(func, *args):
        """Run a blocking callable in the default executor and return its result."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    @staticmethod
    def _discard_partial(path: Path) -> None:
        """Best-effort removal of a partially written file.

        Deliberately synchronous: it runs while an error (possibly a task
        cancellation) is propagating, where awaiting again could itself be
        interrupted and leave the file behind.
        """
        try:
            path.unlink()
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            logger.warning(f"Could not remove partial file {path}: {cleanup_error}")

    def _locate(self, storage_path: str) -> Optional[Path]:
        """Map a storage path to an absolute path inside ``base_dir``.

        The path is normalised lexically (``..`` segments are collapsed
        without following symlinks) and then required to be ``base_dir``
        itself or a descendant of it.

        Returns:
            The absolute path, or ``None`` when ``storage_path`` would
            escape ``base_dir`` (for example via ``..`` or an absolute path).
        """
        candidate = Path(os.path.normpath(self.base_dir / storage_path))
        if candidate == self.base_dir or self.base_dir in candidate.parents:
            return candidate
        logger.warning(f"Rejected storage path outside base directory: {storage_path!r}")
        return None

    async def _ensure_dir_async(self, directory: Path) -> None:
        """Create ``directory`` (and parents) if needed, off the event loop."""
        # exist_ok=True already covers the "already there" case, so no
        # separate existence check is needed.
        await self._run_blocking(
            lambda: directory.mkdir(parents=True, exist_ok=True)
        )

    def _generate_unique_filename(self, filename_hint: str, subdir: Optional[str] = None) -> Tuple[Path, str]:
        """Build a collision-resistant file name for a new upload.

        Path separators in ``filename_hint`` are replaced with underscores,
        ``.bin`` is appended when the name has no suffix, and a random UUID
        hex prefix is added.

        Args:
            filename_hint: Suggested file name.
            subdir: Optional directory, relative to ``base_dir``.

        Returns:
            ``(full_path, relative_path)`` where ``relative_path`` is
            ``"<subdir>/<name>"`` or just ``"<name>"``.

        Raises:
            ValueError: If ``subdir`` would place the file outside ``base_dir``.
        """
        safe_name = filename_hint.replace("/", "_").replace("\\", "_")
        if not Path(safe_name).suffix:
            safe_name = f"{safe_name}.bin"
        final_name = f"{uuid.uuid4().hex}_{safe_name}"

        if not subdir:
            return self.base_dir / final_name, final_name

        directory = self._locate(subdir)
        if directory is None:
            raise ValueError(f"Invalid subdir (outside storage root): {subdir!r}")
        return directory / final_name, f"{subdir}/{final_name}"

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Write ``content`` to a new uniquely named file.

        Content up to ``CHUNK_SIZE`` bytes is written in one call; larger
        content is written in ``CHUNK_SIZE`` pieces.

        Args:
            content: Bytes to store.
            filename_hint: Suggested file name (sanitised before use).
            subdir: Optional directory, relative to ``base_dir``.
            content_type: Accepted for interface compatibility; not used here.
            progress_callback: Called as ``(bytes_written, total_bytes)``
                after each write.

        Returns:
            The stored file's path relative to ``base_dir``.

        Raises:
            ValueError: If ``subdir`` escapes ``base_dir``.
            Exception: Any write error is re-raised after the partial file
                has been removed.
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)
        await self._ensure_dir_async(full_path.parent)

        total_size = len(content)
        try:
            async with aiofiles.open(full_path, "wb") as f:
                if total_size <= CHUNK_SIZE:
                    # Small payload: a single write is enough.
                    await f.write(content)
                    if progress_callback:
                        progress_callback(total_size, total_size)
                else:
                    # memoryview slices avoid copying each chunk.
                    view = memoryview(content)
                    written = 0
                    for start in range(0, total_size, CHUNK_SIZE):
                        chunk = view[start:start + CHUNK_SIZE]
                        await f.write(chunk)
                        written += len(chunk)

                        if progress_callback:
                            progress_callback(written, total_size)

                        # Give other tasks a chance to run between chunks.
                        await asyncio.sleep(0)
        except BaseException as e:
            # BaseException so that task cancellation also cleans up.
            self._discard_partial(full_path)
            if isinstance(e, Exception):
                logger.error(f"Failed to save file {relative_path}: {str(e)}")
            raise

        return relative_path

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Write chunks from ``content_stream`` to a new uniquely named file.

        Args:
            content_stream: Async generator yielding byte chunks.
            filename_hint: Suggested file name (sanitised before use).
            subdir: Optional directory, relative to ``base_dir``.
            content_type: Accepted for interface compatibility; not used here.
            progress_callback: Called after each chunk. The total size of a
                stream is unknown, so both arguments are the running byte
                count.

        Returns:
            The stored file's path relative to ``base_dir``.

        Raises:
            ValueError: If ``subdir`` escapes ``base_dir``.
            Exception: Any stream or write error is re-raised after the
                partial file has been removed.
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)
        await self._ensure_dir_async(full_path.parent)

        total_written = 0
        try:
            async with aiofiles.open(full_path, "wb") as f:
                async for chunk in content_stream:
                    await f.write(chunk)
                    total_written += len(chunk)

                    if progress_callback:
                        progress_callback(total_written, total_written)  # Unknown total for streams

                    # Give other tasks a chance to run between chunks.
                    await asyncio.sleep(0)
        except BaseException as e:
            # BaseException so that task cancellation also cleans up.
            self._discard_partial(full_path)
            if isinstance(e, Exception):
                logger.error(f"Failed to save stream {relative_path}: {str(e)}")
            raise

        return relative_path

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read and return the entire file at ``storage_path``.

        Raises:
            FileNotFoundError: If the file does not exist or the path lies
                outside ``base_dir``.
            Exception: Any read error is logged and re-raised.
        """
        full_path = self._locate(storage_path)
        if full_path is None or not await self._run_blocking(full_path.exists):
            raise FileNotFoundError(f"File not found: {storage_path}")

        try:
            async with aiofiles.open(full_path, "rb") as f:
                return await f.read()
        except Exception as e:
            logger.error(f"Failed to read file {storage_path}: {str(e)}")
            raise

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Yield the file at ``storage_path`` in ``CHUNK_SIZE`` pieces.

        This is an async generator: nothing is checked or opened until the
        first iteration.

        Raises:
            FileNotFoundError: If the file does not exist or the path lies
                outside ``base_dir``.
            Exception: Any read error is logged and re-raised.
        """
        full_path = self._locate(storage_path)
        if full_path is None or not await self._run_blocking(full_path.exists):
            raise FileNotFoundError(f"File not found: {storage_path}")

        try:
            async with aiofiles.open(full_path, "rb") as f:
                while True:
                    chunk = await f.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    yield chunk
                    # Give other tasks a chance to run between chunks.
                    await asyncio.sleep(0)
        except Exception as e:
            logger.error(f"Failed to stream file {storage_path}: {str(e)}")
            raise

    async def delete_async(self, storage_path: str) -> bool:
        """Delete the file at ``storage_path``.

        Returns:
            ``True`` if a file was removed; ``False`` if it did not exist,
            the path lies outside ``base_dir``, or removal failed (failures
            are logged, not raised).
        """
        full_path = self._locate(storage_path)
        if full_path is None:
            return False

        try:
            if await self._run_blocking(full_path.exists):
                await self._run_blocking(full_path.unlink)
                return True
            return False
        except Exception as e:
            logger.error(f"Failed to delete file {storage_path}: {str(e)}")
            return False

    async def exists_async(self, storage_path: str) -> bool:
        """Return whether ``storage_path`` exists inside ``base_dir``."""
        full_path = self._locate(storage_path)
        if full_path is None:
            return False
        return await self._run_blocking(full_path.exists)

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Return the size in bytes of the file at ``storage_path``.

        Returns:
            The size, or ``None`` if the file does not exist, the path lies
            outside ``base_dir``, or the lookup failed (failures are logged,
            not raised).
        """
        full_path = self._locate(storage_path)
        if full_path is None:
            return None

        try:
            if await self._run_blocking(full_path.exists):
                file_stat = await self._run_blocking(full_path.stat)
                return file_stat.st_size
            return None
        except Exception as e:
            logger.error(f"Failed to get size for {storage_path}: {str(e)}")
            return None

    def public_url(self, storage_path: str) -> Optional[str]:
        """Return the ``/uploads/...`` URL for ``storage_path`` using forward slashes."""
        return f"/uploads/{storage_path}".replace("\\", "/")
|
|
|
|
|
|
class HybridStorageAdapter:
    """Storage adapter exposing both sync and async interfaces.

    All work is delegated to an ``AsyncLocalStorageAdapter``. The sync
    methods drive the corresponding coroutine to completion so existing
    synchronous code can keep calling them.
    """

    def __init__(self, base_dir: Optional[str] = None):
        """Create the wrapped async adapter.

        Args:
            base_dir: Passed through to ``AsyncLocalStorageAdapter``.
        """
        self.async_adapter = AsyncLocalStorageAdapter(base_dir)
        self.base_dir = self.async_adapter.base_dir

    @staticmethod
    def _run_sync(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        ``asyncio.run`` refuses to start when the current thread already has
        a running event loop. In that situation the coroutine is executed on
        a short-lived worker thread with its own loop instead. The calling
        thread blocks until the result is available either way.
        """
        import concurrent.futures

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: the simple path is safe.
            return asyncio.run(coro)

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    # Sync interface for backward compatibility
    def save_bytes(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None
    ) -> str:
        """Sync wrapper for save_bytes_async."""
        return self._run_sync(self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type
        ))

    def open_bytes(self, storage_path: str) -> bytes:
        """Sync wrapper for open_bytes_async."""
        return self._run_sync(self.async_adapter.open_bytes_async(storage_path))

    def delete(self, storage_path: str) -> bool:
        """Sync wrapper for delete_async."""
        return self._run_sync(self.async_adapter.delete_async(storage_path))

    def exists(self, storage_path: str) -> bool:
        """Sync wrapper for exists_async."""
        return self._run_sync(self.async_adapter.exists_async(storage_path))

    def public_url(self, storage_path: str) -> Optional[str]:
        """Get public URL for file."""
        return self.async_adapter.public_url(storage_path)

    # Async interface
    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save bytes asynchronously via the wrapped adapter."""
        return await self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type, progress_callback
        )

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save a stream asynchronously via the wrapped adapter."""
        return await self.async_adapter.save_stream_async(
            content_stream, filename_hint, subdir, content_type, progress_callback
        )

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read a file as bytes asynchronously via the wrapped adapter."""
        return await self.async_adapter.open_bytes_async(storage_path)

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Stream file content asynchronously via the wrapped adapter."""
        async for chunk in self.async_adapter.open_stream_async(storage_path):
            yield chunk

    async def delete_async(self, storage_path: str) -> bool:
        """Delete a file asynchronously via the wrapped adapter."""
        return await self.async_adapter.delete_async(storage_path)

    async def exists_async(self, storage_path: str) -> bool:
        """Check whether a file exists asynchronously via the wrapped adapter."""
        return await self.async_adapter.exists_async(storage_path)

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get file size asynchronously via the wrapped adapter."""
        return await self.async_adapter.get_size_async(storage_path)
|
|
|
|
|
|
def get_async_storage() -> AsyncLocalStorageAdapter:
    """Build and return a new async local-storage adapter with default arguments."""
    adapter = AsyncLocalStorageAdapter()
    return adapter
|
|
|
|
|
|
def get_hybrid_storage() -> HybridStorageAdapter:
    """Build and return a new hybrid (sync + async) storage adapter with default arguments."""
    adapter = HybridStorageAdapter()
    return adapter
|
|
|
|
|
|
# Shared module-level adapter instances, created once when this module is imported.
async_storage = get_async_storage()
hybrid_storage = get_hybrid_storage()
|