"""
Async storage abstraction for handling large files efficiently.

Extends the existing storage abstraction with async capabilities for better
performance with large files.
"""
import asyncio
import concurrent.futures
import os
import uuid
from pathlib import Path
from typing import (
    Any,
    AsyncGenerator,
    Callable,
    Coroutine,
    Optional,
    Tuple,
    TypeVar,
)

import aiofiles

from app.config import settings
from app.utils.logging import get_logger

logger = get_logger("async_storage")

CHUNK_SIZE = 64 * 1024  # 64KB chunks

# Signature of the optional progress hook: callback(done, total).
ProgressCallback = Callable[[int, int], None]

_T = TypeVar("_T")


async def _run_blocking(func: Callable[[], _T]) -> _T:
    """Run a blocking zero-argument callable in the loop's default executor.

    Keeps filesystem syscalls (mkdir/stat/unlink/exists) off the event loop
    thread so that other tasks are not stalled by slow disks.
    """
    return await asyncio.get_running_loop().run_in_executor(None, func)


class AsyncStorageAdapter:
    """Abstract async storage adapter.

    Subclasses override the methods below; every default implementation
    raises ``NotImplementedError`` except ``public_url``, which returns
    ``None``.
    """

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Store ``content`` and return its storage path."""
        raise NotImplementedError

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Store the chunks of ``content_stream`` and return the storage path."""
        raise NotImplementedError

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Return the full content stored at ``storage_path``."""
        raise NotImplementedError

    async def open_stream_async(
        self, storage_path: str
    ) -> AsyncGenerator[bytes, None]:
        """Yield the content stored at ``storage_path`` chunk by chunk."""
        raise NotImplementedError
        # Unreachable on purpose: the ``yield`` makes this an async generator
        # function like the concrete implementations, so ``async for`` over
        # the base method fails with NotImplementedError rather than with a
        # TypeError about iterating a coroutine.
        yield b""  # pragma: no cover

    async def delete_async(self, storage_path: str) -> bool:
        """Delete ``storage_path``; return whether something was removed."""
        raise NotImplementedError

    async def exists_async(self, storage_path: str) -> bool:
        """Return whether ``storage_path`` exists."""
        raise NotImplementedError

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Return the size in bytes of ``storage_path`` or ``None``."""
        raise NotImplementedError

    def public_url(self, storage_path: str) -> Optional[str]:
        """Return a public URL for ``storage_path``; ``None`` by default."""
        return None


class AsyncLocalStorageAdapter(AsyncStorageAdapter):
    """Async local storage adapter for handling large files efficiently.

    Files live under ``base_dir``. Storage paths are interpreted relative to
    that directory and are rejected with ``ValueError`` when they would
    point outside of it.
    """

    def __init__(self, base_dir: Optional[str] = None) -> None:
        """Resolve and create the storage root.

        Args:
            base_dir: Root directory; falls back to ``settings.upload_dir``.
        """
        self.base_dir = Path(base_dir or settings.upload_dir).resolve()
        self.base_dir.mkdir(parents=True, exist_ok=True)

    def _resolve_storage_path(self, storage_path: str) -> Path:
        """Map a relative storage path to an absolute path inside ``base_dir``.

        The check is lexical (``os.path.normpath``), so ``..`` segments and
        absolute paths are caught without touching the filesystem.

        Raises:
            ValueError: If the path would land outside ``base_dir``.
        """
        candidate = Path(os.path.normpath(self.base_dir / storage_path))
        if candidate != self.base_dir and self.base_dir not in candidate.parents:
            raise ValueError(
                f"Storage path escapes the storage root: {storage_path}"
            )
        return candidate

    @staticmethod
    def _discard_partial_file(path: Path) -> None:
        """Best-effort removal of a partially written file.

        Never raises: a failed cleanup must not mask the original error.
        """
        try:
            path.unlink()
        except FileNotFoundError:
            pass  # Nothing was created, nothing to clean up.
        except OSError as exc:
            logger.warning(f"Could not remove partial file {path}: {exc}")

    async def _ensure_dir_async(self, directory: Path) -> None:
        """Ensure directory exists asynchronously."""
        # exist_ok=True already covers the "already there" case, so no
        # separate (racy) exists() check is needed.
        await _run_blocking(
            lambda: directory.mkdir(parents=True, exist_ok=True)
        )

    def _generate_unique_filename(
        self, filename_hint: str, subdir: Optional[str] = None
    ) -> Tuple[Path, str]:
        """Generate unique filename and return full path and relative path.

        Path separators in ``filename_hint`` are replaced by ``_``, a
        ``.bin`` suffix is added when the hint has none, and a random hex
        prefix makes the name unique.

        Raises:
            ValueError: If ``subdir`` would place the file outside the
                storage root.
        """
        safe_name = filename_hint.replace("/", "_").replace("\\", "_")
        if not Path(safe_name).suffix:
            safe_name = f"{safe_name}.bin"
        final_name = f"{uuid.uuid4().hex}_{safe_name}"

        if subdir:
            directory = self._resolve_storage_path(subdir)
            relative_path = f"{subdir}/{final_name}"
        else:
            directory = self.base_dir
            relative_path = final_name
        return directory / final_name, relative_path

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Save bytes to storage asynchronously.

        Args:
            content: Payload to write.
            filename_hint: Original file name, used to derive the stored name.
            subdir: Optional sub-directory below the storage root.
            content_type: Accepted for interface compatibility; unused here.
            progress_callback: Called as ``callback(written, total)`` after
                each write.

        Returns:
            The storage path relative to the storage root.

        Raises:
            ValueError: If ``subdir`` escapes the storage root.
            Exception: Any write error is logged and re-raised after the
                partial file has been removed.
        """
        full_path, relative_path = self._generate_unique_filename(
            filename_hint, subdir
        )
        await self._ensure_dir_async(full_path.parent)

        total_size = len(content)
        try:
            async with aiofiles.open(full_path, "wb") as f:
                if total_size <= CHUNK_SIZE:
                    # Small file - write directly
                    await f.write(content)
                    if progress_callback:
                        progress_callback(total_size, total_size)
                else:
                    # Large file - write in chunks. A memoryview slices
                    # without copying each chunk.
                    view = memoryview(content)
                    written = 0
                    for offset in range(0, total_size, CHUNK_SIZE):
                        chunk = view[offset:offset + CHUNK_SIZE]
                        await f.write(chunk)
                        written += len(chunk)
                        if progress_callback:
                            progress_callback(written, total_size)
                        # Yield control
                        await asyncio.sleep(0)
            return relative_path
        except BaseException as exc:
            # BaseException (not Exception) so that task cancellation also
            # removes the half-written file.
            self._discard_partial_file(full_path)
            logger.error(f"Failed to save file {relative_path}: {exc}")
            raise

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Save streaming content to storage asynchronously.

        Args:
            content_stream: Async iterator yielding the payload in chunks.
            filename_hint: Original file name, used to derive the stored name.
            subdir: Optional sub-directory below the storage root.
            content_type: Accepted for interface compatibility; unused here.
            progress_callback: Called as ``callback(written, written)`` after
                each chunk; the total is unknown for streams, so the running
                count is passed for both arguments.

        Returns:
            The storage path relative to the storage root.

        Raises:
            ValueError: If ``subdir`` escapes the storage root.
            Exception: Any stream or write error is logged and re-raised
                after the partial file has been removed.
        """
        full_path, relative_path = self._generate_unique_filename(
            filename_hint, subdir
        )
        await self._ensure_dir_async(full_path.parent)

        try:
            total_written = 0
            async with aiofiles.open(full_path, "wb") as f:
                async for chunk in content_stream:
                    await f.write(chunk)
                    total_written += len(chunk)
                    if progress_callback:
                        # Unknown total for streams
                        progress_callback(total_written, total_written)
                    # Yield control
                    await asyncio.sleep(0)
            return relative_path
        except BaseException as exc:
            # BaseException (not Exception) so that task cancellation, e.g.
            # an aborted upload, also removes the half-written file.
            self._discard_partial_file(full_path)
            logger.error(f"Failed to save stream {relative_path}: {exc}")
            raise

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read entire file as bytes asynchronously.

        Raises:
            ValueError: If ``storage_path`` escapes the storage root.
            FileNotFoundError: If the file does not exist.
            Exception: Other read errors are logged and re-raised.
        """
        full_path = self._resolve_storage_path(storage_path)
        try:
            async with aiofiles.open(full_path, "rb") as f:
                return await f.read()
        except (FileNotFoundError, NotADirectoryError):
            # Opening directly instead of exists()-then-open avoids a race
            # and a blocking stat call.
            raise FileNotFoundError(
                f"File not found: {storage_path}"
            ) from None
        except Exception as exc:
            logger.error(f"Failed to read file {storage_path}: {exc}")
            raise

    async def open_stream_async(
        self, storage_path: str
    ) -> AsyncGenerator[bytes, None]:
        """Stream file content asynchronously in ``CHUNK_SIZE`` pieces.

        Raises:
            ValueError: If ``storage_path`` escapes the storage root.
            FileNotFoundError: If the file does not exist.
            Exception: Other read errors are logged and re-raised.
        """
        full_path = self._resolve_storage_path(storage_path)
        try:
            handle = await aiofiles.open(full_path, "rb")
        except (FileNotFoundError, NotADirectoryError):
            raise FileNotFoundError(
                f"File not found: {storage_path}"
            ) from None
        except Exception as exc:
            logger.error(f"Failed to stream file {storage_path}: {exc}")
            raise

        try:
            while True:
                chunk = await handle.read(CHUNK_SIZE)
                if not chunk:
                    break
                yield chunk
                # Yield control
                await asyncio.sleep(0)
        except Exception as exc:
            logger.error(f"Failed to stream file {storage_path}: {exc}")
            raise
        finally:
            # Also runs when the consumer stops iterating early.
            await handle.close()

    async def delete_async(self, storage_path: str) -> bool:
        """Delete file asynchronously.

        Returns:
            ``True`` if a file was removed, ``False`` if it did not exist or
            the deletion failed (failures are logged, not raised).
        """

        def _unlink(path: Path) -> bool:
            try:
                path.unlink()
            except (FileNotFoundError, NotADirectoryError):
                return False
            return True

        try:
            full_path = self._resolve_storage_path(storage_path)
            return await _run_blocking(lambda: _unlink(full_path))
        except Exception as exc:
            logger.error(f"Failed to delete file {storage_path}: {exc}")
            return False

    async def exists_async(self, storage_path: str) -> bool:
        """Check if file exists asynchronously.

        Paths that escape the storage root are reported as not existing.
        """
        try:
            full_path = self._resolve_storage_path(storage_path)
        except ValueError:
            return False
        return await _run_blocking(full_path.exists)

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get file size asynchronously.

        Returns:
            The size in bytes, or ``None`` if the file does not exist or its
            size could not be determined (failures are logged, not raised).
        """
        try:
            full_path = self._resolve_storage_path(storage_path)
            stat_result = await _run_blocking(full_path.stat)
            return stat_result.st_size
        except (FileNotFoundError, NotADirectoryError):
            return None
        except Exception as exc:
            logger.error(f"Failed to get size for {storage_path}: {exc}")
            return None

    def public_url(self, storage_path: str) -> Optional[str]:
        """Get public URL for file."""
        return f"/uploads/{storage_path}".replace("\\", "/")


class HybridStorageAdapter:
    """
    Hybrid storage adapter that provides both sync and async interfaces.

    Uses async operations internally but provides sync compatibility for
    existing code.
    """

    def __init__(self, base_dir: Optional[str] = None):
        """Create the wrapped ``AsyncLocalStorageAdapter`` for ``base_dir``."""
        self.async_adapter = AsyncLocalStorageAdapter(base_dir)
        self.base_dir = self.async_adapter.base_dir

    @staticmethod
    def _run_sync(coro: Coroutine[Any, Any, _T]) -> _T:
        """Run ``coro`` to completion from synchronous code.

        ``asyncio.run`` refuses to start when the calling thread already has
        a running event loop (e.g. sync helper code invoked from an async
        request handler). In that case the coroutine is executed on a fresh
        loop in a short-lived worker thread instead.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the simple path.
            return asyncio.run(coro)
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    # Sync interface for backward compatibility
    def save_bytes(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
    ) -> str:
        """Sync wrapper for save_bytes_async."""
        return self._run_sync(
            self.async_adapter.save_bytes_async(
                content, filename_hint, subdir, content_type
            )
        )

    def open_bytes(self, storage_path: str) -> bytes:
        """Sync wrapper for open_bytes_async."""
        return self._run_sync(
            self.async_adapter.open_bytes_async(storage_path)
        )

    def delete(self, storage_path: str) -> bool:
        """Sync wrapper for delete_async."""
        return self._run_sync(self.async_adapter.delete_async(storage_path))

    def exists(self, storage_path: str) -> bool:
        """Sync wrapper for exists_async."""
        return self._run_sync(self.async_adapter.exists_async(storage_path))

    def public_url(self, storage_path: str) -> Optional[str]:
        """Get public URL for file."""
        return self.async_adapter.public_url(storage_path)

    # Async interface
    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Save bytes asynchronously."""
        return await self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type, progress_callback
        )

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[ProgressCallback] = None,
    ) -> str:
        """Save stream asynchronously."""
        return await self.async_adapter.save_stream_async(
            content_stream,
            filename_hint,
            subdir,
            content_type,
            progress_callback,
        )

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read file as bytes asynchronously."""
        return await self.async_adapter.open_bytes_async(storage_path)

    async def open_stream_async(
        self, storage_path: str
    ) -> AsyncGenerator[bytes, None]:
        """Stream file content asynchronously."""
        async for chunk in self.async_adapter.open_stream_async(storage_path):
            yield chunk

    async def delete_async(self, storage_path: str) -> bool:
        """Delete file asynchronously."""
        return await self.async_adapter.delete_async(storage_path)

    async def exists_async(self, storage_path: str) -> bool:
        """Check if file exists asynchronously."""
        return await self.async_adapter.exists_async(storage_path)

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get file size asynchronously."""
        return await self.async_adapter.get_size_async(storage_path)


def get_async_storage() -> AsyncLocalStorageAdapter:
    """Get async storage adapter instance."""
    return AsyncLocalStorageAdapter()


def get_hybrid_storage() -> HybridStorageAdapter:
    """Get hybrid storage adapter with both sync and async interfaces."""
    return HybridStorageAdapter()


# Global instances
async_storage = get_async_storage()
hybrid_storage = get_hybrid_storage()