Files
delphi-database/app/services/async_storage.py
HotSwapp bac8cc4bd5 changes
2025-08-18 20:20:04 -05:00

347 lines
12 KiB
Python

"""
Async storage abstraction for handling large files efficiently.
Extends the existing storage abstraction with async capabilities
for better performance with large files.
"""
import asyncio
import aiofiles
import os
import uuid
from pathlib import Path
from typing import Optional, AsyncGenerator, Callable, Tuple
from app.config import settings
from app.utils.logging import get_logger
logger = get_logger("async_storage")
CHUNK_SIZE = 64 * 1024 # 64KB chunks
class AsyncStorageAdapter:
    """Interface for asynchronous storage backends.

    Concrete adapters persist and retrieve file content without blocking
    the event loop. Every coroutine defined here is a stub that raises
    NotImplementedError until a subclass overrides it.
    """

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None,
    ) -> str:
        """Persist *content* and return its storage-relative path."""
        raise NotImplementedError

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None,
    ) -> str:
        """Persist a chunked byte stream and return its storage-relative path."""
        raise NotImplementedError

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Return the complete content stored at *storage_path*."""
        raise NotImplementedError

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Yield the content stored at *storage_path* in chunks."""
        raise NotImplementedError

    async def delete_async(self, storage_path: str) -> bool:
        """Remove the stored file; True when deletion succeeded."""
        raise NotImplementedError

    async def exists_async(self, storage_path: str) -> bool:
        """Report whether *storage_path* currently exists."""
        raise NotImplementedError

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Return the stored file's size in bytes, or None when unavailable."""
        raise NotImplementedError

    def public_url(self, storage_path: str) -> Optional[str]:
        """Return a publicly reachable URL for *storage_path*, or None
        when the backend does not expose one (default)."""
        return None
class AsyncLocalStorageAdapter(AsyncStorageAdapter):
    """Async local-filesystem storage adapter for handling large files efficiently.

    Files live beneath ``base_dir`` (defaults to ``settings.upload_dir``)
    under collision-proof names (``<uuid-hex>_<sanitized hint>``). Large
    payloads are written in CHUNK_SIZE pieces, with ``await asyncio.sleep(0)``
    between chunks so other tasks on the event loop can run.
    """

    def __init__(self, base_dir: Optional[str] = None) -> None:
        # Resolve once so containment checks below compare absolute paths.
        self.base_dir = Path(base_dir or settings.upload_dir).resolve()
        self.base_dir.mkdir(parents=True, exist_ok=True)

    async def _ensure_dir_async(self, directory: Path) -> None:
        """Ensure *directory* exists.

        Unconditional mkdir with exist_ok avoids the exists()/mkdir() TOCTOU
        race the previous pre-check had.
        """
        directory.mkdir(parents=True, exist_ok=True)

    def _resolve_within_base(self, storage_path: str) -> Optional[Path]:
        """Resolve a storage-relative path, rejecting escapes from base_dir.

        Returns the absolute path, or None when *storage_path* (e.g. via
        ``..`` segments or an absolute path) would land outside base_dir.
        Callers treat a rejected path the same as a missing file.
        """
        candidate = (self.base_dir / storage_path).resolve()
        try:
            candidate.relative_to(self.base_dir)
        except ValueError:
            logger.warning(f"Rejected storage path outside base dir: {storage_path}")
            return None
        return candidate

    def _generate_unique_filename(self, filename_hint: str, subdir: Optional[str] = None) -> Tuple[Path, str]:
        """Generate a unique filename; return (full path, storage-relative path)."""
        # Flatten path separators in the hint so it cannot nest or escape dirs.
        safe_name = filename_hint.replace("/", "_").replace("\\", "_")
        if not Path(safe_name).suffix:
            safe_name = f"{safe_name}.bin"
        final_name = f"{uuid.uuid4().hex}_{safe_name}"
        if subdir:
            full_path = self.base_dir / subdir / final_name
            relative_path = f"{subdir}/{final_name}"
        else:
            full_path = self.base_dir / final_name
            relative_path = final_name
        return full_path, relative_path

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save *content* asynchronously; return the storage-relative path.

        Payloads larger than CHUNK_SIZE are written chunk by chunk, reporting
        (written, total) to *progress_callback* and yielding to the event loop
        between chunks. On failure the partial file is removed and the original
        exception is re-raised.
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)
        await self._ensure_dir_async(full_path.parent)
        try:
            total_size = len(content)
            async with aiofiles.open(full_path, "wb") as f:
                if total_size <= CHUNK_SIZE:
                    # Small payload - single write.
                    await f.write(content)
                    if progress_callback:
                        progress_callback(total_size, total_size)
                else:
                    # Large payload - chunked writes with progress reporting.
                    written = 0
                    for i in range(0, total_size, CHUNK_SIZE):
                        chunk = content[i:i + CHUNK_SIZE]
                        await f.write(chunk)
                        written += len(chunk)
                        if progress_callback:
                            progress_callback(written, total_size)
                        # Yield control so other tasks can run.
                        await asyncio.sleep(0)
            return relative_path
        except Exception as e:
            # Best-effort cleanup of the partial file. Only OSError is
            # swallowed here; the previous bare `except:` also hid
            # KeyboardInterrupt/SystemExit.
            if full_path.exists():
                try:
                    full_path.unlink()
                except OSError:
                    pass
            logger.error(f"Failed to save file {relative_path}: {str(e)}")
            raise

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save streaming content asynchronously; return the storage-relative path.

        The total size is unknown for a stream, so *progress_callback* receives
        (written, written). On failure the partial file is removed and the
        original exception is re-raised.
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)
        await self._ensure_dir_async(full_path.parent)
        try:
            total_written = 0
            async with aiofiles.open(full_path, "wb") as f:
                async for chunk in content_stream:
                    await f.write(chunk)
                    total_written += len(chunk)
                    if progress_callback:
                        progress_callback(total_written, total_written)  # Unknown total for streams
                    # Yield control between chunks.
                    await asyncio.sleep(0)
            return relative_path
        except Exception as e:
            # Best-effort cleanup; see save_bytes_async for the OSError rationale.
            if full_path.exists():
                try:
                    full_path.unlink()
                except OSError:
                    pass
            logger.error(f"Failed to save stream {relative_path}: {str(e)}")
            raise

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read the entire stored file into memory.

        Raises FileNotFoundError when the path is missing or would escape
        base_dir.
        """
        full_path = self._resolve_within_base(storage_path)
        if full_path is None or not full_path.exists():
            raise FileNotFoundError(f"File not found: {storage_path}")
        try:
            async with aiofiles.open(full_path, "rb") as f:
                return await f.read()
        except Exception as e:
            logger.error(f"Failed to read file {storage_path}: {str(e)}")
            raise

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Yield the stored file's content in CHUNK_SIZE pieces.

        Raises FileNotFoundError when the path is missing or would escape
        base_dir.
        """
        full_path = self._resolve_within_base(storage_path)
        if full_path is None or not full_path.exists():
            raise FileNotFoundError(f"File not found: {storage_path}")
        try:
            async with aiofiles.open(full_path, "rb") as f:
                while True:
                    chunk = await f.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    yield chunk
                    # Yield control between chunks.
                    await asyncio.sleep(0)
        except Exception as e:
            logger.error(f"Failed to stream file {storage_path}: {str(e)}")
            raise

    async def delete_async(self, storage_path: str) -> bool:
        """Delete the stored file; True only when an existing file was removed."""
        full_path = self._resolve_within_base(storage_path)
        if full_path is None:
            return False
        try:
            if full_path.exists():
                full_path.unlink()
                return True
            return False
        except Exception as e:
            logger.error(f"Failed to delete file {storage_path}: {str(e)}")
            return False

    async def exists_async(self, storage_path: str) -> bool:
        """Check whether the stored file exists (False for escaping paths)."""
        full_path = self._resolve_within_base(storage_path)
        return full_path is not None and full_path.exists()

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Return the stored file's size in bytes, or None when missing/unreadable."""
        full_path = self._resolve_within_base(storage_path)
        if full_path is None:
            return None
        try:
            if full_path.exists():
                return full_path.stat().st_size
            return None
        except Exception as e:
            logger.error(f"Failed to get size for {storage_path}: {str(e)}")
            return None

    def public_url(self, storage_path: str) -> Optional[str]:
        """Map the storage-relative path onto the /uploads/ URL prefix."""
        return f"/uploads/{storage_path}".replace("\\", "/")
class HybridStorageAdapter:
    """Storage adapter exposing both synchronous and asynchronous entry points.

    All work is delegated to an internal AsyncLocalStorageAdapter. The sync
    methods exist for backward compatibility and simply drive the matching
    coroutine to completion with asyncio.run(), so they must be called from
    code that is not already running inside an event loop.
    """

    def __init__(self, base_dir: Optional[str] = None):
        self.async_adapter = AsyncLocalStorageAdapter(base_dir)
        self.base_dir = self.async_adapter.base_dir

    # ---- sync facade (backward compatibility) ----

    def save_bytes(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None
    ) -> str:
        """Blocking equivalent of save_bytes_async."""
        coro = self.async_adapter.save_bytes_async(content, filename_hint, subdir, content_type)
        return asyncio.run(coro)

    def open_bytes(self, storage_path: str) -> bytes:
        """Blocking equivalent of open_bytes_async."""
        return asyncio.run(self.async_adapter.open_bytes_async(storage_path))

    def delete(self, storage_path: str) -> bool:
        """Blocking equivalent of delete_async."""
        return asyncio.run(self.async_adapter.delete_async(storage_path))

    def exists(self, storage_path: str) -> bool:
        """Blocking equivalent of exists_async."""
        return asyncio.run(self.async_adapter.exists_async(storage_path))

    def public_url(self, storage_path: str) -> Optional[str]:
        """Public URL for *storage_path*, delegated to the async adapter."""
        return self.async_adapter.public_url(storage_path)

    # ---- async facade ----

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save bytes asynchronously; see AsyncLocalStorageAdapter.save_bytes_async."""
        return await self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type, progress_callback
        )

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save a stream asynchronously; see AsyncLocalStorageAdapter.save_stream_async."""
        return await self.async_adapter.save_stream_async(
            content_stream, filename_hint, subdir, content_type, progress_callback
        )

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read a file as bytes asynchronously."""
        return await self.async_adapter.open_bytes_async(storage_path)

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Re-yield chunks from the underlying adapter's stream."""
        async for piece in self.async_adapter.open_stream_async(storage_path):
            yield piece

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get the file size asynchronously."""
        return await self.async_adapter.get_size_async(storage_path)
def get_async_storage() -> AsyncLocalStorageAdapter:
    """Create a new async-only storage adapter rooted at the configured upload dir."""
    return AsyncLocalStorageAdapter()
def get_hybrid_storage() -> HybridStorageAdapter:
    """Create a new adapter offering both the sync and async interfaces."""
    return HybridStorageAdapter()
# Global instances
# NOTE: created at import time — importing this module resolves
# settings.upload_dir and creates the base directory as a side effect
# (see AsyncLocalStorageAdapter.__init__).
async_storage = get_async_storage()
hybrid_storage = get_hybrid_storage()