changes
app/services/async_storage.py (new file, +346)
@@ -0,0 +1,346 @@
"""
Async storage abstraction for handling large files efficiently.

Extends the existing storage abstraction with async capabilities
for better performance with large files.
"""
import asyncio
import aiofiles
import uuid
from pathlib import Path
from typing import Optional, AsyncGenerator, Callable, Tuple

from app.config import settings
from app.utils.logging import get_logger

logger = get_logger("async_storage")

CHUNK_SIZE = 64 * 1024  # 64KB chunks
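# Payloads larger than CHUNK_SIZE are read and written in CHUNK_SIZE pieces,
# with an `await asyncio.sleep(0)` between pieces so the event loop can keep
# servicing other tasks during large transfers.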


class AsyncStorageAdapter:
    """Abstract async storage adapter."""

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        raise NotImplementedError

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        raise NotImplementedError

    async def open_bytes_async(self, storage_path: str) -> bytes:
        raise NotImplementedError

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        raise NotImplementedError

    async def delete_async(self, storage_path: str) -> bool:
        raise NotImplementedError

    async def exists_async(self, storage_path: str) -> bool:
        raise NotImplementedError

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        raise NotImplementedError

    def public_url(self, storage_path: str) -> Optional[str]:
        return None

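# Concrete adapters implement the coroutines above. The save_* methods return
# a storage-relative path string, and that same string is what open_*,
# delete_async, exists_async and get_size_async expect back; public_url() may
# return None when a backend has no publicly reachable URL for a file.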


class AsyncLocalStorageAdapter(AsyncStorageAdapter):
    """Async local storage adapter for handling large files efficiently."""

    def __init__(self, base_dir: Optional[str] = None) -> None:
        self.base_dir = Path(base_dir or settings.upload_dir).resolve()
        self.base_dir.mkdir(parents=True, exist_ok=True)

    async def _ensure_dir_async(self, directory: Path) -> None:
        """Ensure the target directory exists.

        Path.mkdir() is a blocking call, but directory creation is cheap
        enough to do inline rather than in a worker thread.
        """
        if not directory.exists():
            directory.mkdir(parents=True, exist_ok=True)

    def _generate_unique_filename(self, filename_hint: str, subdir: Optional[str] = None) -> Tuple[Path, str]:
        """Generate a unique filename and return the full path and relative path."""
        safe_name = filename_hint.replace("/", "_").replace("\\", "_")
        if not Path(safe_name).suffix:
            safe_name = f"{safe_name}.bin"

        unique = uuid.uuid4().hex
        final_name = f"{unique}_{safe_name}"

        if subdir:
            directory = self.base_dir / subdir
            full_path = directory / final_name
            relative_path = f"{subdir}/{final_name}"
        else:
            directory = self.base_dir
            full_path = directory / final_name
            relative_path = final_name

        return full_path, relative_path

    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save bytes to storage asynchronously.

        The optional progress_callback receives (bytes_written, total_bytes).
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)

        # Ensure directory exists
        await self._ensure_dir_async(full_path.parent)

        try:
            async with aiofiles.open(full_path, "wb") as f:
                if len(content) <= CHUNK_SIZE:
                    # Small file - write directly
                    await f.write(content)
                    if progress_callback:
                        progress_callback(len(content), len(content))
                else:
                    # Large file - write in chunks
                    total_size = len(content)
                    written = 0

                    for i in range(0, len(content), CHUNK_SIZE):
                        chunk = content[i:i + CHUNK_SIZE]
                        await f.write(chunk)
                        written += len(chunk)

                        if progress_callback:
                            progress_callback(written, total_size)

                        # Yield control
                        await asyncio.sleep(0)

            return relative_path

        except Exception as e:
            # Clean up the partial file on failure
            if full_path.exists():
                try:
                    full_path.unlink()
                except OSError:
                    pass
            logger.error(f"Failed to save file {relative_path}: {str(e)}")
            raise

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save streaming content to storage asynchronously.

        The total size of a stream is unknown up front, so progress_callback
        receives the running byte count in both positions.
        """
        full_path, relative_path = self._generate_unique_filename(filename_hint, subdir)

        # Ensure directory exists
        await self._ensure_dir_async(full_path.parent)

        try:
            total_written = 0
            async with aiofiles.open(full_path, "wb") as f:
                async for chunk in content_stream:
                    await f.write(chunk)
                    total_written += len(chunk)

                    if progress_callback:
                        progress_callback(total_written, total_written)  # Unknown total for streams

                    # Yield control
                    await asyncio.sleep(0)

            return relative_path

        except Exception as e:
            # Clean up the partial file on failure
            if full_path.exists():
                try:
                    full_path.unlink()
                except OSError:
                    pass
            logger.error(f"Failed to save stream {relative_path}: {str(e)}")
            raise

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read entire file as bytes asynchronously."""
        full_path = self.base_dir / storage_path

        if not full_path.exists():
            raise FileNotFoundError(f"File not found: {storage_path}")

        try:
            async with aiofiles.open(full_path, "rb") as f:
                return await f.read()
        except Exception as e:
            logger.error(f"Failed to read file {storage_path}: {str(e)}")
            raise

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Stream file content asynchronously."""
        full_path = self.base_dir / storage_path

        if not full_path.exists():
            raise FileNotFoundError(f"File not found: {storage_path}")

        try:
            async with aiofiles.open(full_path, "rb") as f:
                while True:
                    chunk = await f.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    yield chunk
                    # Yield control
                    await asyncio.sleep(0)
        except Exception as e:
            logger.error(f"Failed to stream file {storage_path}: {str(e)}")
            raise

    async def delete_async(self, storage_path: str) -> bool:
        """Delete file asynchronously."""
        full_path = self.base_dir / storage_path

        try:
            if full_path.exists():
                full_path.unlink()
                return True
            return False
        except Exception as e:
            logger.error(f"Failed to delete file {storage_path}: {str(e)}")
            return False

    async def exists_async(self, storage_path: str) -> bool:
        """Check if file exists asynchronously."""
        full_path = self.base_dir / storage_path
        return full_path.exists()

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get file size asynchronously."""
        full_path = self.base_dir / storage_path

        try:
            if full_path.exists():
                return full_path.stat().st_size
            return None
        except Exception as e:
            logger.error(f"Failed to get size for {storage_path}: {str(e)}")
            return None

    def public_url(self, storage_path: str) -> Optional[str]:
        """Get public URL for file."""
        return f"/uploads/{storage_path}".replace("\\", "/")

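# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not referenced by the application code):
# how a caller might stream a large payload into the adapter with progress
# logging. The async generator argument is an assumption standing in for
# whatever actually produces the upload data (for example, a web framework's
# async file object wrapped in a generator).
# ---------------------------------------------------------------------------
async def _example_stream_save(chunks: AsyncGenerator[bytes, None], filename: str) -> str:
    storage = AsyncLocalStorageAdapter()

    def report_progress(written: int, _total: int) -> None:
        # For streams the adapter reports bytes written so far in both slots.
        logger.debug(f"{filename}: {written} bytes written")

    return await storage.save_stream_async(
        chunks,
        filename,
        subdir="examples",
        progress_callback=report_progress,
    )
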

class HybridStorageAdapter:
    """
    Hybrid storage adapter that provides both sync and async interfaces.

    Uses async operations internally but provides sync compatibility
    for existing code.
    """

    def __init__(self, base_dir: Optional[str] = None):
        self.async_adapter = AsyncLocalStorageAdapter(base_dir)
        self.base_dir = self.async_adapter.base_dir

    # Sync interface for backward compatibility.
    # Note: these wrappers call asyncio.run(), so they must not be invoked
    # from code that is already running inside an event loop; async callers
    # should use the *_async methods instead.
    def save_bytes(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None
    ) -> str:
        """Sync wrapper for save_bytes_async."""
        return asyncio.run(self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type
        ))

    def open_bytes(self, storage_path: str) -> bytes:
        """Sync wrapper for open_bytes_async."""
        return asyncio.run(self.async_adapter.open_bytes_async(storage_path))

    def delete(self, storage_path: str) -> bool:
        """Sync wrapper for delete_async."""
        return asyncio.run(self.async_adapter.delete_async(storage_path))

    def exists(self, storage_path: str) -> bool:
        """Sync wrapper for exists_async."""
        return asyncio.run(self.async_adapter.exists_async(storage_path))

    def public_url(self, storage_path: str) -> Optional[str]:
        """Get public URL for file."""
        return self.async_adapter.public_url(storage_path)

    # Async interface
    async def save_bytes_async(
        self,
        content: bytes,
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save bytes asynchronously."""
        return await self.async_adapter.save_bytes_async(
            content, filename_hint, subdir, content_type, progress_callback
        )

    async def save_stream_async(
        self,
        content_stream: AsyncGenerator[bytes, None],
        filename_hint: str,
        subdir: Optional[str] = None,
        content_type: Optional[str] = None,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> str:
        """Save stream asynchronously."""
        return await self.async_adapter.save_stream_async(
            content_stream, filename_hint, subdir, content_type, progress_callback
        )

    async def open_bytes_async(self, storage_path: str) -> bytes:
        """Read file as bytes asynchronously."""
        return await self.async_adapter.open_bytes_async(storage_path)

    async def open_stream_async(self, storage_path: str) -> AsyncGenerator[bytes, None]:
        """Stream file content asynchronously."""
        async for chunk in self.async_adapter.open_stream_async(storage_path):
            yield chunk

    async def get_size_async(self, storage_path: str) -> Optional[int]:
        """Get file size asynchronously."""
        return await self.async_adapter.get_size_async(storage_path)

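# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not referenced by the application code):
# a synchronous call site using the hybrid adapter. Safe only outside of a
# running event loop, because the sync wrappers rely on asyncio.run().
# ---------------------------------------------------------------------------
def _example_sync_roundtrip() -> bytes:
    storage = HybridStorageAdapter()
    path = storage.save_bytes(b"hello world", "greeting.txt", subdir="examples")
    try:
        return storage.open_bytes(path)
    finally:
        storage.delete(path)
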
def get_async_storage() -> AsyncLocalStorageAdapter:
    """Get async storage adapter instance."""
    return AsyncLocalStorageAdapter()


def get_hybrid_storage() -> HybridStorageAdapter:
    """Get hybrid storage adapter with both sync and async interfaces."""
    return HybridStorageAdapter()


# Global instances
async_storage = get_async_storage()
hybrid_storage = get_hybrid_storage()
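# Usage sketch (illustrative): async callers can import the shared
# module-level instance rather than constructing adapters ad hoc, e.g.
#
#     from app.services.async_storage import async_storage
#     data = await async_storage.open_bytes_async(stored_path)
#
# where `stored_path` is the relative path previously returned by one of the
# save_* methods.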