""" Adaptive Cache TTL Service Dynamically adjusts cache TTL based on data update frequency and patterns. Provides intelligent caching that adapts to system usage patterns. """ import asyncio import time from typing import Dict, Optional, Tuple, Any, List from datetime import datetime, timedelta from collections import defaultdict, deque from dataclasses import dataclass from sqlalchemy.orm import Session from sqlalchemy import text, func from app.utils.logging import get_logger from app.services.cache import cache_get_json, cache_set_json logger = get_logger("adaptive_cache") @dataclass class UpdateMetrics: """Metrics for tracking data update frequency""" table_name: str updates_per_hour: float last_update: datetime avg_query_frequency: float cache_hit_rate: float @dataclass class CacheConfig: """Cache configuration with adaptive TTL""" base_ttl: int min_ttl: int max_ttl: int update_weight: float = 0.7 # How much update frequency affects TTL query_weight: float = 0.3 # How much query frequency affects TTL class AdaptiveCacheManager: """ Manages adaptive caching with TTL that adjusts based on: - Data update frequency - Query frequency - Cache hit rates - Time of day patterns """ def __init__(self): # Track update frequencies by table self.update_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=100)) self.query_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=200)) self.cache_stats: Dict[str, Dict[str, float]] = defaultdict(lambda: { "hits": 0, "misses": 0, "total_queries": 0 }) # Cache configurations for different data types self.cache_configs = { "customers": CacheConfig(base_ttl=300, min_ttl=60, max_ttl=1800), # 5min base, 1min-30min range "files": CacheConfig(base_ttl=240, min_ttl=60, max_ttl=1200), # 4min base, 1min-20min range "ledger": CacheConfig(base_ttl=120, min_ttl=30, max_ttl=600), # 2min base, 30sec-10min range "documents": CacheConfig(base_ttl=600, min_ttl=120, max_ttl=3600), # 10min base, 2min-1hr range "templates": CacheConfig(base_ttl=900, min_ttl=300, max_ttl=7200), # 15min base, 5min-2hr range "global": CacheConfig(base_ttl=180, min_ttl=45, max_ttl=900), # 3min base, 45sec-15min range "advanced": CacheConfig(base_ttl=300, min_ttl=60, max_ttl=1800), # 5min base, 1min-30min range } # Background task for monitoring self._monitoring_task: Optional[asyncio.Task] = None self._last_metrics_update = time.time() async def start_monitoring(self, db: Session): """Start background monitoring of data update patterns""" if self._monitoring_task is None or self._monitoring_task.done(): self._monitoring_task = asyncio.create_task(self._monitor_update_patterns(db)) async def stop_monitoring(self): """Stop background monitoring""" if self._monitoring_task and not self._monitoring_task.done(): self._monitoring_task.cancel() try: await self._monitoring_task except asyncio.CancelledError: pass def record_data_update(self, table_name: str): """Record that data was updated in a table""" now = time.time() self.update_history[table_name].append(now) logger.debug(f"Recorded update for table: {table_name}") def record_query(self, cache_type: str, cache_key: str, hit: bool): """Record a cache query (hit or miss)""" now = time.time() self.query_history[cache_type].append(now) stats = self.cache_stats[cache_type] stats["total_queries"] += 1 if hit: stats["hits"] += 1 else: stats["misses"] += 1 def get_adaptive_ttl(self, cache_type: str, fallback_ttl: int = 300) -> int: """ Calculate adaptive TTL based on update and query patterns Args: cache_type: Type of cache (customers, 
    def get_adaptive_ttl(self, cache_type: str, fallback_ttl: int = 300) -> int:
        """
        Calculate adaptive TTL based on update and query patterns

        Args:
            cache_type: Type of cache (customers, files, etc.)
            fallback_ttl: Default TTL if no config is found

        Returns:
            Adaptive TTL in seconds
        """
        config = self.cache_configs.get(cache_type)
        if not config:
            return fallback_ttl

        # Get recent update frequency (updates per hour)
        updates_per_hour = self._calculate_update_frequency(cache_type)

        # Get recent query frequency (queries per minute)
        queries_per_minute = self._calculate_query_frequency(cache_type)

        # Get cache hit rate
        hit_rate = self._calculate_hit_rate(cache_type)

        # Calculate adaptive TTL
        ttl = self._calculate_adaptive_ttl(
            config, updates_per_hour, queries_per_minute, hit_rate
        )

        logger.debug(
            f"Adaptive TTL for {cache_type}: {ttl}s "
            f"(updates/hr: {updates_per_hour:.1f}, queries/min: {queries_per_minute:.1f}, hit_rate: {hit_rate:.2f})"
        )
        return ttl

    def _calculate_update_frequency(self, table_name: str) -> float:
        """Calculate updates per hour over the last hour"""
        now = time.time()
        hour_ago = now - 3600
        recent_updates = [
            update_time for update_time in self.update_history[table_name]
            if update_time >= hour_ago
        ]
        return len(recent_updates)

    def _calculate_query_frequency(self, cache_type: str) -> float:
        """Calculate queries per minute over the last 10 minutes"""
        now = time.time()
        ten_minutes_ago = now - 600
        recent_queries = [
            query_time for query_time in self.query_history[cache_type]
            if query_time >= ten_minutes_ago
        ]
        return len(recent_queries) / 10.0  # per minute

    def _calculate_hit_rate(self, cache_type: str) -> float:
        """Calculate cache hit rate"""
        stats = self.cache_stats[cache_type]
        total = stats["total_queries"]
        if total == 0:
            return 0.5  # Neutral assumption
        return stats["hits"] / total

    def _calculate_adaptive_ttl(
        self,
        config: CacheConfig,
        updates_per_hour: float,
        queries_per_minute: float,
        hit_rate: float
    ) -> int:
        """
        Calculate adaptive TTL using multiple factors

        Logic:
        - Higher update frequency = shorter TTL
        - Higher query frequency = shorter TTL (fresher data needed)
        - Higher hit rate = can use longer TTL
        - Apply time-of-day adjustments
        """
        base_ttl = config.base_ttl

        # Update frequency factor (0.1 to 1.5)
        # More updates = shorter TTL
        if updates_per_hour == 0:
            update_factor = 1.5  # No recent updates, can cache longer
        else:
            # Reciprocal scaling: ~0.9 at 1 update/hr, 0.5 at 10 updates/hr
            update_factor = max(0.1, 1.0 / (1 + updates_per_hour * 0.1))

        # Query frequency factor (0.5 to 1.2)
        # More queries = need fresher data
        if queries_per_minute == 0:
            query_factor = 1.2  # No queries, can cache longer
        else:
            # More queries = shorter TTL, but with diminishing returns
            query_factor = max(0.5, 1.0 / (1 + queries_per_minute * 0.05))

        # Hit rate factor (0.8 to 1.3)
        # Higher hit rate = working well, can extend TTL slightly
        hit_rate_factor = 0.8 + (hit_rate * 0.5)

        # Time-of-day factor
        time_factor = self._get_time_of_day_factor()

        # Combine factors (weights sum to 1.3, so the result can exceed base_ttl;
        # it is clamped to the configured min/max below)
        adaptive_factor = (
            update_factor * config.update_weight +
            query_factor * config.query_weight +
            hit_rate_factor * 0.2 +
            time_factor * 0.1
        )

        # Apply to base TTL
        adaptive_ttl = int(base_ttl * adaptive_factor)

        # Clamp to min/max bounds
        return max(config.min_ttl, min(config.max_ttl, adaptive_ttl))

    def _get_time_of_day_factor(self) -> float:
        """
        Adjust TTL based on time of day

        Business hours = shorter TTL (more activity)
        Off hours = longer TTL (less activity)
        """
        hour = datetime.now().hour

        # Business hours (8 AM - 6 PM): shorter TTL
        if 8 <= hour <= 18:
            return 0.9  # 10% shorter TTL
        # Evening (6 PM - 10 PM): normal TTL
        elif 18 < hour <= 22:
            return 1.0
        # Night/early morning: longer TTL
        else:
            return 1.3  # 30% longer TTL
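    # Illustrative walk-through of _calculate_adaptive_ttl with hypothetical numbers
    # (the inputs below are assumptions, shown against the "files" config:
    # base_ttl=240, min_ttl=60, max_ttl=1200, update_weight=0.7, query_weight=0.3):
    #
    #   updates_per_hour = 5    -> update_factor   = 1 / (1 + 5 * 0.1)    ~= 0.67
    #   queries_per_minute = 10 -> query_factor    = 1 / (1 + 10 * 0.05)  ~= 0.67
    #   hit_rate = 0.8          -> hit_rate_factor = 0.8 + 0.8 * 0.5       = 1.20
    #   2 PM (business hours)   -> time_factor     = 0.9
    #
    #   adaptive_factor = 0.67*0.7 + 0.67*0.3 + 1.20*0.2 + 0.9*0.1 ~= 1.0
    #   adaptive_ttl    = int(240 * ~1.0) ~= 239s, already within [60, 1200]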
    async def _monitor_update_patterns(self, db: Session):
        """Background task to monitor database update patterns"""
        logger.info("Starting adaptive cache monitoring")
        try:
            while True:
                await asyncio.sleep(300)  # Check every 5 minutes
                await self._update_metrics(db)
        except asyncio.CancelledError:
            logger.info("Stopping adaptive cache monitoring")
            raise
        except Exception as e:
            logger.error(f"Error in cache monitoring: {str(e)}")

    async def _update_metrics(self, db: Session):
        """Update metrics from database statistics"""
        try:
            # Query recent update activity from audit logs or timestamp fields
            now = datetime.now()
            hour_ago = now - timedelta(hours=1)

            # Check for recent updates in key tables
            tables_to_monitor = ['files', 'ledger', 'rolodex', 'documents', 'templates']

            for table in tables_to_monitor:
                try:
                    # Try to get update count from updated_at fields.
                    # The table name is interpolated directly, which is acceptable
                    # only because it comes from the fixed allowlist above.
                    query = text(f"""
                        SELECT COUNT(*) as update_count
                        FROM {table}
                        WHERE updated_at >= :hour_ago
                    """)
                    result = db.execute(query, {"hour_ago": hour_ago}).scalar()

                    if result and result > 0:
                        # Record the updates
                        for _ in range(int(result)):
                            self.record_data_update(table)
                except Exception as e:
                    # Table might not have an updated_at field; skip silently
                    logger.debug(f"Could not check updates for table {table}: {str(e)}")
                    continue

            # Clean old data
            self._cleanup_old_data()
        except Exception as e:
            logger.error(f"Error updating cache metrics: {str(e)}")

    def _cleanup_old_data(self):
        """Clean up old tracking data to prevent memory leaks"""
        cutoff_time = time.time() - 7200  # Keep last 2 hours

        for table_history in self.update_history.values():
            while table_history and table_history[0] < cutoff_time:
                table_history.popleft()

        for query_history in self.query_history.values():
            while query_history and query_history[0] < cutoff_time:
                query_history.popleft()

        # Reset cache stats periodically
        if time.time() - self._last_metrics_update > 3600:  # Every hour
            for stats in self.cache_stats.values():
                # Decay the stats to prevent them from growing indefinitely
                stats["hits"] = int(stats["hits"] * 0.8)
                stats["misses"] = int(stats["misses"] * 0.8)
                stats["total_queries"] = stats["hits"] + stats["misses"]
            self._last_metrics_update = time.time()

    def get_cache_statistics(self) -> Dict[str, Any]:
        """Get current cache statistics for monitoring"""
        stats = {}
        for cache_type, config in self.cache_configs.items():
            current_ttl = self.get_adaptive_ttl(cache_type, config.base_ttl)
            update_freq = self._calculate_update_frequency(cache_type)
            query_freq = self._calculate_query_frequency(cache_type)
            hit_rate = self._calculate_hit_rate(cache_type)

            stats[cache_type] = {
                "current_ttl": current_ttl,
                "base_ttl": config.base_ttl,
                "min_ttl": config.min_ttl,
                "max_ttl": config.max_ttl,
                "updates_per_hour": update_freq,
                "queries_per_minute": query_freq,
                "hit_rate": hit_rate,
                "total_queries": self.cache_stats[cache_type]["total_queries"],
            }
        return stats


# Global instance
adaptive_cache_manager = AdaptiveCacheManager()
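# Example of wiring the background monitor into application startup/shutdown.
# This is a minimal sketch, assuming a FastAPI-style `app` and a `SessionLocal`
# session factory; both names are assumptions, not part of this module.
#
#   @app.on_event("startup")
#   async def _start_adaptive_cache_monitoring():
#       await adaptive_cache_manager.start_monitoring(SessionLocal())
#
#   @app.on_event("shutdown")
#   async def _stop_adaptive_cache_monitoring():
#       await adaptive_cache_manager.stop_monitoring()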
# Enhanced cache functions that use adaptive TTL

async def adaptive_cache_get(
    cache_type: str,
    cache_key: str,
    user_id: Optional[str] = None,
    parts: Optional[Dict] = None
) -> Optional[Any]:
    """Get from cache and record metrics"""
    parts = parts or {}
    try:
        result = await cache_get_json(cache_type, user_id, parts)
        adaptive_cache_manager.record_query(cache_type, cache_key, hit=result is not None)
        return result
    except Exception as e:
        logger.error(f"Cache get error: {str(e)}")
        adaptive_cache_manager.record_query(cache_type, cache_key, hit=False)
        return None


async def adaptive_cache_set(
    cache_type: str,
    cache_key: str,
    value: Any,
    user_id: Optional[str] = None,
    parts: Optional[Dict] = None,
    ttl_override: Optional[int] = None
) -> None:
    """Set cache with adaptive TTL"""
    parts = parts or {}

    # Use adaptive TTL unless overridden
    ttl = ttl_override or adaptive_cache_manager.get_adaptive_ttl(cache_type)

    try:
        await cache_set_json(cache_type, user_id, parts, value, ttl)
        logger.debug(f"Cached {cache_type} with adaptive TTL: {ttl}s")
    except Exception as e:
        logger.error(f"Cache set error: {str(e)}")


def record_data_update(table_name: str):
    """Record that data was updated (call from model save/update operations)"""
    adaptive_cache_manager.record_data_update(table_name)


def get_cache_stats() -> Dict[str, Any]:
    """Get current cache statistics"""
    return adaptive_cache_manager.get_cache_statistics()
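# Example usage (illustrative sketch only). The helper name and the
# `load_customers` loader below are hypothetical and not part of this module;
# only adaptive_cache_get, adaptive_cache_set, and record_data_update are real.
#
#   async def list_customers_cached(db: Session, user_id: str):
#       parts = {"scope": "list"}
#       cached = await adaptive_cache_get("customers", "customers:list", user_id, parts)
#       if cached is not None:
#           return cached
#       data = load_customers(db)  # hypothetical query helper
#       await adaptive_cache_set("customers", "customers:list", data, user_id, parts)
#       return data
#
#   # On write paths, record the change so TTLs tighten for frequently updated tables:
#   #   record_data_update("customers")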