app/services/adaptive_cache.py (new file, 399 lines)
@@ -0,0 +1,399 @@
"""
Adaptive Cache TTL Service

Dynamically adjusts cache TTL based on data update frequency and patterns.
Provides intelligent caching that adapts to system usage patterns.
"""
import asyncio
import time
from typing import Dict, Optional, Any
from datetime import datetime, timedelta
from collections import defaultdict, deque
from dataclasses import dataclass
from sqlalchemy.orm import Session
from sqlalchemy import text

from app.utils.logging import get_logger
from app.services.cache import cache_get_json, cache_set_json

logger = get_logger("adaptive_cache")


@dataclass
class UpdateMetrics:
    """Metrics for tracking data update frequency"""
    table_name: str
    updates_per_hour: float
    last_update: datetime
    avg_query_frequency: float
    cache_hit_rate: float


@dataclass
class CacheConfig:
    """Cache configuration with adaptive TTL"""
    base_ttl: int
    min_ttl: int
    max_ttl: int
    update_weight: float = 0.7  # How much update frequency affects TTL
    query_weight: float = 0.3  # How much query frequency affects TTL


class AdaptiveCacheManager:
    """
    Manages adaptive caching with TTL that adjusts based on:
    - Data update frequency
    - Query frequency
    - Cache hit rates
    - Time-of-day patterns
    """

    def __init__(self):
        # Track update frequencies by table
        self.update_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=100))
        self.query_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=200))
        self.cache_stats: Dict[str, Dict[str, float]] = defaultdict(lambda: {
            "hits": 0, "misses": 0, "total_queries": 0
        })

        # Cache configurations for different data types
        self.cache_configs = {
            "customers": CacheConfig(base_ttl=300, min_ttl=60, max_ttl=1800),  # 5min base, 1min-30min range
            "files": CacheConfig(base_ttl=240, min_ttl=60, max_ttl=1200),  # 4min base, 1min-20min range
            "ledger": CacheConfig(base_ttl=120, min_ttl=30, max_ttl=600),  # 2min base, 30sec-10min range
            "documents": CacheConfig(base_ttl=600, min_ttl=120, max_ttl=3600),  # 10min base, 2min-1hr range
            "templates": CacheConfig(base_ttl=900, min_ttl=300, max_ttl=7200),  # 15min base, 5min-2hr range
            "global": CacheConfig(base_ttl=180, min_ttl=45, max_ttl=900),  # 3min base, 45sec-15min range
            "advanced": CacheConfig(base_ttl=300, min_ttl=60, max_ttl=1800),  # 5min base, 1min-30min range
        }

        # Background task for monitoring
        self._monitoring_task: Optional[asyncio.Task] = None
        self._last_metrics_update = time.time()

    async def start_monitoring(self, db: Session):
        """Start background monitoring of data update patterns"""
        if self._monitoring_task is None or self._monitoring_task.done():
            self._monitoring_task = asyncio.create_task(self._monitor_update_patterns(db))

    async def stop_monitoring(self):
        """Stop background monitoring"""
        if self._monitoring_task and not self._monitoring_task.done():
            self._monitoring_task.cancel()
            try:
                await self._monitoring_task
            except asyncio.CancelledError:
                pass

    def record_data_update(self, table_name: str):
        """Record that data was updated in a table"""
        now = time.time()
        self.update_history[table_name].append(now)
        logger.debug(f"Recorded update for table: {table_name}")

    def record_query(self, cache_type: str, cache_key: str, hit: bool):
        """Record a cache query (hit or miss)"""
        now = time.time()
        self.query_history[cache_type].append(now)

        stats = self.cache_stats[cache_type]
        stats["total_queries"] += 1
        if hit:
            stats["hits"] += 1
        else:
            stats["misses"] += 1

    def get_adaptive_ttl(self, cache_type: str, fallback_ttl: int = 300) -> int:
        """
        Calculate adaptive TTL based on update and query patterns

        Args:
            cache_type: Type of cache (customers, files, etc.)
            fallback_ttl: Default TTL if no config found

        Returns:
            Adaptive TTL in seconds
        """
        config = self.cache_configs.get(cache_type)
        if not config:
            return fallback_ttl

        # Get recent update frequency (updates per hour)
        updates_per_hour = self._calculate_update_frequency(cache_type)

        # Get recent query frequency (queries per minute)
        queries_per_minute = self._calculate_query_frequency(cache_type)

        # Get cache hit rate
        hit_rate = self._calculate_hit_rate(cache_type)

        # Calculate adaptive TTL
        ttl = self._calculate_adaptive_ttl(
            config, updates_per_hour, queries_per_minute, hit_rate
        )

        logger.debug(
            f"Adaptive TTL for {cache_type}: {ttl}s "
            f"(updates/hr: {updates_per_hour:.1f}, queries/min: {queries_per_minute:.1f}, hit_rate: {hit_rate:.2f})"
        )

        return ttl

    def _calculate_update_frequency(self, table_name: str) -> float:
        """Calculate updates per hour over the last hour"""
        now = time.time()
        hour_ago = now - 3600

        recent_updates = [
            update_time for update_time in self.update_history[table_name]
            if update_time >= hour_ago
        ]

        return len(recent_updates)
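
    # Note: update history is keyed by whatever name callers pass to
    # record_data_update(), while get_adaptive_ttl() looks frequencies up by
    # cache_type. Those key spaces must line up (e.g. "files", "ledger") for
    # update frequency to influence a cache's TTL; an update recorded under a
    # raw table name like "rolodex" will not affect the "customers" cache.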

    def _calculate_query_frequency(self, cache_type: str) -> float:
        """Calculate queries per minute over the last 10 minutes"""
        now = time.time()
        ten_minutes_ago = now - 600

        recent_queries = [
            query_time for query_time in self.query_history[cache_type]
            if query_time >= ten_minutes_ago
        ]

        return len(recent_queries) / 10.0  # per minute

    def _calculate_hit_rate(self, cache_type: str) -> float:
        """Calculate cache hit rate"""
        stats = self.cache_stats[cache_type]
        total = stats["total_queries"]

        if total == 0:
            return 0.5  # Neutral assumption

        return stats["hits"] / total

    def _calculate_adaptive_ttl(
        self,
        config: CacheConfig,
        updates_per_hour: float,
        queries_per_minute: float,
        hit_rate: float
    ) -> int:
        """
        Calculate adaptive TTL using multiple factors

        Logic:
        - Higher update frequency = shorter TTL (data goes stale faster)
        - Higher query frequency = shorter TTL (fresher data needed)
        - Higher hit rate = can use longer TTL
        - Apply time-of-day adjustments
        """
        base_ttl = config.base_ttl

        # Update frequency factor (0.1 to 1.5)
        # More updates = shorter TTL
        if updates_per_hour == 0:
            update_factor = 1.5  # No recent updates, can cache longer
        else:
            # Reciprocal scaling: 1 update/hr ~= 0.91, 10 updates/hr = 0.5
            update_factor = max(0.1, 1.0 / (1 + updates_per_hour * 0.1))

        # Query frequency factor (0.5 to 1.2)
        # More queries = need fresher data
        if queries_per_minute == 0:
            query_factor = 1.2  # No queries, can cache longer
        else:
            # More queries = shorter TTL, but with diminishing returns
            query_factor = max(0.5, 1.0 / (1 + queries_per_minute * 0.05))

        # Hit rate factor (0.8 to 1.3)
        # Higher hit rate = caching is working well, can extend TTL slightly
        hit_rate_factor = 0.8 + (hit_rate * 0.5)

        # Time-of-day factor
        time_factor = self._get_time_of_day_factor()

        # Combine factors as a weighted sum
        adaptive_factor = (
            update_factor * config.update_weight +
            query_factor * config.query_weight +
            hit_rate_factor * 0.2 +
            time_factor * 0.1
        )

        # Apply to base TTL
        adaptive_ttl = int(base_ttl * adaptive_factor)

        # Clamp to min/max bounds
        return max(config.min_ttl, min(config.max_ttl, adaptive_ttl))
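
    # Worked example with hypothetical inputs, for the "customers" config
    # (base_ttl=300, update_weight=0.7, query_weight=0.3), given
    # 5 updates/hr, 20 queries/min, hit_rate=0.75, during business hours:
    #   update_factor   = 1.0 / (1 + 5 * 0.1)   ~= 0.667
    #   query_factor    = 1.0 / (1 + 20 * 0.05)  = 0.500
    #   hit_rate_factor = 0.8 + 0.75 * 0.5       = 1.175
    #   time_factor     = 0.9
    #   adaptive_factor ~= 0.667*0.7 + 0.5*0.3 + 1.175*0.2 + 0.9*0.1 ~= 0.942
    #   ttl = int(300 * 0.942) = 282s, within the 60..1800 clamp.
    # Note that the weights (0.7 + 0.3 + 0.2 + 0.1) sum to 1.3, so a fully
    # neutral state yields a factor slightly above 1.0.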

    def _get_time_of_day_factor(self) -> float:
        """
        Adjust TTL based on time of day:
        business hours = shorter TTL (more activity),
        off hours = longer TTL (less activity).
        """
        now = datetime.now()
        hour = now.hour

        # Business hours (8 AM - 6 PM): shorter TTL
        if 8 <= hour <= 18:
            return 0.9  # 10% shorter TTL
        # Evening (7 PM - 10 PM): normal TTL
        elif 18 < hour <= 22:
            return 1.0
        # Night/early morning: longer TTL
        else:
            return 1.3  # 30% longer TTL

    async def _monitor_update_patterns(self, db: Session):
        """Background task to monitor database update patterns"""
        logger.info("Starting adaptive cache monitoring")

        try:
            while True:
                await asyncio.sleep(300)  # Check every 5 minutes
                await self._update_metrics(db)
        except asyncio.CancelledError:
            logger.info("Stopping adaptive cache monitoring")
            raise
        except Exception as e:
            logger.error(f"Error in cache monitoring: {str(e)}")

    async def _update_metrics(self, db: Session):
        """Update metrics from database statistics"""
        try:
            # Look at updated_at timestamps for recent activity. Scan only
            # one check interval (5 minutes) back so the same rows are not
            # recounted on every pass, which would inflate updates/hr.
            now = datetime.now()
            window_start = now - timedelta(minutes=5)

            # Check for recent updates in key tables
            tables_to_monitor = ['files', 'ledger', 'rolodex', 'documents', 'templates']

            for table in tables_to_monitor:
                try:
                    # Try to get update count from updated_at fields
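                    # The f-string table name is safe from SQL injection only
                    # because it comes from the fixed tables_to_monitor list
                    # above, never from user input.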
                    query = text(f"""
                        SELECT COUNT(*) as update_count
                        FROM {table}
                        WHERE updated_at >= :window_start
                    """)

                    result = db.execute(query, {"window_start": window_start}).scalar()

                    if result and result > 0:
                        # Record the updates
                        for _ in range(int(result)):
                            self.record_data_update(table)

                except Exception as e:
                    # Table might not have an updated_at field; skip quietly
                    logger.debug(f"Could not check updates for table {table}: {str(e)}")
                    continue

            # Clean old data
            self._cleanup_old_data()

        except Exception as e:
            logger.error(f"Error updating cache metrics: {str(e)}")

    def _cleanup_old_data(self):
        """Clean up old tracking data to prevent memory leaks"""
        cutoff_time = time.time() - 7200  # Keep last 2 hours

        for table_history in self.update_history.values():
            while table_history and table_history[0] < cutoff_time:
                table_history.popleft()

        for query_history in self.query_history.values():
            while query_history and query_history[0] < cutoff_time:
                query_history.popleft()

        # Reset cache stats periodically
        if time.time() - self._last_metrics_update > 3600:  # Every hour
            for stats in self.cache_stats.values():
                # Decay the stats to prevent them from growing indefinitely
                stats["hits"] = int(stats["hits"] * 0.8)
                stats["misses"] = int(stats["misses"] * 0.8)
                stats["total_queries"] = stats["hits"] + stats["misses"]

            self._last_metrics_update = time.time()

    def get_cache_statistics(self) -> Dict[str, Any]:
        """Get current cache statistics for monitoring"""
        stats = {}

        for cache_type, config in self.cache_configs.items():
            current_ttl = self.get_adaptive_ttl(cache_type, config.base_ttl)
            update_freq = self._calculate_update_frequency(cache_type)
            query_freq = self._calculate_query_frequency(cache_type)
            hit_rate = self._calculate_hit_rate(cache_type)

            stats[cache_type] = {
                "current_ttl": current_ttl,
                "base_ttl": config.base_ttl,
                "min_ttl": config.min_ttl,
                "max_ttl": config.max_ttl,
                "updates_per_hour": update_freq,
                "queries_per_minute": query_freq,
                "hit_rate": hit_rate,
                "total_queries": self.cache_stats[cache_type]["total_queries"]
            }

        return stats


# Global instance
adaptive_cache_manager = AdaptiveCacheManager()
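# Note: a module-level singleton, so metrics live per-process; with multiple
# workers, each process adapts its TTLs independently.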


# Enhanced cache functions that use adaptive TTL
async def adaptive_cache_get(
    cache_type: str,
    cache_key: str,
    user_id: Optional[str] = None,
    parts: Optional[Dict] = None
) -> Optional[Any]:
    """Get from cache and record metrics"""
    parts = parts or {}

    try:
        result = await cache_get_json(cache_type, user_id, parts)
        adaptive_cache_manager.record_query(cache_type, cache_key, hit=result is not None)
        return result
    except Exception as e:
        logger.error(f"Cache get error: {str(e)}")
        adaptive_cache_manager.record_query(cache_type, cache_key, hit=False)
        return None


async def adaptive_cache_set(
    cache_type: str,
    cache_key: str,
    value: Any,
    user_id: Optional[str] = None,
    parts: Optional[Dict] = None,
    ttl_override: Optional[int] = None
) -> None:
    """Set cache with adaptive TTL"""
    parts = parts or {}

    # Use adaptive TTL unless overridden
    ttl = ttl_override or adaptive_cache_manager.get_adaptive_ttl(cache_type)

    try:
        await cache_set_json(cache_type, user_id, parts, value, ttl)
        logger.debug(f"Cached {cache_type} with adaptive TTL: {ttl}s")
    except Exception as e:
        logger.error(f"Cache set error: {str(e)}")


def record_data_update(table_name: str):
    """Record that data was updated (call from model save/update operations)"""
    adaptive_cache_manager.record_data_update(table_name)


def get_cache_stats() -> Dict[str, Any]:
    """Get current cache statistics"""
    return adaptive_cache_manager.get_cache_statistics()
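

# Example wiring (a sketch, not prescribed by this module): SessionLocal and
# load_customer are hypothetical stand-ins for the app's own session factory
# and data loader.
#
#     db = SessionLocal()
#     await adaptive_cache_manager.start_monitoring(db)
#
#     cached = await adaptive_cache_get("customers", "customer:123", user_id="u1")
#     if cached is None:
#         data = load_customer(db, "123")
#         await adaptive_cache_set("customers", "customer:123", data, user_id="u1")
#
#     # After a write elsewhere in the app (keyed to match the cache type):
#     record_data_update("customers")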