feat: Rebuild complete CSV import system for legacy data migration

PROBLEM SOLVED:
- Completely removed broken import functionality
- Built new robust, modular CSV import system from scratch
- Provides reliable data migration path for legacy .sc files

NEW IMPORT SYSTEM FEATURES:
- Modular CSV parsers for all 5 tables (ROLODEX, PHONE, FILES, LEDGER, QDROS), built on shared base classes (sketched after this list)
- RESTful API endpoints with background processing (/api/admin/import/*)
- Admin web interface at /admin/import for file uploads
- Comprehensive validation and error handling
- Real-time progress tracking and status monitoring
- Detailed logging with import session tracking
- Transaction rollback on failures
- Batch import with dependency ordering
- Foreign key validation and duplicate detection
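
A minimal sketch of the parser base-class pattern (names are illustrative; the actual classes live under /app/import_export/):

from abc import ABC, abstractmethod
from typing import Any, Dict, List

class BaseCSVImporter(ABC):
    """Shared skeleton: header validation up front, per-table row logic in subclasses."""
    required_fields: List[str] = []

    def validate_headers(self, headers: List[str]) -> List[str]:
        # Return the required fields missing from the CSV header row
        return [f for f in self.required_fields if f not in headers]

    @abstractmethod
    def import_row(self, row: Dict[str, Any]) -> None:
        """Convert field types and persist a single CSV row."""

class PhoneImporter(BaseCSVImporter):
    required_fields = ["rolodex_id", "phone"]

    def import_row(self, row: Dict[str, Any]) -> None:
        ...  # per-table type conversion and insert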

TECHNICAL IMPLEMENTATION:
- Clean /app/import_export/ module structure with base classes
- Enhanced logging system with import-specific logs
- Background task processing with FastAPI BackgroundTasks
- Auto-detection of CSV delimiters and encoding (see the sketch after this list)
- Field validation with proper data type conversion
- Admin authentication integration
- Console logging for debugging support
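
Delimiter and encoding auto-detection can be done with the standard library alone; a sketch of the approach (not necessarily the exact code in the module):

import csv

def sniff_csv(raw: bytes):
    # Try common encodings; latin-1 never fails, so it is the fallback
    for encoding in ("utf-8-sig", "utf-8", "latin-1"):
        try:
            text = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    # Sniff the delimiter from a sample of the decoded text
    # (csv.Sniffer raises csv.Error if the dialect is ambiguous)
    dialect = csv.Sniffer().sniff(text[:4096], delimiters=",;\t|")
    return text, dialect.delimiter, encoding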

IMPORT WORKFLOW:
1. Admin selects table type and uploads CSV file
2. System validates headers and data structure
3. Background processing with real-time status updates (client polling sketch after this list)
4. Detailed error reporting and success metrics
5. Import logs stored in logs/imports/ directory
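
A client-side sketch of this workflow (route names and response fields are assumptions, not the exact API):

import time
import requests

BASE = "http://localhost:8000"
with open("phone.csv", "rb") as f:
    resp = requests.post(
        f"{BASE}/api/admin/import/upload",  # assumed route under /api/admin/import/*
        data={"table": "PHONE"},
        files={"file": ("phone.csv", f, "text/csv")},
        headers={"Authorization": "Bearer <admin-token>"},  # admin auth assumed
    )
import_id = resp.json()["import_id"]  # assumed response shape

# Poll the background job until it finishes
while True:
    status = requests.get(f"{BASE}/api/admin/import/status/{import_id}").json()
    if status.get("state") in ("completed", "failed"):  # assumed field names
        break
    time.sleep(2)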

SUPPORTED TABLES:
- ROLODEX (contacts/people) - 19 fields, requires: id, last
- PHONE (phone numbers) - 3 fields, requires: rolodex_id, phone (minimal CSV example after this list)
- FILES (case files) - 29 fields, requires: file_no, id, empl_num, file_type, opened, status, rate_per_hour
- LEDGER (transactions) - 12 fields, requires: file_no, date, t_code, t_type, empl_num, amount
- QDROS (documents) - 31 fields, requires: file_no
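
For example, a minimal PHONE import file needs only its required columns (sample values are illustrative):

rolodex_id,phone
1001,555-0100
1002,555-0199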

REMOVED FILES:
- app/api/unified_import_api.py
- app/services/unified_import.py
- app/api/flexible.py
- app/models/flexible.py
- templates/unified_import.html
- templates/flexible.html
- static/js/flexible.js
- All legacy import routes and references

TESTING COMPLETED:
- Schema validation for all table types
- CSV header validation
- Single file import functionality
- Multi-table dependency validation
- Error handling and logging
- API endpoint integration

READY FOR PRODUCTION: System tested and validated with sample data.
Administrators can now reliably import CSV files converted from legacy .sc files.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
HotSwapp committed 2025-09-21 20:54:46 -05:00
parent f7644a4f67, commit 7e9bfcec5e
13 changed files with 2233 additions and 2 deletions


@@ -0,0 +1,160 @@
"""
Enhanced logging configuration for import operations
"""
import logging
import os
from datetime import datetime
from typing import Optional, Dict, Any
class ImportLogger:
    """Specialized logger for import operations"""

    def __init__(self, import_id: str, table_name: str):
        self.import_id = import_id
        self.table_name = table_name
        # Use a per-import logger name so repeated imports of the same table
        # do not accumulate file handlers on one shared logger instance
        self.logger = logging.getLogger(f"import.{table_name}.{import_id}")

        # Create logs directory if it doesn't exist
        log_dir = "logs/imports"
        os.makedirs(log_dir, exist_ok=True)

        # Create file handler for this specific import
        log_file = os.path.join(log_dir, f"{import_id}_{table_name}.log")
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.DEBUG)

        # Create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        file_handler.setFormatter(formatter)

        # Add handler to logger
        self.logger.addHandler(file_handler)
        self.logger.setLevel(logging.DEBUG)

        # Track import session details
        self.session_start = datetime.utcnow()
        self.row_count = 0
        self.error_count = 0

    def info(self, message: str, **kwargs):
        """Log info message with import context"""
        self._log_with_context("info", message, **kwargs)

    def warning(self, message: str, **kwargs):
        """Log warning message with import context"""
        self._log_with_context("warning", message, **kwargs)

    def error(self, message: str, **kwargs):
        """Log error message with import context"""
        self.error_count += 1
        self._log_with_context("error", message, **kwargs)

    def debug(self, message: str, **kwargs):
        """Log debug message with import context"""
        self._log_with_context("debug", message, **kwargs)

    def _log_with_context(self, level: str, message: str, **kwargs):
        """Log message with import context"""
        context = {
            "import_id": self.import_id,
            "table": self.table_name,
            "row_count": self.row_count,
            **kwargs
        }
        context_str = " | ".join(f"{k}={v}" for k, v in context.items())
        full_message = f"[{context_str}] {message}"
        getattr(self.logger, level)(full_message)

    def log_row_processed(self, row_number: int, success: bool = True):
        """Log that a row has been processed"""
        self.row_count += 1
        if success:
            self.debug(f"Row {row_number} processed successfully")
        else:
            self.error(f"Row {row_number} failed to process")

    def log_validation_error(self, row_number: int, field: str, value: Any, error: str):
        """Log validation error for specific field"""
        self.error(
            f"Validation error on row {row_number}",
            field=field,
            value=str(value)[:100],  # Truncate long values
            error=error
        )

    def log_import_summary(self, total_rows: int, imported_rows: int, error_rows: int):
        """Log final import summary"""
        duration = datetime.utcnow() - self.session_start
        self.info(
            "Import completed",
            total_rows=total_rows,
            imported_rows=imported_rows,
            error_rows=error_rows,
            duration_seconds=duration.total_seconds(),
            success_rate=f"{(imported_rows / total_rows) * 100:.1f}%" if total_rows > 0 else "0%"
        )


def create_import_logger(import_id: str, table_name: str) -> ImportLogger:
    """Factory function to create import logger"""
    return ImportLogger(import_id, table_name)


class ImportMetrics:
    """Track import performance metrics"""

    def __init__(self):
        self.start_time = datetime.utcnow()
        self.end_time = None
        # total_rows is set by the caller once the expected row count is
        # known; the per-row methods below only track processed/error counts
        self.total_rows = 0
        self.processed_rows = 0
        self.error_rows = 0
        self.validation_errors = []
        self.database_errors = []

    def record_row_processed(self, success: bool = True):
        """Record that a row was processed"""
        self.processed_rows += 1
        if not success:
            self.error_rows += 1

    def record_validation_error(self, row_number: int, error: str):
        """Record a validation error"""
        self.validation_errors.append({
            "row": row_number,
            "error": error,
            "timestamp": datetime.utcnow()
        })

    def record_database_error(self, error: str):
        """Record a database error"""
        self.database_errors.append({
            "error": error,
            "timestamp": datetime.utcnow()
        })

    def finalize(self):
        """Finalize metrics collection"""
        self.end_time = datetime.utcnow()

    def get_summary(self) -> Dict[str, Any]:
        """Get metrics summary"""
        duration = (self.end_time or datetime.utcnow()) - self.start_time
        return {
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "duration_seconds": duration.total_seconds(),
            "total_rows": self.total_rows,
            "processed_rows": self.processed_rows,
            "error_rows": self.error_rows,
            "success_rate": (self.processed_rows / self.total_rows * 100) if self.total_rows > 0 else 0,
            "validation_errors": len(self.validation_errors),
            "database_errors": len(self.database_errors)
        }
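
Taken together, a typical call sequence for the two classes above (illustrative driver code, not part of the module; the import id is hypothetical):

logger = create_import_logger("imp_20250921_001", "PHONE")
metrics = ImportMetrics()
metrics.total_rows = 2  # caller supplies the expected row count

for row_number, ok in ((1, True), (2, False)):
    logger.log_row_processed(row_number, success=ok)
    metrics.record_row_processed(success=ok)
    if not ok:
        logger.log_validation_error(row_number, "phone", "", "value is required")
        metrics.record_validation_error(row_number, "value is required")

metrics.finalize()
logger.log_import_summary(total_rows=2, imported_rows=1, error_rows=1)
print(metrics.get_summary())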