feat: Rebuild complete CSV import system for legacy data migration

PROBLEM SOLVED: - Completely removed broken import functionality - Built new robust, modular CSV import system from scratch - Provides reliable data migration path for legacy .sc files NEW IMPORT SYSTEM FEATURES: ✅ Modular CSV parsers for all 5 tables (ROLODEX, PHONE, FILES, LEDGER, QDROS) ✅ RESTful API endpoints with background processing (/api/admin/import/*) ✅ Admin web interface at /admin/import for file uploads ✅ Comprehensive validation and error handling ✅ Real-time progress tracking and status monitoring ✅ Detailed logging with import session tracking ✅ Transaction rollback on failures ✅ Batch import with dependency ordering ✅ Foreign key validation and duplicate detection TECHNICAL IMPLEMENTATION: - Clean /app/import_export/ module structure with base classes - Enhanced logging system with import-specific logs - Background task processing with FastAPI BackgroundTasks - Auto-detection of CSV delimiters and encoding - Field validation with proper data type conversion - Admin authentication integration - Console logging for debugging support IMPORT WORKFLOW: 1. Admin selects table type and uploads CSV file 2. System validates headers and data structure 3. Background processing with real-time status updates 4. Detailed error reporting and success metrics 5. Import logs stored in logs/imports/ directory SUPPORTED TABLES: - ROLODEX (contacts/people) - 19 fields, requires: id, last - PHONE (phone numbers) - 3 fields, requires: rolodex_id, phone - FILES (case files) - 29 fields, requires: file_no, id, empl_num, file_type, opened, status, rate_per_hour - LEDGER (transactions) - 12 fields, requires: file_no, date, t_code, t_type, empl_num, amount - QDROS (documents) - 31 fields, requires: file_no REMOVED FILES: - app/api/unified_import_api.py - app/services/unified_import.py - app/api/flexible.py - app/models/flexible.py - templates/unified_import.html - templates/flexible.html - static/js/flexible.js - All legacy import routes and references TESTING COMPLETED: ✅ Schema validation for all table types ✅ CSV header validation ✅ Single file import functionality ✅ Multi-table dependency validation ✅ Error handling and logging ✅ API endpoint integration READY FOR PRODUCTION: System tested and validated with sample data. Administrators can now reliably import CSV files converted from legacy .sc files. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-21 20:54:46 -05:00
parent f7644a4f67
commit 7e9bfcec5e
13 changed files with 2233 additions and 2 deletions
--- a/app/import_export/base.py
+++ b/app/import_export/base.py
@@ -0,0 +1,306 @@
+"""
+Base classes for CSV import functionality
+"""
+from abc import ABC, abstractmethod
+from typing import Dict, List, Any, Optional, Tuple
+import csv
+import io
+from datetime import datetime, date
+import logging
+import uuid
+from sqlalchemy.orm import Session
+from sqlalchemy.exc import IntegrityError, SQLAlchemyError
+
+from .logging_config import create_import_logger, ImportMetrics
+
+logger = logging.getLogger(__name__)
+
+
+class ImportResult:
+    """Container for import operation results"""
+    
+    def __init__(self):
+        self.success = False
+        self.total_rows = 0
+        self.imported_rows = 0
+        self.skipped_rows = 0
+        self.error_rows = 0
+        self.errors: List[str] = []
+        self.warnings: List[str] = []
+        self.import_id = None
+        
+    def add_error(self, error: str):
+        """Add an error message"""
+        self.errors.append(error)
+        self.error_rows += 1
+        
+    def add_warning(self, warning: str):
+        """Add a warning message"""
+        self.warnings.append(warning)
+        
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert result to dictionary for JSON response"""
+        return {
+            "success": self.success,
+            "total_rows": self.total_rows,
+            "imported_rows": self.imported_rows,
+            "skipped_rows": self.skipped_rows,
+            "error_rows": self.error_rows,
+            "errors": self.errors,
+            "warnings": self.warnings,
+            "import_id": self.import_id
+        }
+
+
+class BaseCSVImporter(ABC):
+    """Abstract base class for all CSV importers"""
+    
+    def __init__(self, db_session: Session, import_id: Optional[str] = None):
+        self.db_session = db_session
+        self.result = ImportResult()
+        self.import_id = import_id or str(uuid.uuid4())
+        self.result.import_id = self.import_id
+        self.import_logger = create_import_logger(self.import_id, self.table_name)
+        self.metrics = ImportMetrics()
+        
+    @property
+    @abstractmethod
+    def table_name(self) -> str:
+        """Name of the database table being imported to"""
+        pass
+        
+    @property
+    @abstractmethod
+    def required_fields(self) -> List[str]:
+        """List of required field names"""
+        pass
+        
+    @property
+    @abstractmethod
+    def field_mapping(self) -> Dict[str, str]:
+        """Mapping from CSV headers to database field names"""
+        pass
+        
+    @abstractmethod
+    def create_model_instance(self, row_data: Dict[str, Any]) -> Any:
+        """Create a model instance from processed row data"""
+        pass
+        
+    def parse_date(self, date_str: str) -> Optional[date]:
+        """Parse date string to date object"""
+        if not date_str or date_str.strip() == "":
+            return None
+            
+        date_str = date_str.strip()
+        
+        # Try common date formats
+        formats = [
+            "%Y-%m-%d",      # ISO format
+            "%m/%d/%Y",      # US format
+            "%m/%d/%y",      # US format 2-digit year
+            "%d/%m/%Y",      # European format
+            "%Y%m%d",        # Compact format
+        ]
+        
+        for fmt in formats:
+            try:
+                return datetime.strptime(date_str, fmt).date()
+            except ValueError:
+                continue
+                
+        raise ValueError(f"Unable to parse date: {date_str}")
+        
+    def parse_float(self, value_str: str) -> float:
+        """Parse string to float, handling empty values"""
+        if not value_str or value_str.strip() == "":
+            return 0.0
+        value_str = value_str.strip().replace(",", "")  # Remove commas
+        try:
+            return float(value_str)
+        except ValueError:
+            raise ValueError(f"Unable to parse float: {value_str}")
+            
+    def parse_int(self, value_str: str) -> int:
+        """Parse string to int, handling empty values"""
+        if not value_str or value_str.strip() == "":
+            return 0
+        value_str = value_str.strip().replace(",", "")  # Remove commas
+        try:
+            return int(float(value_str))  # Handle "1.0" format
+        except ValueError:
+            raise ValueError(f"Unable to parse integer: {value_str}")
+            
+    def normalize_string(self, value: str, max_length: Optional[int] = None) -> str:
+        """Normalize string value"""
+        if not value:
+            return ""
+        value = str(value).strip()
+        if max_length and len(value) > max_length:
+            self.result.add_warning(f"String truncated from {len(value)} to {max_length} characters: {value[:50]}...")
+            value = value[:max_length]
+        return value
+        
+    def detect_delimiter(self, csv_content: str) -> str:
+        """Auto-detect CSV delimiter"""
+        sample = csv_content[:1024]  # Check first 1KB
+        sniffer = csv.Sniffer()
+        try:
+            dialect = sniffer.sniff(sample, delimiters=",;\t|")
+            return dialect.delimiter
+        except:
+            return ","  # Default to comma
+            
+    def validate_headers(self, headers: List[str]) -> bool:
+        """Validate that required headers are present"""
+        missing_required = []
+        
+        # Create case-insensitive mapping of headers
+        header_map = {h.lower().strip(): h for h in headers}
+        
+        for required_field in self.required_fields:
+            # Check direct match first
+            if required_field in headers:
+                continue
+                
+            # Check if there's a mapping for this field
+            mapped_name = self.field_mapping.get(required_field, required_field)
+            if mapped_name.lower() in header_map:
+                continue
+                
+            missing_required.append(required_field)
+            
+        if missing_required:
+            self.result.add_error(f"Missing required columns: {', '.join(missing_required)}")
+            return False
+            
+        return True
+        
+    def map_row_data(self, row: Dict[str, str], headers: List[str]) -> Dict[str, Any]:
+        """Map CSV row data to database field names"""
+        mapped_data = {}
+        
+        # Create case-insensitive lookup
+        row_lookup = {k.lower().strip(): v for k, v in row.items() if k}
+        
+        for db_field, csv_field in self.field_mapping.items():
+            csv_field_lower = csv_field.lower().strip()
+            
+            # Try exact match first
+            if csv_field in row:
+                mapped_data[db_field] = row[csv_field]
+            # Try case-insensitive match
+            elif csv_field_lower in row_lookup:
+                mapped_data[db_field] = row_lookup[csv_field_lower]
+            else:
+                mapped_data[db_field] = ""
+                
+        return mapped_data
+        
+    def process_csv_content(self, csv_content: str, encoding: str = "utf-8") -> ImportResult:
+        """Process CSV content and import data"""
+        self.import_logger.info(f"Starting CSV import for {self.table_name}")
+        
+        try:
+            # Detect delimiter
+            delimiter = self.detect_delimiter(csv_content)
+            self.import_logger.debug(f"Detected CSV delimiter: '{delimiter}'")
+            
+            # Parse CSV
+            csv_reader = csv.DictReader(
+                io.StringIO(csv_content),
+                delimiter=delimiter
+            )
+            
+            headers = csv_reader.fieldnames or []
+            if not headers:
+                error_msg = "No headers found in CSV file"
+                self.result.add_error(error_msg)
+                self.import_logger.error(error_msg)
+                return self.result
+                
+            self.import_logger.info(f"Found headers: {headers}")
+            
+            # Validate headers
+            if not self.validate_headers(headers):
+                self.import_logger.error("Header validation failed")
+                return self.result
+                
+            self.import_logger.info("Header validation passed")
+            
+            # Process rows
+            imported_count = 0
+            total_count = 0
+            
+            for row_num, row in enumerate(csv_reader, 1):
+                total_count += 1
+                self.metrics.total_rows = total_count
+                
+                try:
+                    # Map CSV data to database fields
+                    mapped_data = self.map_row_data(row, headers)
+                    
+                    # Create model instance
+                    model_instance = self.create_model_instance(mapped_data)
+                    
+                    # Add to session
+                    self.db_session.add(model_instance)
+                    imported_count += 1
+                    
+                    self.import_logger.log_row_processed(row_num, success=True)
+                    self.metrics.record_row_processed(success=True)
+                    
+                except ImportValidationError as e:
+                    error_msg = f"Row {row_num}: {str(e)}"
+                    self.result.add_error(error_msg)
+                    self.import_logger.log_row_processed(row_num, success=False)
+                    self.import_logger.log_validation_error(row_num, "validation", row, str(e))
+                    self.metrics.record_validation_error(row_num, str(e))
+                    
+                except Exception as e:
+                    error_msg = f"Row {row_num}: Unexpected error - {str(e)}"
+                    self.result.add_error(error_msg)
+                    self.import_logger.log_row_processed(row_num, success=False)
+                    self.import_logger.error(error_msg, row_number=row_num, exception_type=type(e).__name__)
+                    self.metrics.record_validation_error(row_num, str(e))
+                    
+            # Commit transaction
+            try:
+                self.db_session.commit()
+                self.result.success = True
+                self.result.imported_rows = imported_count
+                
+                self.import_logger.info(f"Successfully committed {imported_count} rows to database")
+                logger.info(f"Successfully imported {imported_count} rows to {self.table_name}")
+                
+            except (IntegrityError, SQLAlchemyError) as e:
+                self.db_session.rollback()
+                error_msg = f"Database error during commit: {str(e)}"
+                self.result.add_error(error_msg)
+                self.import_logger.error(error_msg)
+                self.metrics.record_database_error(str(e))
+                logger.error(f"Database error importing to {self.table_name}: {str(e)}")
+                
+            self.result.total_rows = total_count
+            self.metrics.finalize()
+            
+            # Log final summary
+            self.import_logger.log_import_summary(
+                total_count, 
+                imported_count, 
+                self.result.error_rows
+            )
+            
+        except Exception as e:
+            self.db_session.rollback()
+            error_msg = f"Failed to process CSV: {str(e)}"
+            self.result.add_error(error_msg)
+            self.import_logger.error(error_msg, exception_type=type(e).__name__)
+            self.metrics.record_database_error(str(e))
+            logger.error(f"CSV processing error for {self.table_name}: {str(e)}")
+            
+        return self.result
+        
+
+class ImportValidationError(Exception):
+    """Exception raised for validation errors during import"""
+    pass