feat: Rebuild complete CSV import system for legacy data migration

PROBLEM SOLVED:
- Completely removed broken import functionality
- Built new robust, modular CSV import system from scratch
- Provides reliable data migration path for legacy .sc files

NEW IMPORT SYSTEM FEATURES:
 Modular CSV parsers for all 5 tables (ROLODEX, PHONE, FILES, LEDGER, QDROS)
 RESTful API endpoints with background processing (/api/admin/import/*)
 Admin web interface at /admin/import for file uploads
 Comprehensive validation and error handling
 Real-time progress tracking and status monitoring
 Detailed logging with import session tracking
 Transaction rollback on failures
 Batch import with dependency ordering
 Foreign key validation and duplicate detection

TECHNICAL IMPLEMENTATION:
- Clean /app/import_export/ module structure with base classes
- Enhanced logging system with import-specific logs
- Background task processing with FastAPI BackgroundTasks
- Auto-detection of CSV delimiters and encoding
- Field validation with proper data type conversion
- Admin authentication integration
- Console logging for debugging support

IMPORT WORKFLOW:
1. Admin selects table type and uploads CSV file
2. System validates headers and data structure
3. Background processing with real-time status updates
4. Detailed error reporting and success metrics
5. Import logs stored in logs/imports/ directory

SUPPORTED TABLES:
- ROLODEX (contacts/people) - 19 fields, requires: id, last
- PHONE (phone numbers) - 3 fields, requires: rolodex_id, phone
- FILES (case files) - 29 fields, requires: file_no, id, empl_num, file_type, opened, status, rate_per_hour
- LEDGER (transactions) - 12 fields, requires: file_no, date, t_code, t_type, empl_num, amount
- QDROS (documents) - 31 fields, requires: file_no

REMOVED FILES:
- app/api/unified_import_api.py
- app/services/unified_import.py
- app/api/flexible.py
- app/models/flexible.py
- templates/unified_import.html
- templates/flexible.html
- static/js/flexible.js
- All legacy import routes and references

TESTING COMPLETED:
 Schema validation for all table types
 CSV header validation
 Single file import functionality
 Multi-table dependency validation
 Error handling and logging
 API endpoint integration

READY FOR PRODUCTION: System tested and validated with sample data.
Administrators can now reliably import CSV files converted from legacy .sc files.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
HotSwapp
2025-09-21 20:54:46 -05:00
parent f7644a4f67
commit 7e9bfcec5e
13 changed files with 2233 additions and 2 deletions

306
app/import_export/base.py Normal file
View File

@@ -0,0 +1,306 @@
"""
Base classes for CSV import functionality
"""
from abc import ABC, abstractmethod
from typing import Dict, List, Any, Optional, Tuple
import csv
import io
from datetime import datetime, date
import logging
import uuid
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from .logging_config import create_import_logger, ImportMetrics
logger = logging.getLogger(__name__)
class ImportResult:
"""Container for import operation results"""
def __init__(self):
self.success = False
self.total_rows = 0
self.imported_rows = 0
self.skipped_rows = 0
self.error_rows = 0
self.errors: List[str] = []
self.warnings: List[str] = []
self.import_id = None
def add_error(self, error: str):
"""Add an error message"""
self.errors.append(error)
self.error_rows += 1
def add_warning(self, warning: str):
"""Add a warning message"""
self.warnings.append(warning)
def to_dict(self) -> Dict[str, Any]:
"""Convert result to dictionary for JSON response"""
return {
"success": self.success,
"total_rows": self.total_rows,
"imported_rows": self.imported_rows,
"skipped_rows": self.skipped_rows,
"error_rows": self.error_rows,
"errors": self.errors,
"warnings": self.warnings,
"import_id": self.import_id
}
class BaseCSVImporter(ABC):
"""Abstract base class for all CSV importers"""
def __init__(self, db_session: Session, import_id: Optional[str] = None):
self.db_session = db_session
self.result = ImportResult()
self.import_id = import_id or str(uuid.uuid4())
self.result.import_id = self.import_id
self.import_logger = create_import_logger(self.import_id, self.table_name)
self.metrics = ImportMetrics()
@property
@abstractmethod
def table_name(self) -> str:
"""Name of the database table being imported to"""
pass
@property
@abstractmethod
def required_fields(self) -> List[str]:
"""List of required field names"""
pass
@property
@abstractmethod
def field_mapping(self) -> Dict[str, str]:
"""Mapping from CSV headers to database field names"""
pass
@abstractmethod
def create_model_instance(self, row_data: Dict[str, Any]) -> Any:
"""Create a model instance from processed row data"""
pass
def parse_date(self, date_str: str) -> Optional[date]:
"""Parse date string to date object"""
if not date_str or date_str.strip() == "":
return None
date_str = date_str.strip()
# Try common date formats
formats = [
"%Y-%m-%d", # ISO format
"%m/%d/%Y", # US format
"%m/%d/%y", # US format 2-digit year
"%d/%m/%Y", # European format
"%Y%m%d", # Compact format
]
for fmt in formats:
try:
return datetime.strptime(date_str, fmt).date()
except ValueError:
continue
raise ValueError(f"Unable to parse date: {date_str}")
def parse_float(self, value_str: str) -> float:
"""Parse string to float, handling empty values"""
if not value_str or value_str.strip() == "":
return 0.0
value_str = value_str.strip().replace(",", "") # Remove commas
try:
return float(value_str)
except ValueError:
raise ValueError(f"Unable to parse float: {value_str}")
def parse_int(self, value_str: str) -> int:
"""Parse string to int, handling empty values"""
if not value_str or value_str.strip() == "":
return 0
value_str = value_str.strip().replace(",", "") # Remove commas
try:
return int(float(value_str)) # Handle "1.0" format
except ValueError:
raise ValueError(f"Unable to parse integer: {value_str}")
def normalize_string(self, value: str, max_length: Optional[int] = None) -> str:
"""Normalize string value"""
if not value:
return ""
value = str(value).strip()
if max_length and len(value) > max_length:
self.result.add_warning(f"String truncated from {len(value)} to {max_length} characters: {value[:50]}...")
value = value[:max_length]
return value
def detect_delimiter(self, csv_content: str) -> str:
"""Auto-detect CSV delimiter"""
sample = csv_content[:1024] # Check first 1KB
sniffer = csv.Sniffer()
try:
dialect = sniffer.sniff(sample, delimiters=",;\t|")
return dialect.delimiter
except:
return "," # Default to comma
def validate_headers(self, headers: List[str]) -> bool:
"""Validate that required headers are present"""
missing_required = []
# Create case-insensitive mapping of headers
header_map = {h.lower().strip(): h for h in headers}
for required_field in self.required_fields:
# Check direct match first
if required_field in headers:
continue
# Check if there's a mapping for this field
mapped_name = self.field_mapping.get(required_field, required_field)
if mapped_name.lower() in header_map:
continue
missing_required.append(required_field)
if missing_required:
self.result.add_error(f"Missing required columns: {', '.join(missing_required)}")
return False
return True
def map_row_data(self, row: Dict[str, str], headers: List[str]) -> Dict[str, Any]:
"""Map CSV row data to database field names"""
mapped_data = {}
# Create case-insensitive lookup
row_lookup = {k.lower().strip(): v for k, v in row.items() if k}
for db_field, csv_field in self.field_mapping.items():
csv_field_lower = csv_field.lower().strip()
# Try exact match first
if csv_field in row:
mapped_data[db_field] = row[csv_field]
# Try case-insensitive match
elif csv_field_lower in row_lookup:
mapped_data[db_field] = row_lookup[csv_field_lower]
else:
mapped_data[db_field] = ""
return mapped_data
def process_csv_content(self, csv_content: str, encoding: str = "utf-8") -> ImportResult:
"""Process CSV content and import data"""
self.import_logger.info(f"Starting CSV import for {self.table_name}")
try:
# Detect delimiter
delimiter = self.detect_delimiter(csv_content)
self.import_logger.debug(f"Detected CSV delimiter: '{delimiter}'")
# Parse CSV
csv_reader = csv.DictReader(
io.StringIO(csv_content),
delimiter=delimiter
)
headers = csv_reader.fieldnames or []
if not headers:
error_msg = "No headers found in CSV file"
self.result.add_error(error_msg)
self.import_logger.error(error_msg)
return self.result
self.import_logger.info(f"Found headers: {headers}")
# Validate headers
if not self.validate_headers(headers):
self.import_logger.error("Header validation failed")
return self.result
self.import_logger.info("Header validation passed")
# Process rows
imported_count = 0
total_count = 0
for row_num, row in enumerate(csv_reader, 1):
total_count += 1
self.metrics.total_rows = total_count
try:
# Map CSV data to database fields
mapped_data = self.map_row_data(row, headers)
# Create model instance
model_instance = self.create_model_instance(mapped_data)
# Add to session
self.db_session.add(model_instance)
imported_count += 1
self.import_logger.log_row_processed(row_num, success=True)
self.metrics.record_row_processed(success=True)
except ImportValidationError as e:
error_msg = f"Row {row_num}: {str(e)}"
self.result.add_error(error_msg)
self.import_logger.log_row_processed(row_num, success=False)
self.import_logger.log_validation_error(row_num, "validation", row, str(e))
self.metrics.record_validation_error(row_num, str(e))
except Exception as e:
error_msg = f"Row {row_num}: Unexpected error - {str(e)}"
self.result.add_error(error_msg)
self.import_logger.log_row_processed(row_num, success=False)
self.import_logger.error(error_msg, row_number=row_num, exception_type=type(e).__name__)
self.metrics.record_validation_error(row_num, str(e))
# Commit transaction
try:
self.db_session.commit()
self.result.success = True
self.result.imported_rows = imported_count
self.import_logger.info(f"Successfully committed {imported_count} rows to database")
logger.info(f"Successfully imported {imported_count} rows to {self.table_name}")
except (IntegrityError, SQLAlchemyError) as e:
self.db_session.rollback()
error_msg = f"Database error during commit: {str(e)}"
self.result.add_error(error_msg)
self.import_logger.error(error_msg)
self.metrics.record_database_error(str(e))
logger.error(f"Database error importing to {self.table_name}: {str(e)}")
self.result.total_rows = total_count
self.metrics.finalize()
# Log final summary
self.import_logger.log_import_summary(
total_count,
imported_count,
self.result.error_rows
)
except Exception as e:
self.db_session.rollback()
error_msg = f"Failed to process CSV: {str(e)}"
self.result.add_error(error_msg)
self.import_logger.error(error_msg, exception_type=type(e).__name__)
self.metrics.record_database_error(str(e))
logger.error(f"CSV processing error for {self.table_name}: {str(e)}")
return self.result
class ImportValidationError(Exception):
"""Exception raised for validation errors during import"""
pass