Apply encoding fallback to all CSV importers (phone, files, ledger, payments, qdros)

- Updated import_phone_data to use open_text_with_fallbacks for encoding support
- Updated import_files_data to use open_text_with_fallbacks for encoding support
- Updated import_ledger_data to use open_text_with_fallbacks for encoding support
- Updated import_qdros_data to use open_text_with_fallbacks for encoding support
- Updated import_payments_data to use open_text_with_fallbacks for encoding support

All CSV import functions now use the same encoding fallback pattern, which tries utf-8, utf-8-sig, cp1252, and latin-1 in turn so that legacy CSV files saved in different encodings can be read.
2025-10-07 22:21:07 -05:00
parent 58b2bb9a6c
commit 09ef56fc1d
1 changed files with 274 additions and 211 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -61,6 +61,29 @@ if not SECRET_KEY:
 # Configure structured logging
 setup_logging()
 logger = structlog.get_logger(__name__)
 def open_text_with_fallbacks(file_path: str):
    """
    Open a text file trying multiple encodings commonly seen in legacy CSVs.

    Attempts in order: utf-8, utf-8-sig, cp1252, latin-1. A file that decodes
    as utf-8 but starts with a byte order mark is handed on to utf-8-sig so
    the BOM is stripped instead of ending up in the first CSV header name.

    The whole file is decoded once per candidate encoding before it is
    accepted, so a byte that is invalid for the chosen encoding cannot
    surface later, in the middle of an import.

    Args:
        file_path: Path of the text file to open.

    Returns:
        A tuple of (file_object, encoding_used). The file object is opened
        with newline='' (as the csv module expects) and positioned at the
        start. Caller is responsible to close file.

    Raises:
        OSError: If the file cannot be opened at all (raised immediately;
            retrying with another encoding cannot help).
        UnicodeError: If no candidate encoding can decode the file.
    """
    encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"]
    chunk_size = 64 * 1024
    last_error = None
    for enc in encodings:
        f = open(file_path, 'r', encoding=enc, errors='strict', newline='')
        try:
            chunk = f.read(chunk_size)
            if enc == "utf-8" and chunk.startswith("\ufeff"):
                # Plain utf-8 keeps the BOM as a character; skip to utf-8-sig,
                # which is next in the list and strips it.
                f.close()
                continue
            # Decode the remainder in bounded chunks to force any decoding
            # error now rather than while rows are being imported.
            while chunk:
                chunk = f.read(chunk_size)
            f.seek(0)
            logger.info("csv_open_encoding_selected", file=file_path, encoding=enc)
            return f, enc
        except UnicodeError as e:
            # Wrong encoding: release this handle before trying the next one.
            f.close()
            last_error = e
            continue
        except BaseException:
            # Anything else is not an encoding problem; do not leak the handle.
            f.close()
            raise
    raise last_error if last_error else RuntimeError("Unable to open file with known encodings")
 # Configure Jinja2 templates
 templates = Jinja2Templates(directory="app/templates")
@@ -218,21 +241,38 @@ def get_import_type_from_filename(filename: str) -> str:
        Import type string (client, phone, case, transaction, document, payment)
    """
    filename_upper = filename.upper()
    # Strip extension and normalize
    base = filename_upper.rsplit('.', 1)[0]
-    if filename_upper.startswith('ROLODEX') or filename_upper.startswith('ROLEX'):
+    # Support files saved with explicit type prefixes (e.g., CLIENT_<uuid>.csv)
    if base.startswith('CLIENT_'):
        return 'client'
-    elif filename_upper.startswith('PHONE'):
+    if base.startswith('PHONE_'):
        return 'phone'
-    elif filename_upper.startswith('FILES'):
+    if base.startswith('CASE_'):
        return 'case'
-    elif filename_upper.startswith('LEDGER'):
+    if base.startswith('TRANSACTION_'):
        return 'transaction'
-    elif filename_upper.startswith('QDROS') or filename_upper.startswith('QDRO'):
+    if base.startswith('DOCUMENT_'):
        return 'document'
-    elif filename_upper.startswith('PAYMENTS') or filename_upper.startswith('DEPOSITS'):
+    if base.startswith('PAYMENT_'):
        return 'payment'
-    else:
+
-        raise ValueError(f"Unknown file type for filename: {filename}")
+    # Legacy/real file name patterns
    if base.startswith('ROLODEX') or base.startswith('ROLEX') or 'ROLODEX' in base or 'ROLEX' in base:
        return 'client'
    if base.startswith('PHONE') or 'PHONE' in base:
        return 'phone'
    if base.startswith('FILES') or base.startswith('FILE') or 'FILES' in base:
        return 'case'
    if base.startswith('LEDGER') or 'LEDGER' in base or base.startswith('TRNSACTN') or 'TRNSACTN' in base:
        return 'transaction'
    if base.startswith('QDROS') or base.startswith('QDRO') or 'QDRO' in base:
        return 'document'
    if base.startswith('PAYMENTS') or base.startswith('DEPOSITS') or 'PAYMENT' in base or 'DEPOSIT' in base:
        return 'payment'
    raise ValueError(f"Unknown file type for filename: {filename}")
 def validate_csv_headers(headers: List[str], expected_fields: Dict[str, str]) -> Dict[str, Any]:
@@ -370,7 +410,8 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
    }
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
        with f as file:
            reader = csv.DictReader(file)
            # Validate headers
@@ -418,6 +459,7 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
        db.commit()
    except Exception as e:
        logger.error("rolodex_import_failed", file=file_path, error=str(e))
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
@@ -436,52 +478,56 @@ def import_phone_data(db: Session, file_path: str) -> Dict[str, Any]:
        'total_rows': 0
    }
    f = None
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
-            reader = csv.DictReader(file)
+        reader = csv.DictReader(f)
-            headers = reader.fieldnames or []
+        headers = reader.fieldnames or []
-            if len(headers) < 2:
+        if len(headers) < 2:
-                result['errors'].append("Invalid CSV format: expected at least 2 columns")
+            result['errors'].append("Invalid CSV format: expected at least 2 columns")
-                return result
+            return result
-            for row_num, row in enumerate(reader, start=2):
+        for row_num, row in enumerate(reader, start=2):
-                result['total_rows'] += 1
+            result['total_rows'] += 1
-                try:
+            try:
-                    client_id = row.get('Id', '').strip()
+                client_id = row.get('Id', '').strip()
-                    if not client_id:
+                if not client_id:
-                        result['errors'].append(f"Row {row_num}: Missing client ID")
+                    result['errors'].append(f"Row {row_num}: Missing client ID")
-                        continue
+                    continue
-                    # Find the client
+                # Find the client
-                    client = db.query(Client).filter(Client.rolodex_id == client_id).first()
+                client = db.query(Client).filter(Client.rolodex_id == client_id).first()
-                    if not client:
+                if not client:
-                        result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
+                    result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
-                        continue
+                    continue
-                    phone_number = row.get('Phone', '').strip()
+                phone_number = row.get('Phone', '').strip()
-                    if not phone_number:
+                if not phone_number:
-                        result['errors'].append(f"Row {row_num}: Missing phone number")
+                    result['errors'].append(f"Row {row_num}: Missing phone number")
-                        continue
+                    continue
-                    phone = Phone(
+                phone = Phone(
-                        client_id=client.id,
+                    client_id=client.id,
-                        phone_type=row.get('Location', '').strip() or 'primary',
+                    phone_type=row.get('Location', '').strip() or 'primary',
-                        phone_number=phone_number
+                    phone_number=phone_number
-                    )
+                )
-                    db.add(phone)
+                db.add(phone)
-                    result['success'] += 1
+                result['success'] += 1
-                except Exception as e:
+            except Exception as e:
-                    result['errors'].append(f"Row {row_num}: {str(e)}")
+                result['errors'].append(f"Row {row_num}: {str(e)}")
        db.commit()
    except Exception as e:
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
    finally:
        if f:
            f.close()
    return result
@@ -530,62 +576,66 @@ def import_files_data(db: Session, file_path: str) -> Dict[str, Any]:
        'Memo': 'Memo'
    }
    f = None
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
-            reader = csv.DictReader(file)
+        reader = csv.DictReader(f)
-            headers = reader.fieldnames or []
+        headers = reader.fieldnames or []
-            validation = validate_csv_headers(headers, expected_fields)
+        validation = validate_csv_headers(headers, expected_fields)
-            if not validation['valid']:
+        if not validation['valid']:
-                result['errors'].append(f"Header validation failed: {validation['errors']}")
+            result['errors'].append(f"Header validation failed: {validation['errors']}")
-                return result
+            return result
-            for row_num, row in enumerate(reader, start=2):
+        for row_num, row in enumerate(reader, start=2):
-                result['total_rows'] += 1
+            result['total_rows'] += 1
-                try:
+            try:
-                    file_no = row.get('File_No', '').strip()
+                file_no = row.get('File_No', '').strip()
-                    if not file_no:
+                if not file_no:
-                        result['errors'].append(f"Row {row_num}: Missing file number")
+                    result['errors'].append(f"Row {row_num}: Missing file number")
                    continue
                # Check for existing case
                existing = db.query(Case).filter(Case.file_no == file_no).first()
                if existing:
                    result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists")
                    continue
                # Find client by ID
                client_id = row.get('Id', '').strip()
                client = None
                if client_id:
                    client = db.query(Client).filter(Client.rolodex_id == client_id).first()
                    if not client:
                        result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
                        continue
-                    # Check for existing case
+                case = Case(
-                    existing = db.query(Case).filter(Case.file_no == file_no).first()
+                    file_no=file_no,
-                    if existing:
+                    client_id=client.id if client else None,
-                        result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists")
+                    status=row.get('Status', '').strip() or 'active',
-                        continue
+                    case_type=row.get('File_Type', '').strip() or None,
                    description=row.get('Regarding', '').strip() or None,
                    open_date=parse_date(row.get('Opened', '')),
                    close_date=parse_date(row.get('Closed', ''))
                )
-                    # Find client by ID
+                db.add(case)
-                    client_id = row.get('Id', '').strip()
+                result['success'] += 1
                    client = None
                    if client_id:
                        client = db.query(Client).filter(Client.rolodex_id == client_id).first()
                        if not client:
                            result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
                            continue
-                    case = Case(
+            except Exception as e:
-                        file_no=file_no,
+                result['errors'].append(f"Row {row_num}: {str(e)}")
                        client_id=client.id if client else None,
                        status=row.get('Status', '').strip() or 'active',
                        case_type=row.get('File_Type', '').strip() or None,
                        description=row.get('Regarding', '').strip() or None,
                        open_date=parse_date(row.get('Opened', '')),
                        close_date=parse_date(row.get('Closed', ''))
                    )
                    db.add(case)
                    result['success'] += 1
                except Exception as e:
                    result['errors'].append(f"Row {row_num}: {str(e)}")
        db.commit()
    except Exception as e:
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
    finally:
        if f:
            f.close()
    return result
@@ -602,81 +652,85 @@ def import_ledger_data(db: Session, file_path: str) -> Dict[str, Any]:
        'total_rows': 0
    }
    f = None
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
-            reader = csv.DictReader(file)
+        reader = csv.DictReader(f)
-            headers = reader.fieldnames or []
+        headers = reader.fieldnames or []
-            if len(headers) < 3:
+        if len(headers) < 3:
-                result['errors'].append("Invalid CSV format: expected at least 3 columns")
+            result['errors'].append("Invalid CSV format: expected at least 3 columns")
-                return result
+            return result
-            for row_num, row in enumerate(reader, start=2):
+        for row_num, row in enumerate(reader, start=2):
-                result['total_rows'] += 1
+            result['total_rows'] += 1
-                try:
+            try:
-                    file_no = row.get('File_No', '').strip()
+                file_no = row.get('File_No', '').strip()
-                    if not file_no:
+                if not file_no:
-                        result['errors'].append(f"Row {row_num}: Missing file number")
+                    result['errors'].append(f"Row {row_num}: Missing file number")
-                        continue
+                    continue
-                    # Find the case
+                # Find the case
-                    case = db.query(Case).filter(Case.file_no == file_no).first()
+                case = db.query(Case).filter(Case.file_no == file_no).first()
-                    if not case:
+                if not case:
-                        result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
+                    result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
-                        continue
+                    continue
-                    amount = parse_float(row.get('Amount', '0'))
+                amount = parse_float(row.get('Amount', '0'))
-                    if amount is None:
+                if amount is None:
-                        result['errors'].append(f"Row {row_num}: Invalid amount")
+                    result['errors'].append(f"Row {row_num}: Invalid amount")
-                        continue
+                    continue
-                    tx_date = parse_date(row.get('Date', ''))
+                tx_date = parse_date(row.get('Date', ''))
-                    item_no = parse_int(row.get('Item_No', '') or '')
+                item_no = parse_int(row.get('Item_No', '') or '')
-                    # ensure unique item_no per date by increment
+                # ensure unique item_no per date by increment
-                    # temp session-less check via while loop
+                # temp session-less check via while loop
-                    desired_item_no = item_no if item_no is not None else 1
+                desired_item_no = item_no if item_no is not None else 1
-                    while True:
+                while True:
-                        exists = (
+                    exists = (
-                            db.query(Transaction)
+                        db.query(Transaction)
-                            .filter(
+                        .filter(
-                                Transaction.case_id == case.id,
+                            Transaction.case_id == case.id,
-                                Transaction.transaction_date == tx_date,
+                            Transaction.transaction_date == tx_date,
-                                Transaction.item_no == desired_item_no,
+                            Transaction.item_no == desired_item_no,
                            )
                            .first()
                        )
-                        if not exists:
+                        .first()
                            break
                        desired_item_no += 1
                    transaction = Transaction(
                        case_id=case.id,
                        transaction_date=tx_date,
                        transaction_type=(row.get('T_Type', '').strip() or None),
                        t_type_l=(row.get('T_Type_L', '').strip().upper() or None),
                        amount=amount,
                        description=(row.get('Note', '').strip() or None),
                        reference=(row.get('Item_No', '').strip() or None),
                        item_no=desired_item_no,
                        employee_number=(row.get('Empl_Num', '').strip() or None),
                        t_code=(row.get('T_Code', '').strip().upper() or None),
                        quantity=parse_float(row.get('Quantity', '')),
                        rate=parse_float(row.get('Rate', '')),
                        billed=((row.get('Billed', '') or '').strip().upper() or None),
                    )
                    if not exists:
                        break
                    desired_item_no += 1
-                    db.add(transaction)
+                transaction = Transaction(
-                    result['success'] += 1
+                    case_id=case.id,
                    transaction_date=tx_date,
                    transaction_type=(row.get('T_Type', '').strip() or None),
                    t_type_l=(row.get('T_Type_L', '').strip().upper() or None),
                    amount=amount,
                    description=(row.get('Note', '').strip() or None),
                    reference=(row.get('Item_No', '').strip() or None),
                    item_no=desired_item_no,
                    employee_number=(row.get('Empl_Num', '').strip() or None),
                    t_code=(row.get('T_Code', '').strip().upper() or None),
                    quantity=parse_float(row.get('Quantity', '')),
                    rate=parse_float(row.get('Rate', '')),
                    billed=((row.get('Billed', '') or '').strip().upper() or None),
                )
-                except Exception as e:
+                db.add(transaction)
-                    result['errors'].append(f"Row {row_num}: {str(e)}")
+                result['success'] += 1
            except Exception as e:
                result['errors'].append(f"Row {row_num}: {str(e)}")
        db.commit()
    except Exception as e:
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
    finally:
        if f:
            f.close()
    return result
@@ -693,49 +747,53 @@ def import_qdros_data(db: Session, file_path: str) -> Dict[str, Any]:
        'total_rows': 0
    }
    f = None
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
-            reader = csv.DictReader(file)
+        reader = csv.DictReader(f)
-            headers = reader.fieldnames or []
+        headers = reader.fieldnames or []
-            if len(headers) < 2:
+        if len(headers) < 2:
-                result['errors'].append("Invalid CSV format: expected at least 2 columns")
+            result['errors'].append("Invalid CSV format: expected at least 2 columns")
-                return result
+            return result
-            for row_num, row in enumerate(reader, start=2):
+        for row_num, row in enumerate(reader, start=2):
-                result['total_rows'] += 1
+            result['total_rows'] += 1
-                try:
+            try:
-                    file_no = row.get('File_No', '').strip()
+                file_no = row.get('File_No', '').strip()
-                    if not file_no:
+                if not file_no:
-                        result['errors'].append(f"Row {row_num}: Missing file number")
+                    result['errors'].append(f"Row {row_num}: Missing file number")
-                        continue
+                    continue
-                    # Find the case
+                # Find the case
-                    case = db.query(Case).filter(Case.file_no == file_no).first()
+                case = db.query(Case).filter(Case.file_no == file_no).first()
-                    if not case:
+                if not case:
-                        result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
+                    result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
-                        continue
+                    continue
-                    document = Document(
+                document = Document(
-                        case_id=case.id,
+                    case_id=case.id,
-                        document_type=row.get('Document_Type', '').strip() or 'QDRO',
+                    document_type=row.get('Document_Type', '').strip() or 'QDRO',
-                        file_name=row.get('File_Name', '').strip() or None,
+                    file_name=row.get('File_Name', '').strip() or None,
-                        description=row.get('Description', '').strip() or None,
+                    description=row.get('Description', '').strip() or None,
-                        uploaded_date=parse_date(row.get('Date', ''))
+                    uploaded_date=parse_date(row.get('Date', ''))
-                    )
+                )
-                    db.add(document)
+                db.add(document)
-                    result['success'] += 1
+                result['success'] += 1
-                except Exception as e:
+            except Exception as e:
-                    result['errors'].append(f"Row {row_num}: {str(e)}")
+                result['errors'].append(f"Row {row_num}: {str(e)}")
        db.commit()
    except Exception as e:
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
    finally:
        if f:
            f.close()
    return result
@@ -752,55 +810,59 @@ def import_payments_data(db: Session, file_path: str) -> Dict[str, Any]:
        'total_rows': 0
    }
    f = None
    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
-            reader = csv.DictReader(file)
+        reader = csv.DictReader(f)
-            headers = reader.fieldnames or []
+        headers = reader.fieldnames or []
-            if len(headers) < 2:
+        if len(headers) < 2:
-                result['errors'].append("Invalid CSV format: expected at least 2 columns")
+            result['errors'].append("Invalid CSV format: expected at least 2 columns")
-                return result
+            return result
-            for row_num, row in enumerate(reader, start=2):
+        for row_num, row in enumerate(reader, start=2):
-                result['total_rows'] += 1
+            result['total_rows'] += 1
-                try:
+            try:
-                    file_no = row.get('File_No', '').strip()
+                file_no = row.get('File_No', '').strip()
-                    if not file_no:
+                if not file_no:
-                        result['errors'].append(f"Row {row_num}: Missing file number")
+                    result['errors'].append(f"Row {row_num}: Missing file number")
-                        continue
+                    continue
-                    # Find the case
+                # Find the case
-                    case = db.query(Case).filter(Case.file_no == file_no).first()
+                case = db.query(Case).filter(Case.file_no == file_no).first()
-                    if not case:
+                if not case:
-                        result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
+                    result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
-                        continue
+                    continue
-                    amount = parse_float(row.get('Amount', '0'))
+                amount = parse_float(row.get('Amount', '0'))
-                    if amount is None:
+                if amount is None:
-                        result['errors'].append(f"Row {row_num}: Invalid amount")
+                    result['errors'].append(f"Row {row_num}: Invalid amount")
-                        continue
+                    continue
-                    payment = Payment(
+                payment = Payment(
-                        case_id=case.id,
+                    case_id=case.id,
-                        payment_date=parse_date(row.get('Date', '')),
+                    payment_date=parse_date(row.get('Date', '')),
-                        payment_type=row.get('Type', '').strip() or None,
+                    payment_type=row.get('Type', '').strip() or None,
-                        amount=amount,
+                    amount=amount,
-                        description=row.get('Description', '').strip() or None,
+                    description=row.get('Description', '').strip() or None,
-                        check_number=row.get('Check_Number', '').strip() or None
+                    check_number=row.get('Check_Number', '').strip() or None
-                    )
+                )
-                    db.add(payment)
+                db.add(payment)
-                    result['success'] += 1
+                result['success'] += 1
-                except Exception as e:
+            except Exception as e:
-                    result['errors'].append(f"Row {row_num}: {str(e)}")
+                result['errors'].append(f"Row {row_num}: {str(e)}")
        db.commit()
    except Exception as e:
        result['errors'].append(f"Import failed: {str(e)}")
        db.rollback()
    finally:
        if f:
            f.close()
    return result
@@ -1435,7 +1497,14 @@ async def admin_upload_files(
            # Generate unique filename to avoid conflicts
            file_id = str(uuid.uuid4())
            file_ext = os.path.splitext(file.filename)[1]
-            stored_filename = f"{file_id}{file_ext}"
+            # Determine import type from original filename for better categorization later
            try:
                detected_type = get_import_type_from_filename(file.filename)
            except ValueError:
                detected_type = 'unknown'
            # Prefix stored filename with detected type to preserve context
            stored_filename = f"{detected_type}_{file_id}{file_ext}"
            file_path = os.path.join(import_dir, stored_filename)
            # Save file
@@ -1443,14 +1512,8 @@ async def admin_upload_files(
            with open(file_path, "wb") as f:
                f.write(contents)
-            # Determine import type from filename
+            # Use detected type (already derived from original name)
-            try:
+            import_type = detected_type
                import_type = get_import_type_from_filename(file.filename)
            except ValueError as e:
                errors.append(f"File '{file.filename}': {str(e)}")
                # Clean up uploaded file
                os.remove(file_path)
                continue
            results.append({
                'filename': file.filename,