Compare commits

..

3 Commits

Author SHA1 Message Date
HotSwapp
fdcff9fbb2 Expand encoding fallback to handle more legacy CSV encodings
- Added windows-1252, cp1250, iso-8859-1 to encoding fallback list
- Enhanced error logging in open_text_with_fallbacks function
- Improved error messages to show all attempted encodings
- Added warning logs for each encoding attempt that fails

This should resolve 'charmap' codec errors and other encoding issues with legacy CSV files that use different Windows codepages or ISO encodings.
2025-10-07 22:25:34 -05:00
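
For context on the error this targets: the 'charmap' codec is what Python's open() typically falls back to on Windows, and it fails on bytes outside that codepage. An illustrative sketch of the same try-each-encoding idea at the bytes level (not code from the repository):

    # Illustrative only: try the encodings from the commit's fallback list until
    # one decodes the raw bytes cleanly.
    def decode_with_fallbacks(raw: bytes):
        for enc in ("utf-8", "utf-8-sig", "cp1252", "cp1250", "iso-8859-1", "latin-1"):
            try:
                return raw.decode(enc), enc
            except UnicodeDecodeError:
                continue
        # latin-1 maps every byte value, so the loop above always returns
        raise ValueError("undecodable input")

    # 0x93/0x94 are cp1252 "smart quotes" and are invalid as UTF-8.
    text, enc = decode_with_fallbacks(b"\x93Name\x94,Amount\r\n")
    print(enc)  # -> cp1252
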
HotSwapp
09ef56fc1d Apply encoding fallback to all CSV importers (phone, files, ledger, payments, qdros)
- Updated import_phone_data to use open_text_with_fallbacks for encoding support
- Updated import_files_data to use open_text_with_fallbacks for encoding support
- Updated import_ledger_data to use open_text_with_fallbacks for encoding support
- Updated import_qdros_data to use open_text_with_fallbacks for encoding support
- Updated import_payments_data to use open_text_with_fallbacks for encoding support

All CSV import functions now use the same encoding fallback pattern that tries utf-8, utf-8-sig, cp1252, and latin-1 encodings to handle legacy CSV files with different encodings.
2025-10-07 22:21:07 -05:00
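
The shared pattern this commit applies to each importer looks roughly like the sketch below (simplified; open_text_with_fallbacks is the helper added in these commits, and the per-row validation and persistence are elided):

    import csv

    def import_example_data(file_path: str) -> dict:
        # Simplified sketch of the shared importer pattern from this change set:
        # open with encoding fallback, parse, and always close the handle.
        result = {'errors': [], 'total_rows': 0}
        f = None
        try:
            f, used_encoding = open_text_with_fallbacks(file_path)  # helper from this diff
            reader = csv.DictReader(f)
            for row in reader:
                result['total_rows'] += 1  # real importers validate and persist each row
        except Exception as e:
            result['errors'].append(f"Import failed: {e}")
        finally:
            if f:
                f.close()
        return result
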
HotSwapp
58b2bb9a6c Add stored filename visibility and auto-select functionality to admin upload results
- Added 'Stored Filename' column to Upload Results table showing the actual filename used for storage
- Added 'Select All' button for each import type section to quickly select/deselect all files
- Improved JavaScript to handle select all/deselect all functionality with proper button state management
- Enhanced UI to clearly distinguish between original and stored filenames
2025-10-07 22:15:08 -05:00
2 changed files with 338 additions and 216 deletions

View File

@@ -61,6 +61,34 @@ if not SECRET_KEY:
 # Configure structured logging
 setup_logging()
 logger = structlog.get_logger(__name__)
+
+
+def open_text_with_fallbacks(file_path: str):
+    """
+    Open a text file trying multiple encodings commonly seen in legacy CSVs.
+    Attempts in order: utf-8, utf-8-sig, cp1252, windows-1252, cp1250, iso-8859-1, latin-1.
+    Returns a tuple of (file_object, encoding_used). Caller is responsible to close file.
+    """
+    encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"]
+    last_error = None
+    for enc in encodings:
+        try:
+            f = open(file_path, 'r', encoding=enc, errors='strict', newline='')
+            # Try reading a tiny chunk to force decoding errors early
+            _ = f.read(1024)
+            f.seek(0)
+            logger.info("csv_open_encoding_selected", file=file_path, encoding=enc)
+            return f, enc
+        except Exception as e:
+            last_error = e
+            logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e))
+            continue
+    error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}"
+    if last_error:
+        error_msg += f". Last error: {str(last_error)}"
+    raise RuntimeError(error_msg)
+
+
 # Configure Jinja2 templates
 templates = Jinja2Templates(directory="app/templates")
@@ -218,20 +246,37 @@ def get_import_type_from_filename(filename: str) -> str:
         Import type string (client, phone, case, transaction, document, payment)
     """
     filename_upper = filename.upper()
+    # Strip extension and normalize
+    base = filename_upper.rsplit('.', 1)[0]
 
-    if filename_upper.startswith('ROLODEX') or filename_upper.startswith('ROLEX'):
+    # Support files saved with explicit type prefixes (e.g., CLIENT_<uuid>.csv)
+    if base.startswith('CLIENT_'):
         return 'client'
-    elif filename_upper.startswith('PHONE'):
+    if base.startswith('PHONE_'):
         return 'phone'
-    elif filename_upper.startswith('FILES'):
+    if base.startswith('CASE_'):
         return 'case'
-    elif filename_upper.startswith('LEDGER'):
+    if base.startswith('TRANSACTION_'):
         return 'transaction'
-    elif filename_upper.startswith('QDROS') or filename_upper.startswith('QDRO'):
+    if base.startswith('DOCUMENT_'):
         return 'document'
-    elif filename_upper.startswith('PAYMENTS') or filename_upper.startswith('DEPOSITS'):
+    if base.startswith('PAYMENT_'):
         return 'payment'
-    else:
-        raise ValueError(f"Unknown file type for filename: {filename}")
+
+    # Legacy/real file name patterns
+    if base.startswith('ROLODEX') or base.startswith('ROLEX') or 'ROLODEX' in base or 'ROLEX' in base:
+        return 'client'
+    if base.startswith('PHONE') or 'PHONE' in base:
+        return 'phone'
+    if base.startswith('FILES') or base.startswith('FILE') or 'FILES' in base:
+        return 'case'
+    if base.startswith('LEDGER') or 'LEDGER' in base or base.startswith('TRNSACTN') or 'TRNSACTN' in base:
+        return 'transaction'
+    if base.startswith('QDROS') or base.startswith('QDRO') or 'QDRO' in base:
+        return 'document'
+    if base.startswith('PAYMENTS') or base.startswith('DEPOSITS') or 'PAYMENT' in base or 'DEPOSIT' in base:
+        return 'payment'
+
+    raise ValueError(f"Unknown file type for filename: {filename}")
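
A few assumed example filenames showing how the rewritten lookup behaves (the explicit-prefix pass runs first, then the legacy patterns; these are illustrations, not tests from the repository):

    # Assumed examples; filenames are uppercased and stripped of their extension
    # before matching, so case does not matter.
    assert get_import_type_from_filename("client_8a1b2c3d.csv") == "client"         # CLIENT_ prefix
    assert get_import_type_from_filename("ROLODEX.CSV") == "client"                 # legacy name
    assert get_import_type_from_filename("old_ledger_export.csv") == "transaction"  # 'LEDGER' in name
    try:
        get_import_type_from_filename("random.csv")   # unrecognized names still raise
    except ValueError:
        pass
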
@@ -370,7 +415,8 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
     }
 
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        with f as file:
             reader = csv.DictReader(file)
 
             # Validate headers
@@ -418,6 +464,7 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
             db.commit()
 
     except Exception as e:
+        logger.error("rolodex_import_failed", file=file_path, error=str(e))
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
@@ -436,9 +483,10 @@ def import_phone_data(db: Session, file_path: str) -> Dict[str, Any]:
         'total_rows': 0
     }
 
+    f = None
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            reader = csv.DictReader(file)
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        reader = csv.DictReader(f)
 
         headers = reader.fieldnames or []
         if len(headers) < 2:
@@ -482,6 +530,9 @@ def import_phone_data(db: Session, file_path: str) -> Dict[str, Any]:
     except Exception as e:
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
+    finally:
+        if f:
+            f.close()
 
     return result
@@ -530,9 +581,10 @@ def import_files_data(db: Session, file_path: str) -> Dict[str, Any]:
         'Memo': 'Memo'
     }
 
+    f = None
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            reader = csv.DictReader(file)
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        reader = csv.DictReader(f)
 
         headers = reader.fieldnames or []
         validation = validate_csv_headers(headers, expected_fields)
@@ -586,6 +638,9 @@ def import_files_data(db: Session, file_path: str) -> Dict[str, Any]:
     except Exception as e:
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
+    finally:
+        if f:
+            f.close()
 
     return result
@@ -602,9 +657,10 @@ def import_ledger_data(db: Session, file_path: str) -> Dict[str, Any]:
         'total_rows': 0
     }
 
+    f = None
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            reader = csv.DictReader(file)
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        reader = csv.DictReader(f)
 
         headers = reader.fieldnames or []
         if len(headers) < 3:
@@ -677,6 +733,9 @@ def import_ledger_data(db: Session, file_path: str) -> Dict[str, Any]:
     except Exception as e:
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
+    finally:
+        if f:
+            f.close()
 
     return result
@@ -693,9 +752,10 @@ def import_qdros_data(db: Session, file_path: str) -> Dict[str, Any]:
         'total_rows': 0
     }
 
+    f = None
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            reader = csv.DictReader(file)
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        reader = csv.DictReader(f)
 
         headers = reader.fieldnames or []
         if len(headers) < 2:
@@ -736,6 +796,9 @@ def import_qdros_data(db: Session, file_path: str) -> Dict[str, Any]:
     except Exception as e:
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
+    finally:
+        if f:
+            f.close()
 
     return result
@@ -752,9 +815,10 @@ def import_payments_data(db: Session, file_path: str) -> Dict[str, Any]:
         'total_rows': 0
     }
 
+    f = None
     try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            reader = csv.DictReader(file)
+        f, used_encoding = open_text_with_fallbacks(file_path)
+        reader = csv.DictReader(f)
 
         headers = reader.fieldnames or []
         if len(headers) < 2:
@@ -801,6 +865,9 @@ def import_payments_data(db: Session, file_path: str) -> Dict[str, Any]:
     except Exception as e:
         result['errors'].append(f"Import failed: {str(e)}")
         db.rollback()
+    finally:
+        if f:
+            f.close()
 
     return result
@@ -1435,7 +1502,14 @@ async def admin_upload_files(
             # Generate unique filename to avoid conflicts
            file_id = str(uuid.uuid4())
            file_ext = os.path.splitext(file.filename)[1]
-            stored_filename = f"{file_id}{file_ext}"
+
+            # Determine import type from original filename for better categorization later
+            try:
+                detected_type = get_import_type_from_filename(file.filename)
+            except ValueError:
+                detected_type = 'unknown'
+
+            # Prefix stored filename with detected type to preserve context
+            stored_filename = f"{detected_type}_{file_id}{file_ext}"
             file_path = os.path.join(import_dir, stored_filename)
 
             # Save file
@@ -1443,14 +1517,8 @@
             with open(file_path, "wb") as f:
                 f.write(contents)
 
-            # Determine import type from filename
-            try:
-                import_type = get_import_type_from_filename(file.filename)
-            except ValueError as e:
-                errors.append(f"File '{file.filename}': {str(e)}")
-                # Clean up uploaded file
-                os.remove(file_path)
-                continue
+            # Use detected type (already derived from original name)
+            import_type = detected_type
 
             results.append({
                 'filename': file.filename,
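
Taken together with the filename-detection change above, the stored name keeps its type recoverable: the upload handler prefixes the stored file with the detected type, and the prefix branch of get_import_type_from_filename() picks it back up, since matching is done on the uppercased name. A small assumed round trip for illustration:

    import os
    import uuid

    # Assumed values: a legacy upload is stored with a type prefix, and the
    # stored filename re-detects as the same import type.
    original = "ROLODEX.CSV"
    detected_type = get_import_type_from_filename(original)               # 'client'
    stored_filename = f"{detected_type}_{uuid.uuid4()}{os.path.splitext(original)[1]}"
    assert get_import_type_from_filename(stored_filename) == detected_type
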

View File

@@ -75,6 +75,7 @@
                         <thead>
                             <tr>
                                 <th>Original Filename</th>
+                                <th>Stored Filename</th>
                                 <th>Import Type</th>
                                 <th>Size</th>
                                 <th>Status</th>
@@ -83,7 +84,16 @@
                         <tbody>
                             {% for result in upload_results %}
                             <tr>
-                                <td>{{ result.filename }}</td>
+                                <td>
+                                    <strong>{{ result.filename }}</strong>
+                                    <br>
+                                    <small class="text-muted">Original name</small>
+                                </td>
+                                <td>
+                                    <code class="small">{{ result.stored_filename }}</code>
+                                    <br>
+                                    <small class="text-muted">Stored as</small>
+                                </td>
                                 <td>
                                     <span class="badge bg-primary">{{ result.import_type }}</span>
                                 </td>
@@ -148,12 +158,18 @@
                 <div class="card-body">
                     <form action="/admin/import/{{ import_type }}" method="post">
                         <div class="mb-3">
-                            <label class="form-label">Available Files:</label>
+                            <div class="d-flex justify-content-between align-items-center mb-2">
+                                <label class="form-label mb-0">Available Files:</label>
+                                <button type="button" class="btn btn-outline-primary btn-sm select-all-btn"
+                                        data-import-type="{{ import_type }}">
+                                    <i class="bi bi-check-all me-1"></i>Select All
+                                </button>
+                            </div>
                             <div class="list-group">
                                 {% for file in files %}
                                 <label class="list-group-item d-flex justify-content-between align-items-center">
                                     <div>
-                                        <input class="form-check-input me-2" type="checkbox"
+                                        <input class="form-check-input me-2 file-checkbox" type="checkbox"
                                                name="selected_files" value="{{ file.filename }}" id="{{ file.filename }}">
                                         <small class="text-muted">{{ file.filename }}</small>
                                         <br>
@@ -350,16 +366,54 @@ document.addEventListener('DOMContentLoaded', function() {
     // Start refresh cycle if there are running imports
     refreshRunningImports();
 
+    // Select All functionality
+    document.querySelectorAll('.select-all-btn').forEach(button => {
+        button.addEventListener('click', function() {
+            const importType = this.getAttribute('data-import-type');
+            const form = this.closest('form');
+            const checkboxes = form.querySelectorAll('.file-checkbox');
+            const submitBtn = form.querySelector('button[type="submit"]');
+
+            // Toggle all checkboxes in this form
+            const allChecked = Array.from(checkboxes).every(cb => cb.checked);
+            checkboxes.forEach(checkbox => {
+                checkbox.checked = !allChecked;
+            });
+
+            // Update button text
+            this.innerHTML = allChecked ?
+                '<i class="bi bi-check-all me-1"></i>Select All' :
+                '<i class="bi bi-dash-square me-1"></i>Deselect All';
+
+            // Update submit button state
+            const hasSelection = Array.from(checkboxes).some(cb => cb.checked);
+            submitBtn.disabled = !hasSelection;
+        });
+    });
+
     // File selection helpers
-    document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
+    document.querySelectorAll('.file-checkbox').forEach(checkbox => {
         checkbox.addEventListener('change', function() {
             const form = this.closest('form');
-            const checkboxes = form.querySelectorAll('input[name="selected_files"]');
+            const checkboxes = form.querySelectorAll('.file-checkbox');
             const submitBtn = form.querySelector('button[type="submit"]');
+            const selectAllBtn = form.querySelector('.select-all-btn');
 
             // Enable/disable submit button based on selection
             const hasSelection = Array.from(checkboxes).some(cb => cb.checked);
             submitBtn.disabled = !hasSelection;
+
+            // Update select all button state
+            const allChecked = Array.from(checkboxes).every(cb => cb.checked);
+            const noneChecked = Array.from(checkboxes).every(cb => !cb.checked);
+            if (allChecked) {
+                selectAllBtn.innerHTML = '<i class="bi bi-dash-square me-1"></i>Deselect All';
+            } else if (noneChecked) {
+                selectAllBtn.innerHTML = '<i class="bi bi-check-all me-1"></i>Select All';
+            } else {
+                selectAllBtn.innerHTML = '<i class="bi bi-check-square me-1"></i>Select All';
+            }
         });
     });