Fix UTF-8 encoding issue in CSV imports

- Updated open_text_with_fallbacks() in both main.py and import_legacy.py.
- Increased the fallback read size from 1KB to 10KB to catch encoding issues deeper in files.
- Added a proper fallback mechanism to the main.py version of the function.
- This fixes the "'utf-8' codec can't decode byte 0xa9" error when uploading planinfo.csv files.
2025-10-13 19:49:18 -05:00
parent 84c3dac83a
commit b6c09dc836
2 changed files with 124 additions and 11 deletions
--- a/app/import_legacy.py
+++ b/app/import_legacy.py
@@ -62,7 +62,7 @@ def open_text_with_fallbacks(file_path: str):
    try:
        # Try UTF-8 with error replacement first (most common case)
        f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
-        _ = f.read(1024)
+        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
        return f, "utf-8-replace"
@@ -72,7 +72,7 @@ def open_text_with_fallbacks(file_path: str):
    # Final fallback: use latin-1 with replace (handles any byte sequence)
    try:
        f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
-        _ = f.read(1024)
+        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
        return f, "latin-1-replace"
--- a/app/main.py
+++ b/app/main.py
@@ -22,7 +22,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from sqlalchemy.orm import Session, joinedload
-from sqlalchemy import or_, and_, func as sa_func
+from sqlalchemy import or_, and_, func as sa_func, select
 from dotenv import load_dotenv
 from starlette.middleware.base import BaseHTTPMiddleware
 import structlog
@@ -92,7 +92,29 @@ def open_text_with_fallbacks(file_path: str):
                pass
            continue
-    error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}"
+    # If strict mode fails, try with error replacement for robustness
    logger.warning("strict_encoding_failed", file=file_path, trying_with_replace=True)
    try:
        # Try UTF-8 with error replacement first (most common case)
        f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
        return f, "utf-8-replace"
    except Exception as e:
        logger.warning("utf8_replace_failed", file=file_path, error=str(e))
    # Final fallback: use latin-1 with replace (handles any byte sequence)
    try:
        f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
        return f, "latin-1-replace"
    except Exception as e:
        last_error = e
    error_msg = f"Unable to open file '{file_path}' with any supported encodings"
    if last_error:
        error_msg += f". Last error: {str(last_error)}"
    raise RuntimeError(error_msg)
@@ -2636,6 +2658,8 @@ async def rolodex_list(
    phone: str | None = Query(None, description="Search by phone contains"),
    page: int = Query(1, ge=1, description="Page number (1-indexed)"),
    page_size: int = Query(20, ge=1, le=100, description="Results per page"),
    sort_key: str | None = Query(None, description="Sort column key"),
    sort_dir: str | None = Query(None, description="Sort direction (asc/desc)"),
    db: Session = Depends(get_db),
 ):
    """
@@ -2647,6 +2671,44 @@ async def rolodex_list(
    if not user:
        return RedirectResponse(url="/login", status_code=302)
    allowed_sort_keys = {
        "name": "Name",
        "company": "Company",
        "address": "Address",
        "city": "City",
        "state": "State",
        "zip": "ZIP",
        "phones": "Phones",
        "updated": "Updated",
    }
    sort_defaults = {
        "name": "asc",
        "company": "asc",
        "address": "asc",
        "city": "asc",
        "state": "asc",
        "zip": "asc",
        "phones": "asc",
        "updated": "desc",
    }
    session_sort = request.session.get("rolodex_sort") or {}
    if not isinstance(session_sort, dict):
        session_sort = {}
    raw_sort_key = sort_key.lower().strip() if isinstance(sort_key, str) else None
    chosen_sort_key = raw_sort_key or session_sort.get("key") or "name"
    if chosen_sort_key not in allowed_sort_keys:
        chosen_sort_key = "name"
    default_direction = sort_defaults[chosen_sort_key]
    raw_sort_dir = sort_dir.lower().strip() if isinstance(sort_dir, str) else None
    chosen_sort_dir = raw_sort_dir or session_sort.get("direction") or default_direction
    if chosen_sort_dir not in {"asc", "desc"}:
        chosen_sort_dir = default_direction
    request.session["rolodex_sort"] = {"key": chosen_sort_key, "direction": chosen_sort_dir}
    # Eager-load phones to avoid N+1 in template
    query = db.query(Client).options(joinedload(Client.phones))
@@ -2665,14 +2727,60 @@ async def rolodex_list(
        # Use EXISTS over join to avoid duplicate rows
        query = query.filter(Client.phones.any(Phone.phone_number.ilike(like_phone)))
-    # Order by last then first for stable display (SQLite-safe nulls last)
+    phone_sort_expr = (
-    # SQLite does not support "NULLS LAST"; emulate by sorting non-nulls first, then value
+        select(sa_func.min(Phone.phone_number))
-    query = query.order_by(
+        .where(Phone.client_id == Client.id)
-        Client.last_name.is_(None),
+        .correlate(Client)
-        Client.last_name.asc(),
+        .scalar_subquery()
        Client.first_name.is_(None),
        Client.first_name.asc(),
    )
    updated_sort_expr = sa_func.coalesce(Client.updated_at, Client.created_at)
    order_map: dict[str, dict[str, list[Any]]] = {
        "name": {
            "asc": [
                Client.last_name.is_(None),
                Client.last_name.asc(),
                Client.first_name.is_(None),
                Client.first_name.asc(),
            ],
            "desc": [
                Client.last_name.is_(None),
                Client.last_name.desc(),
                Client.first_name.is_(None),
                Client.first_name.desc(),
            ],
        },
        "company": {
            "asc": [Client.company.is_(None), Client.company.asc()],
            "desc": [Client.company.is_(None), Client.company.desc()],
        },
        "address": {
            "asc": [Client.address.is_(None), Client.address.asc()],
            "desc": [Client.address.is_(None), Client.address.desc()],
        },
        "city": {
            "asc": [Client.city.is_(None), Client.city.asc()],
            "desc": [Client.city.is_(None), Client.city.desc()],
        },
        "state": {
            "asc": [Client.state.is_(None), Client.state.asc()],
            "desc": [Client.state.is_(None), Client.state.desc()],
        },
        "zip": {
            "asc": [Client.zip_code.is_(None), Client.zip_code.asc()],
            "desc": [Client.zip_code.is_(None), Client.zip_code.desc()],
        },
        "phones": {
            "asc": [phone_sort_expr.is_(None), phone_sort_expr.asc()],
            "desc": [phone_sort_expr.is_(None), phone_sort_expr.desc()],
        },
        "updated": {
            "asc": [updated_sort_expr.is_(None), updated_sort_expr.asc()],
            "desc": [updated_sort_expr.is_(None), updated_sort_expr.desc()],
        },
    }
    query = query.order_by(*order_map[chosen_sort_key][chosen_sort_dir])
    total: int = query.count()
    total_pages: int = (total + page_size - 1) // page_size if total > 0 else 1
@@ -2693,6 +2801,8 @@ async def rolodex_list(
        page=page,
        page_size=page_size,
        total=total,
        sort_key=chosen_sort_key,
        sort_dir=chosen_sort_dir,
    )
    return templates.TemplateResponse(
@@ -2711,6 +2821,9 @@ async def rolodex_list(
            "start_index": (offset + 1) if total > 0 else 0,
            "end_index": min(offset + len(clients), total),
            "enable_bulk": True,
            "sort_key": chosen_sort_key,
            "sort_dir": chosen_sort_dir,
            "sort_labels": allowed_sort_keys,
        },
    )