Fix UTF-8 encoding issue in CSV imports

- Updated open_text_with_fallbacks() in both main.py and import_legacy.py
- Increased the fallback probe read from 1024 to 10240 characters (roughly 1KB to 10KB) so encoding problems deeper in the file are caught
- Added an errors='replace' fallback chain (UTF-8 first, then latin-1) to the main.py version of the function, used when strict decoding fails
- Fixes the "'utf-8' codec can't decode byte 0xa9" error raised when uploading planinfo.csv files
This commit is contained in:
HotSwapp
2025-10-13 19:49:18 -05:00
parent 84c3dac83a
commit b6c09dc836
2 changed files with 124 additions and 11 deletions

View File

@@ -62,7 +62,7 @@ def open_text_with_fallbacks(file_path: str):
try: try:
# Try UTF-8 with error replacement first (most common case) # Try UTF-8 with error replacement first (most common case)
f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='') f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
_ = f.read(1024) _ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0) f.seek(0)
logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace") logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
return f, "utf-8-replace" return f, "utf-8-replace"
@@ -72,7 +72,7 @@ def open_text_with_fallbacks(file_path: str):
# Final fallback: use latin-1 with replace (handles any byte sequence) # Final fallback: use latin-1 with replace (handles any byte sequence)
try: try:
f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='') f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
_ = f.read(1024) _ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0) f.seek(0)
logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace") logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
return f, "latin-1-replace" return f, "latin-1-replace"

View File

@@ -22,7 +22,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session, joinedload from sqlalchemy.orm import Session, joinedload
from sqlalchemy import or_, and_, func as sa_func from sqlalchemy import or_, and_, func as sa_func, select
from dotenv import load_dotenv from dotenv import load_dotenv
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
import structlog import structlog
@@ -92,7 +92,29 @@ def open_text_with_fallbacks(file_path: str):
pass pass
continue continue
error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}" # If strict mode fails, try with error replacement for robustness
logger.warning("strict_encoding_failed", file=file_path, trying_with_replace=True)
try:
# Try UTF-8 with error replacement first (most common case)
f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
return f, "utf-8-replace"
except Exception as e:
logger.warning("utf8_replace_failed", file=file_path, error=str(e))
# Final fallback: use latin-1 with replace (handles any byte sequence)
try:
f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
return f, "latin-1-replace"
except Exception as e:
last_error = e
error_msg = f"Unable to open file '{file_path}' with any supported encodings"
if last_error: if last_error:
error_msg += f". Last error: {str(last_error)}" error_msg += f". Last error: {str(last_error)}"
raise RuntimeError(error_msg) raise RuntimeError(error_msg)
@@ -2636,6 +2658,8 @@ async def rolodex_list(
phone: str | None = Query(None, description="Search by phone contains"), phone: str | None = Query(None, description="Search by phone contains"),
page: int = Query(1, ge=1, description="Page number (1-indexed)"), page: int = Query(1, ge=1, description="Page number (1-indexed)"),
page_size: int = Query(20, ge=1, le=100, description="Results per page"), page_size: int = Query(20, ge=1, le=100, description="Results per page"),
sort_key: str | None = Query(None, description="Sort column key"),
sort_dir: str | None = Query(None, description="Sort direction (asc/desc)"),
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
""" """
@@ -2647,6 +2671,44 @@ async def rolodex_list(
if not user: if not user:
return RedirectResponse(url="/login", status_code=302) return RedirectResponse(url="/login", status_code=302)
allowed_sort_keys = {
"name": "Name",
"company": "Company",
"address": "Address",
"city": "City",
"state": "State",
"zip": "ZIP",
"phones": "Phones",
"updated": "Updated",
}
sort_defaults = {
"name": "asc",
"company": "asc",
"address": "asc",
"city": "asc",
"state": "asc",
"zip": "asc",
"phones": "asc",
"updated": "desc",
}
session_sort = request.session.get("rolodex_sort") or {}
if not isinstance(session_sort, dict):
session_sort = {}
raw_sort_key = sort_key.lower().strip() if isinstance(sort_key, str) else None
chosen_sort_key = raw_sort_key or session_sort.get("key") or "name"
if chosen_sort_key not in allowed_sort_keys:
chosen_sort_key = "name"
default_direction = sort_defaults[chosen_sort_key]
raw_sort_dir = sort_dir.lower().strip() if isinstance(sort_dir, str) else None
chosen_sort_dir = raw_sort_dir or session_sort.get("direction") or default_direction
if chosen_sort_dir not in {"asc", "desc"}:
chosen_sort_dir = default_direction
request.session["rolodex_sort"] = {"key": chosen_sort_key, "direction": chosen_sort_dir}
# Eager-load phones to avoid N+1 in template # Eager-load phones to avoid N+1 in template
query = db.query(Client).options(joinedload(Client.phones)) query = db.query(Client).options(joinedload(Client.phones))
@@ -2665,14 +2727,60 @@ async def rolodex_list(
# Use EXISTS over join to avoid duplicate rows # Use EXISTS over join to avoid duplicate rows
query = query.filter(Client.phones.any(Phone.phone_number.ilike(like_phone))) query = query.filter(Client.phones.any(Phone.phone_number.ilike(like_phone)))
# Order by last then first for stable display (SQLite-safe nulls last) phone_sort_expr = (
# SQLite does not support "NULLS LAST"; emulate by sorting non-nulls first, then value select(sa_func.min(Phone.phone_number))
query = query.order_by( .where(Phone.client_id == Client.id)
Client.last_name.is_(None), .correlate(Client)
Client.last_name.asc(), .scalar_subquery()
Client.first_name.is_(None),
Client.first_name.asc(),
) )
updated_sort_expr = sa_func.coalesce(Client.updated_at, Client.created_at)
order_map: dict[str, dict[str, list[Any]]] = {
"name": {
"asc": [
Client.last_name.is_(None),
Client.last_name.asc(),
Client.first_name.is_(None),
Client.first_name.asc(),
],
"desc": [
Client.last_name.is_(None),
Client.last_name.desc(),
Client.first_name.is_(None),
Client.first_name.desc(),
],
},
"company": {
"asc": [Client.company.is_(None), Client.company.asc()],
"desc": [Client.company.is_(None), Client.company.desc()],
},
"address": {
"asc": [Client.address.is_(None), Client.address.asc()],
"desc": [Client.address.is_(None), Client.address.desc()],
},
"city": {
"asc": [Client.city.is_(None), Client.city.asc()],
"desc": [Client.city.is_(None), Client.city.desc()],
},
"state": {
"asc": [Client.state.is_(None), Client.state.asc()],
"desc": [Client.state.is_(None), Client.state.desc()],
},
"zip": {
"asc": [Client.zip_code.is_(None), Client.zip_code.asc()],
"desc": [Client.zip_code.is_(None), Client.zip_code.desc()],
},
"phones": {
"asc": [phone_sort_expr.is_(None), phone_sort_expr.asc()],
"desc": [phone_sort_expr.is_(None), phone_sort_expr.desc()],
},
"updated": {
"asc": [updated_sort_expr.is_(None), updated_sort_expr.asc()],
"desc": [updated_sort_expr.is_(None), updated_sort_expr.desc()],
},
}
query = query.order_by(*order_map[chosen_sort_key][chosen_sort_dir])
total: int = query.count() total: int = query.count()
total_pages: int = (total + page_size - 1) // page_size if total > 0 else 1 total_pages: int = (total + page_size - 1) // page_size if total > 0 else 1
@@ -2693,6 +2801,8 @@ async def rolodex_list(
page=page, page=page,
page_size=page_size, page_size=page_size,
total=total, total=total,
sort_key=chosen_sort_key,
sort_dir=chosen_sort_dir,
) )
return templates.TemplateResponse( return templates.TemplateResponse(
@@ -2711,6 +2821,9 @@ async def rolodex_list(
"start_index": (offset + 1) if total > 0 else 0, "start_index": (offset + 1) if total > 0 else 0,
"end_index": min(offset + len(clients), total), "end_index": min(offset + len(clients), total),
"enable_bulk": True, "enable_bulk": True,
"sort_key": chosen_sort_key,
"sort_dir": chosen_sort_dir,
"sort_labels": allowed_sort_keys,
}, },
) )