Fix UTF-8 encoding issue in CSV imports

- Updated open_text_with_fallbacks() in both main.py and import_legacy.py
- Increased fallback read size from 1KB to 10KB to catch encoding issues deeper in files
- Added proper fallback mechanism to main.py version of the function
- Fixes the "'utf-8' codec can't decode byte 0xa9" error raised when uploading planinfo.csv files
This commit is contained in:
HotSwapp
2025-10-13 19:49:18 -05:00
parent 84c3dac83a
commit b6c09dc836
2 changed files with 124 additions and 11 deletions

View File

@@ -62,7 +62,7 @@ def open_text_with_fallbacks(file_path: str):
try:
# Try UTF-8 with error replacement first (most common case)
f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
_ = f.read(1024)
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
return f, "utf-8-replace"
@@ -72,7 +72,7 @@ def open_text_with_fallbacks(file_path: str):
# Final fallback: use latin-1 with replace (handles any byte sequence)
try:
f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
_ = f.read(1024)
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
return f, "latin-1-replace"

View File

@@ -22,7 +22,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import or_, and_, func as sa_func
from sqlalchemy import or_, and_, func as sa_func, select
from dotenv import load_dotenv
from starlette.middleware.base import BaseHTTPMiddleware
import structlog
@@ -92,7 +92,29 @@ def open_text_with_fallbacks(file_path: str):
pass
continue
error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}"
# If strict mode fails, try with error replacement for robustness
logger.warning("strict_encoding_failed", file=file_path, trying_with_replace=True)
try:
# Try UTF-8 with error replacement first (most common case)
f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
return f, "utf-8-replace"
except Exception as e:
logger.warning("utf8_replace_failed", file=file_path, error=str(e))
# Final fallback: use latin-1 with replace (handles any byte sequence)
try:
f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
_ = f.read(10240) # Read 10KB to catch encoding issues deeper in the file
f.seek(0)
logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
return f, "latin-1-replace"
except Exception as e:
last_error = e
error_msg = f"Unable to open file '{file_path}' with any supported encodings"
if last_error:
error_msg += f". Last error: {str(last_error)}"
raise RuntimeError(error_msg)
@@ -2636,6 +2658,8 @@ async def rolodex_list(
phone: str | None = Query(None, description="Search by phone contains"),
page: int = Query(1, ge=1, description="Page number (1-indexed)"),
page_size: int = Query(20, ge=1, le=100, description="Results per page"),
sort_key: str | None = Query(None, description="Sort column key"),
sort_dir: str | None = Query(None, description="Sort direction (asc/desc)"),
db: Session = Depends(get_db),
):
"""
@@ -2647,6 +2671,44 @@ async def rolodex_list(
if not user:
return RedirectResponse(url="/login", status_code=302)
allowed_sort_keys = {
"name": "Name",
"company": "Company",
"address": "Address",
"city": "City",
"state": "State",
"zip": "ZIP",
"phones": "Phones",
"updated": "Updated",
}
sort_defaults = {
"name": "asc",
"company": "asc",
"address": "asc",
"city": "asc",
"state": "asc",
"zip": "asc",
"phones": "asc",
"updated": "desc",
}
session_sort = request.session.get("rolodex_sort") or {}
if not isinstance(session_sort, dict):
session_sort = {}
raw_sort_key = sort_key.lower().strip() if isinstance(sort_key, str) else None
chosen_sort_key = raw_sort_key or session_sort.get("key") or "name"
if chosen_sort_key not in allowed_sort_keys:
chosen_sort_key = "name"
default_direction = sort_defaults[chosen_sort_key]
raw_sort_dir = sort_dir.lower().strip() if isinstance(sort_dir, str) else None
chosen_sort_dir = raw_sort_dir or session_sort.get("direction") or default_direction
if chosen_sort_dir not in {"asc", "desc"}:
chosen_sort_dir = default_direction
request.session["rolodex_sort"] = {"key": chosen_sort_key, "direction": chosen_sort_dir}
# Eager-load phones to avoid N+1 in template
query = db.query(Client).options(joinedload(Client.phones))
@@ -2665,14 +2727,60 @@ async def rolodex_list(
# Use EXISTS over join to avoid duplicate rows
query = query.filter(Client.phones.any(Phone.phone_number.ilike(like_phone)))
# Order by last then first for stable display (SQLite-safe nulls last)
# SQLite does not support "NULLS LAST"; emulate by sorting non-nulls first, then value
query = query.order_by(
Client.last_name.is_(None),
Client.last_name.asc(),
Client.first_name.is_(None),
Client.first_name.asc(),
phone_sort_expr = (
select(sa_func.min(Phone.phone_number))
.where(Phone.client_id == Client.id)
.correlate(Client)
.scalar_subquery()
)
updated_sort_expr = sa_func.coalesce(Client.updated_at, Client.created_at)
order_map: dict[str, dict[str, list[Any]]] = {
"name": {
"asc": [
Client.last_name.is_(None),
Client.last_name.asc(),
Client.first_name.is_(None),
Client.first_name.asc(),
],
"desc": [
Client.last_name.is_(None),
Client.last_name.desc(),
Client.first_name.is_(None),
Client.first_name.desc(),
],
},
"company": {
"asc": [Client.company.is_(None), Client.company.asc()],
"desc": [Client.company.is_(None), Client.company.desc()],
},
"address": {
"asc": [Client.address.is_(None), Client.address.asc()],
"desc": [Client.address.is_(None), Client.address.desc()],
},
"city": {
"asc": [Client.city.is_(None), Client.city.asc()],
"desc": [Client.city.is_(None), Client.city.desc()],
},
"state": {
"asc": [Client.state.is_(None), Client.state.asc()],
"desc": [Client.state.is_(None), Client.state.desc()],
},
"zip": {
"asc": [Client.zip_code.is_(None), Client.zip_code.asc()],
"desc": [Client.zip_code.is_(None), Client.zip_code.desc()],
},
"phones": {
"asc": [phone_sort_expr.is_(None), phone_sort_expr.asc()],
"desc": [phone_sort_expr.is_(None), phone_sort_expr.desc()],
},
"updated": {
"asc": [updated_sort_expr.is_(None), updated_sort_expr.asc()],
"desc": [updated_sort_expr.is_(None), updated_sort_expr.desc()],
},
}
query = query.order_by(*order_map[chosen_sort_key][chosen_sort_dir])
total: int = query.count()
total_pages: int = (total + page_size - 1) // page_size if total > 0 else 1
@@ -2693,6 +2801,8 @@ async def rolodex_list(
page=page,
page_size=page_size,
total=total,
sort_key=chosen_sort_key,
sort_dir=chosen_sort_dir,
)
return templates.TemplateResponse(
@@ -2711,6 +2821,9 @@ async def rolodex_list(
"start_index": (offset + 1) if total > 0 else 0,
"end_index": min(offset + len(clients), total),
"enable_bulk": True,
"sort_key": chosen_sort_key,
"sort_dir": chosen_sort_dir,
"sort_labels": allowed_sort_keys,
},
)