Fix UTF-8 encoding issue in CSV imports

- Updated open_text_with_fallbacks() in both main.py and import_legacy.py.
- Increased the fallback read size from 1 KB to 10 KB to catch encoding issues deeper in files.
- Added a proper fallback mechanism to the main.py version of the function.
- This fixes the "'utf-8' codec can't decode byte 0xa9" error when uploading planinfo.csv files.
2025-10-13 19:49:18 -05:00
parent 84c3dac83a
commit b6c09dc836
2 changed files with 124 additions and 11 deletions
--- a/app/import_legacy.py
+++ b/app/import_legacy.py
@@ -62,7 +62,7 @@ def open_text_with_fallbacks(file_path: str):
    try:
        # Try UTF-8 with error replacement first (most common case)
        f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
-        _ = f.read(1024)
+        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
        return f, "utf-8-replace"
@@ -72,7 +72,7 @@ def open_text_with_fallbacks(file_path: str):
    # Final fallback: use latin-1 with replace (handles any byte sequence)
    try:
        f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
-        _ = f.read(1024)
+        _ = f.read(10240)  # Read 10KB to catch encoding issues deeper in the file
        f.seek(0)
        logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
        return f, "latin-1-replace"