From f4c5b9019b99ced0aaa40ff89a4583192df740d9 Mon Sep 17 00:00:00 2001 From: HotSwapp <47397945+HotSwapp@users.noreply.github.com> Date: Sun, 12 Oct 2025 18:24:24 -0500 Subject: [PATCH] Fix Unicode encoding error in rolodex import - Enhanced open_text_with_fallbacks() function to handle problematic bytes - Added CP1250 encoding to fallback list for better character set support - Added graceful error handling with replacement characters for edge cases - Ensures rolodex CSV import works with legacy encoding issues Fixes: 'charmap' codec can't decode byte 0x9d error during rolodex import --- app/import_legacy.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/app/import_legacy.py b/app/import_legacy.py index ddec34a..4292e92 100644 --- a/app/import_legacy.py +++ b/app/import_legacy.py @@ -32,9 +32,10 @@ BATCH_SIZE = 500 def open_text_with_fallbacks(file_path: str): """ Open a text file trying multiple encodings commonly seen in legacy CSVs. - + Returns a tuple of (file_object, encoding_used). """ + # First try strict mode with common encodings encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"] last_error = None for enc in encodings: @@ -48,7 +49,29 @@ def open_text_with_fallbacks(file_path: str): last_error = e logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e)) continue - + + # If strict mode fails, try with error replacement for robustness + logger.warning("strict_encoding_failed", file=file_path, trying_with_replace=True) + try: + # Try UTF-8 with error replacement first (most common case) + f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='') + _ = f.read(1024) + f.seek(0) + logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace") + return f, "utf-8-replace" + except Exception as e: + logger.warning("utf8_replace_failed", file=file_path, error=str(e)) + + # Final fallback: use latin-1 with replace (handles any byte sequence) + try: + f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='') + _ = f.read(1024) + f.seek(0) + logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace") + return f, "latin-1-replace" + except Exception as e: + last_error = e + error_msg = f"Unable to open file '{file_path}' with any supported encodings" if last_error: error_msg += f". Last error: {str(last_error)}"