From fdcff9fbb2b2c43bf806de40ab7a7299f5984f4c Mon Sep 17 00:00:00 2001 From: HotSwapp <47397945+HotSwapp@users.noreply.github.com> Date: Tue, 7 Oct 2025 22:25:34 -0500 Subject: [PATCH] Expand encoding fallback to handle more legacy CSV encodings - Added windows-1252, cp1250, iso-8859-1 to encoding fallback list - Enhanced error logging in open_text_with_fallbacks function - Improved error messages to show all attempted encodings - Added warning logs for each encoding attempt that fails This should resolve 'charmap' codec errors and other encoding issues with legacy CSV files that use different Windows codepages or ISO encodings. --- app/main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/app/main.py b/app/main.py index 9866ac0..6c3533c 100644 --- a/app/main.py +++ b/app/main.py @@ -65,11 +65,11 @@ def open_text_with_fallbacks(file_path: str): """ Open a text file trying multiple encodings commonly seen in legacy CSVs. - Attempts in order: utf-8, utf-8-sig, cp1252, latin-1. + Attempts in order: utf-8, utf-8-sig, cp1252, windows-1252, cp1250, iso-8859-1, latin-1. Returns a tuple of (file_object, encoding_used). Caller is responsible to close file. """ - encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"] + encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"] last_error = None for enc in encodings: try: @@ -81,8 +81,13 @@ def open_text_with_fallbacks(file_path: str): return f, enc except Exception as e: last_error = e + logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e)) continue - raise last_error if last_error else RuntimeError("Unable to open file with known encodings") + + error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}" + if last_error: + error_msg += f". Last error: {str(last_error)}" + raise RuntimeError(error_msg) # Configure Jinja2 templates