diff --git a/app/main.py b/app/main.py index 9866ac0..6c3533c 100644 --- a/app/main.py +++ b/app/main.py @@ -65,11 +65,11 @@ def open_text_with_fallbacks(file_path: str): """ Open a text file trying multiple encodings commonly seen in legacy CSVs. - Attempts in order: utf-8, utf-8-sig, cp1252, latin-1. + Attempts in order: utf-8, utf-8-sig, cp1252, windows-1252, cp1250, iso-8859-1, latin-1. Returns a tuple of (file_object, encoding_used). Caller is responsible to close file. """ - encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"] + encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"] last_error = None for enc in encodings: try: @@ -81,8 +81,13 @@ def open_text_with_fallbacks(file_path: str): return f, enc except Exception as e: last_error = e + logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e)) continue - raise last_error if last_error else RuntimeError("Unable to open file with known encodings") + + error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}" + if last_error: + error_msg += f". Last error: {str(last_error)}" + raise RuntimeError(error_msg) # Configure Jinja2 templates