Fix Unicode encoding error in rolodex import
- Enhanced open_text_with_fallbacks() function to handle problematic bytes
- Added CP1250 encoding to fallback list for better character set support
- Added graceful error handling with replacement characters for edge cases
- Ensures rolodex CSV import works with files that have legacy encoding issues

Fixes: 'charmap' codec can't decode byte 0x9d error during rolodex import
This commit is contained in:
@@ -35,6 +35,7 @@ def open_text_with_fallbacks(file_path: str):
|
||||
|
||||
Returns a tuple of (file_object, encoding_used).
|
||||
"""
|
||||
# First try strict mode with common encodings
|
||||
encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"]
|
||||
last_error = None
|
||||
for enc in encodings:
|
||||
@@ -49,6 +50,28 @@ def open_text_with_fallbacks(file_path: str):
|
||||
logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e))
|
||||
continue
|
||||
|
||||
# If strict mode fails, try with error replacement for robustness
|
||||
logger.warning("strict_encoding_failed", file=file_path, trying_with_replace=True)
|
||||
try:
|
||||
# Try UTF-8 with error replacement first (most common case)
|
||||
f = open(file_path, 'r', encoding='utf-8', errors='replace', newline='')
|
||||
_ = f.read(1024)
|
||||
f.seek(0)
|
||||
logger.info("csv_open_encoding_with_replace", file=file_path, encoding="utf-8-replace")
|
||||
return f, "utf-8-replace"
|
||||
except Exception as e:
|
||||
logger.warning("utf8_replace_failed", file=file_path, error=str(e))
|
||||
|
||||
# Final fallback: use latin-1 with replace (handles any byte sequence)
|
||||
try:
|
||||
f = open(file_path, 'r', encoding='latin-1', errors='replace', newline='')
|
||||
_ = f.read(1024)
|
||||
f.seek(0)
|
||||
logger.info("csv_open_encoding_fallback", file=file_path, encoding="latin-1-replace")
|
||||
return f, "latin-1-replace"
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
|
||||
error_msg = f"Unable to open file '{file_path}' with any supported encodings"
|
||||
if last_error:
|
||||
error_msg += f". Last error: {str(last_error)}"
|
||||
|
||||
Reference in New Issue
Block a user