Fix: Improved CSV encoding detection for legacy data with non-standard characters
- Changed the encoding fallback order to prioritize iso-8859-1/latin-1 over cp1252.
- Increased the encoding test sample from 1KB to 10KB to catch issues deeper in files.
- Added proper file handle cleanup on encoding failures.
- Resolves the 'charmap codec can't decode byte 0x9d' error in the rolodex import.
- Tested successfully with a rolodex file containing 52,100 rows.
This commit is contained in:
66
tests/test_import_detection.py
Normal file
66
tests/test_import_detection.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import pytest
|
||||
|
||||
from app.main import get_import_type_from_filename
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name,expected",
    [
        # Several import types are listed under more than one filename
        # spelling (e.g. with/without an underscore, singular/plural).
        ("TRNSTYPE.csv", "trnstype"),
        ("TrnsLkup.csv", "trnslkup"),
        ("FOOTERS.csv", "footers"),
        ("FILESTAT.csv", "filestat"),
        ("EMPLOYEE.csv", "employee"),
        ("GRUPLKUP.csv", "gruplkup"),
        ("GROUPLKUP.csv", "gruplkup"),
        ("FILETYPE.csv", "filetype"),
        ("FVARLKUP.csv", "fvarlkup"),
        ("RVARLKUP.csv", "rvarlkup"),
        ("ROLEX_V.csv", "rolex_v"),
        ("ROLEXV.csv", "rolex_v"),
        ("ROLODEX.csv", "rolodex"),
        ("ROLEX.csv", "rolodex"),
        ("FILES_R.csv", "files_r"),
        ("FILESR.csv", "files_r"),
        ("FILES_V.csv", "files_v"),
        ("FILESV.csv", "files_v"),
        ("FILENOTS.csv", "filenots"),
        ("FILE_NOTS.csv", "filenots"),
        ("FILES.csv", "files"),
        ("FILE.csv", "files"),
        ("PHONE.csv", "phone"),
        ("LEDGER.csv", "ledger"),
        ("DEPOSITS.csv", "deposits"),
        ("DEPOSIT.csv", "deposits"),
        ("PAYMENTS.csv", "payments"),
        ("PAYMENT.csv", "payments"),
        ("PLANINFO.csv", "planinfo"),
        ("PLAN_INFO.csv", "planinfo"),
        ("QDROS.csv", "qdros"),
        ("QDRO.csv", "qdros"),
        ("MARRIAGE.csv", "pension_marriage"),
        ("DEATH.csv", "pension_death"),
        ("SCHEDULE.csv", "pension_schedule"),
        ("SEPARATE.csv", "pension_separate"),
        ("RESULTS.csv", "pension_results"),
        ("PENSIONS.csv", "pensions"),
        ("PENSION.csv", "pensions"),
    ],
)
def test_get_import_type_from_filename_known(name, expected):
    """Check that each listed CSV filename maps to its expected import type.

    Args:
        name: Filename passed to ``get_import_type_from_filename``.
        expected: Import-type string the call is expected to return.
    """
    detected = get_import_type_from_filename(name)
    assert detected == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name",
    ["UNKNOWN.csv", "gibberish.xyz", ""],  # last entry: empty filename
)
def test_get_import_type_from_filename_unknown(name):
    """Check that filenames outside the known set are rejected.

    Args:
        name: Filename for which ``get_import_type_from_filename`` is
            expected to raise ``ValueError``.
    """
    with pytest.raises(ValueError):
        get_import_type_from_filename(name)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user