diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..5a3dc60 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,2 @@ +# Make app a package for reliable imports in tests and runtime + diff --git a/app/__pycache__/__init__.cpython-313.pyc b/app/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..b35c77a Binary files /dev/null and b/app/__pycache__/__init__.cpython-313.pyc differ diff --git a/app/__pycache__/import_legacy.cpython-313.pyc b/app/__pycache__/import_legacy.cpython-313.pyc new file mode 100644 index 0000000..fd86165 Binary files /dev/null and b/app/__pycache__/import_legacy.cpython-313.pyc differ diff --git a/app/__pycache__/models.cpython-313.pyc b/app/__pycache__/models.cpython-313.pyc index 1809b45..50e05c2 100644 Binary files a/app/__pycache__/models.cpython-313.pyc and b/app/__pycache__/models.cpython-313.pyc differ diff --git a/app/import_legacy.py b/app/import_legacy.py index 4292e92..66c0d3f 100644 --- a/app/import_legacy.py +++ b/app/import_legacy.py @@ -36,18 +36,25 @@ def open_text_with_fallbacks(file_path: str): Returns a tuple of (file_object, encoding_used). """ # First try strict mode with common encodings - encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"] + # Try latin-1/iso-8859-1 earlier as they are more forgiving and commonly used in legacy data + encodings = ["utf-8", "utf-8-sig", "iso-8859-1", "latin-1", "cp1252", "windows-1252", "cp1250"] last_error = None for enc in encodings: try: f = open(file_path, 'r', encoding=enc, errors='strict', newline='') - _ = f.read(1024) + # Read more than 1KB to catch encoding issues deeper in the file + # Many legacy CSVs have issues beyond the first few rows + _ = f.read(10240) # Read 10KB to test f.seek(0) logger.info("csv_open_encoding_selected", file=file_path, encoding=enc) return f, enc except Exception as e: last_error = e logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e)) + try: + f.close() + except: + pass continue # If strict mode fails, try with error replacement for robustness diff --git a/app/main.py b/app/main.py index d292094..0777f7a 100644 --- a/app/main.py +++ b/app/main.py @@ -67,23 +67,29 @@ def open_text_with_fallbacks(file_path: str): """ Open a text file trying multiple encodings commonly seen in legacy CSVs. - Attempts in order: utf-8, utf-8-sig, cp1252, windows-1252, cp1250, iso-8859-1, latin-1. + Attempts in order: utf-8, utf-8-sig, iso-8859-1, latin-1, cp1252, windows-1252, cp1250. + Prioritizes latin-1/iso-8859-1 as they handle legacy data better than cp1252. Returns a tuple of (file_object, encoding_used). Caller is responsible to close file. """ - encodings = ["utf-8", "utf-8-sig", "cp1252", "windows-1252", "cp1250", "iso-8859-1", "latin-1"] + encodings = ["utf-8", "utf-8-sig", "iso-8859-1", "latin-1", "cp1252", "windows-1252", "cp1250"] last_error = None for enc in encodings: try: f = open(file_path, 'r', encoding=enc, errors='strict', newline='') - # Try reading a tiny chunk to force decoding errors early - _ = f.read(1024) + # Read more than 1KB to catch encoding issues deeper in the file + # Many legacy CSVs have issues beyond the first few rows + _ = f.read(10240) # Read 10KB to test f.seek(0) logger.info("csv_open_encoding_selected", file=file_path, encoding=enc) return f, enc except Exception as e: last_error = e logger.warning("encoding_fallback_failed", file=file_path, encoding=enc, error=str(e)) + try: + f.close() + except: + pass continue error_msg = f"Unable to open file '{file_path}' with any of the supported encodings: {', '.join(encodings)}" @@ -250,6 +256,19 @@ VALID_IMPORT_TYPES: List[str] = [ ] +# Centralized import order for auto-import after upload +# Reference tables first, then core tables, then specialized tables +IMPORT_ORDER: List[str] = [ + # Reference tables + 'trnstype', 'trnslkup', 'footers', 'filestat', 'employee', 'gruplkup', 'filetype', 'fvarlkup', 'rvarlkup', + # Core tables + 'rolodex', 'phone', 'rolex_v', 'files', 'files_r', 'files_v', 'filenots', 'ledger', 'deposits', 'payments', + # Specialized tables + 'planinfo', 'qdros', 'pensions', 'pension_marriage', 'pension_death', 'pension_schedule', 'pension_separate', 'pension_results', +] +ORDER_INDEX: Dict[str, int] = {t: i for i, t in enumerate(IMPORT_ORDER)} + + def get_import_type_from_filename(filename: str) -> str: """ Determine import type based on filename pattern for legacy CSV files. @@ -1066,6 +1085,105 @@ def process_csv_import(db: Session, import_type: str, file_path: str) -> Dict[st return import_func(db, file_path) +# --------------------------------- +# Auto-import helper after upload +# --------------------------------- +def run_auto_import_for_upload(db: Session, uploaded_items: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Run auto-import for the files just uploaded, following IMPORT_ORDER. + + Stops after the first file that reports any row errors. Unknown types are + skipped. Logs each file via ImportLog. + """ + # Filter out unknowns; keep metadata + known_items: List[Dict[str, Any]] = [ + item for item in uploaded_items if item.get("import_type") in ORDER_INDEX + ] + + # Sort by import order, then by filename for stability + known_items.sort(key=lambda x: (ORDER_INDEX.get(x.get("import_type"), 1_000_000), x.get("filename", ""))) + + files_summary: List[Dict[str, Any]] = [] + stopped = False + stopped_on: Optional[str] = None + + for item in known_items: + import_type = item["import_type"] + file_path = item["file_path"] + stored_filename = item["stored_filename"] + + # Create import log + import_log = ImportLog( + import_type=import_type, + file_name=stored_filename, + file_path=file_path, + status="running", + ) + db.add(import_log) + db.commit() + + try: + result = process_csv_import(db, import_type, file_path) + + import_log.status = "completed" if not result.get("errors") else "failed" + import_log.total_rows = result.get("total_rows", 0) + import_log.success_count = result.get("success", 0) + import_log.error_count = len(result.get("errors", [])) + import_log.error_details = json.dumps(result.get("errors", [])) + import_log.completed_at = datetime.now() + db.commit() + + files_summary.append({ + "filename": item.get("filename"), + "stored_filename": stored_filename, + "import_type": import_type, + "status": "success" if result.get("success", 0) > 0 and not result.get("errors") else "error", + "total_rows": result.get("total_rows", 0), + "success_count": result.get("success", 0), + "error_count": len(result.get("errors", [])), + "errors": (result.get("errors", [])[:10] if result.get("errors") else []), + }) + + if result.get("errors"): + stopped = True + stopped_on = stored_filename + break + + except Exception as e: + import_log.status = "failed" + import_log.error_details = json.dumps([str(e)]) + import_log.completed_at = datetime.now() + db.commit() + + files_summary.append({ + "filename": item.get("filename"), + "stored_filename": stored_filename, + "import_type": import_type, + "status": "error", + "total_rows": 0, + "success_count": 0, + "error_count": 1, + "errors": [str(e)][:10], + }) + + stopped = True + stopped_on = stored_filename + break + + # Build skipped notes for unknowns + skipped_unknowns = [ + {"filename": item.get("filename"), "stored_filename": item.get("stored_filename")} + for item in uploaded_items + if item.get("import_type") not in ORDER_INDEX + ] + + return { + "files": files_summary, + "stopped": stopped, + "stopped_on": stopped_on, + "skipped_unknowns": skipped_unknowns, + } + # ------------------------------ # Ledger CRUD and helpers # ------------------------------ @@ -1635,6 +1753,7 @@ async def dashboard( async def admin_upload_files( request: Request, files: List[UploadFile] = File(...), + auto_import: bool = Form(True), db: Session = Depends(get_db) ): """ @@ -1700,13 +1819,29 @@ async def admin_upload_files( uploaded_count=len(results), error_count=len(errors), username=user.username, + auto_import=auto_import, ) + auto_import_results: Dict[str, Any] | None = None + if auto_import and results: + try: + auto_import_results = run_auto_import_for_upload(db, results) + logger.info( + "admin_upload_auto_import", + processed_files=len(auto_import_results.get("files", [])), + stopped=auto_import_results.get("stopped", False), + stopped_on=auto_import_results.get("stopped_on"), + username=user.username, + ) + except Exception as e: + logger.error("admin_upload_auto_import_failed", error=str(e), username=user.username) + return templates.TemplateResponse("admin.html", { "request": request, "user": user, "upload_results": results, "upload_errors": errors, + "auto_import_results": auto_import_results, "show_upload_results": True }) diff --git a/app/sync_legacy_to_modern.py b/app/sync_legacy_to_modern.py index 8078a4a..4675b11 100644 --- a/app/sync_legacy_to_modern.py +++ b/app/sync_legacy_to_modern.py @@ -526,3 +526,5 @@ def sync_all(db: Session, clear_existing: bool = False) -> Dict[str, Any]: return results + + diff --git a/app/templates/admin.html b/app/templates/admin.html index 2e7dfbb..6316c9a 100644 --- a/app/templates/admin.html +++ b/app/templates/admin.html @@ -53,6 +53,14 @@ TRNSTYPE, TRNSLKUP, FOOTERS, FILESTAT, EMPLOYEE, GRUPLKUP, FILETYPE, and all related tables (*.csv) +
+ + +
@@ -117,6 +125,80 @@ {% endif %} + + {% if auto_import_results %} +
+
+
+ Auto Import Results +
+
+
+ {% if auto_import_results.stopped %} +
+ + Stopped after {{ auto_import_results.files|length }} file(s) due to errors in {{ auto_import_results.stopped_on }}. +
+ {% endif %} + +
+ + + + + + + + + + + + + + {% for item in auto_import_results.files %} + + + + + + + + + + {% endfor %} + +
FilenameTypeStatusTotalSuccessErrorsError Details
{{ item.filename }}{{ item.import_type }} + {% if item.status == 'success' %} + Completed + {% else %} + Failed + {% endif %} + {{ item.total_rows }}{{ item.success_count }}{{ item.error_count }} + {% if item.errors %} +
+ View Errors ({{ item.errors|length }}) +
    + {% for err in item.errors %} +
  • {{ err }}
  • + {% endfor %} +
+
+ {% else %} + None + {% endif %} +
+
+ + {% if auto_import_results.skipped_unknowns and auto_import_results.skipped_unknowns|length > 0 %} +
+ + {{ auto_import_results.skipped_unknowns|length }} unknown file(s) were skipped. Map them in the Data Import section. +
+ {% endif %} +
+
+ {% endif %} + {% if upload_errors %}
diff --git a/delphi.db b/delphi.db index 35ae41b..d23b9a1 100644 Binary files a/delphi.db and b/delphi.db differ diff --git a/docs/IMPORT_GUIDE.md b/docs/IMPORT_GUIDE.md index c930376..4025271 100644 --- a/docs/IMPORT_GUIDE.md +++ b/docs/IMPORT_GUIDE.md @@ -66,8 +66,9 @@ These tables contain lookup data used by other tables. 1. Scroll to the **File Upload** section 2. Click "Select CSV Files" and choose your CSV files 3. You can upload multiple files at once -4. Click "Upload Files" -5. Review the upload results to ensure files were recognized correctly +4. Choose whether to enable "Auto-import after upload" (default ON). When enabled, the system will import the uploaded files immediately following the Import Order Guide and will stop after the first file that reports any row errors. +5. Click "Upload Files" +6. Review the upload and auto-import results to ensure files were recognized and processed correctly ### 3. Import Reference Tables First @@ -307,3 +308,5 @@ Contains sync functions that: - **Legacy Models**: Preserve original Paradox schema (Rolodex, LegacyPhone, LegacyFile, Ledger, etc.) - **Modern Models**: Simplified application schema (Client, Phone, Case, Transaction, Payment, Document) + + diff --git a/docs/IMPORT_SYSTEM_SUMMARY.md b/docs/IMPORT_SYSTEM_SUMMARY.md index 01e4522..fc37f63 100644 --- a/docs/IMPORT_SYSTEM_SUMMARY.md +++ b/docs/IMPORT_SYSTEM_SUMMARY.md @@ -361,3 +361,5 @@ The implementation follows best practices: Total implementation: ~3,000 lines of production-quality code supporting 27+ table types across 35 functions. + + diff --git a/requirements.txt b/requirements.txt index c6ca725..6dc268f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ aiofiles==23.2.1 structlog==24.1.0 itsdangerous==2.2.0 fpdf2>=2.7,<3 +pytest==8.3.3 diff --git a/scripts/smoke.sh b/scripts/smoke.sh old mode 100644 new mode 100755 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d64adf6 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +import os +import sys + + +# Ensure project root is on sys.path so `import app.*` works in Docker +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +if PROJECT_ROOT not in sys.path: + sys.path.insert(0, PROJECT_ROOT) + + diff --git a/tests/test_auto_import.py b/tests/test_auto_import.py new file mode 100644 index 0000000..147f917 --- /dev/null +++ b/tests/test_auto_import.py @@ -0,0 +1,120 @@ +import json +from datetime import datetime + +import pytest + +from app.main import run_auto_import_for_upload, ORDER_INDEX + + +class DummyImportLog: + # Minimal stand-in to satisfy attribute assignments; not used by SQLAlchemy + def __init__(self, **kwargs): + self.import_type = kwargs.get("import_type") + self.file_name = kwargs.get("file_name") + self.file_path = kwargs.get("file_path") + self.status = kwargs.get("status", "pending") + self.total_rows = 0 + self.success_count = 0 + self.error_count = 0 + self.error_details = "[]" + self.completed_at = None + + +class DummyDB: + """Very small in-memory stub for the DB session interactions used by helper.""" + + def __init__(self, import_results_by_type): + self.import_results_by_type = import_results_by_type + self.logs = [] + + # SQLAlchemy-like API surface used by helper + def add(self, obj): + # record created logs + self.logs.append(obj) + + def commit(self): + # no-op for tests + return None + + +# Monkeypatch target lookup inside the helper by patching process function in module namespace +@pytest.fixture(autouse=True) +def patch_process_csv_import(monkeypatch): + from app import main as main_mod + + def fake_process_csv_import(db, import_type, file_path): + # return test-configured results from DummyDB + return db.import_results_by_type.get(import_type, {"success": 0, "errors": []}) + + monkeypatch.setattr(main_mod, "process_csv_import", fake_process_csv_import) + # Replace ImportLog class with Dummy for isolation (no DB) + monkeypatch.setattr(main_mod, "ImportLog", DummyImportLog) + + +def sorted_by_order(items): + return sorted(items, key=lambda x: (ORDER_INDEX.get(x["import_type"], 1_000_000), x.get("filename", ""))) + + +def test_auto_import_sorts_and_runs_in_order(): + uploaded = [ + {"filename": "B.csv", "stored_filename": "files_b.csv", "file_path": "/tmp/b.csv", "import_type": "files"}, + {"filename": "A.csv", "stored_filename": "trnstype_a.csv", "file_path": "/tmp/a.csv", "import_type": "trnstype"}, + {"filename": "C.csv", "stored_filename": "payments_c.csv", "file_path": "/tmp/c.csv", "import_type": "payments"}, + ] + + # Make all succeed + db = DummyDB({ + "trnstype": {"success": 1, "errors": [], "total_rows": 10}, + "files": {"success": 1, "errors": [], "total_rows": 20}, + "payments": {"success": 1, "errors": [], "total_rows": 5}, + }) + + result = run_auto_import_for_upload(db, uploaded) + + # Should be ordered by IMPORT_ORDER + filenames_in_result = [f["stored_filename"] for f in result["files"]] + expected_order = [i["stored_filename"] for i in sorted_by_order(uploaded)] + assert filenames_in_result == expected_order + assert result["stopped"] is False + assert result["stopped_on"] is None + assert result["skipped_unknowns"] == [] + + +def test_auto_import_skips_unknown_and_reports(): + uploaded = [ + {"filename": "Unknown.csv", "stored_filename": "unknown_1.csv", "file_path": "/tmp/u1.csv", "import_type": "unknown"}, + {"filename": "Rolodex.csv", "stored_filename": "rolodex_2.csv", "file_path": "/tmp/r2.csv", "import_type": "rolodex"}, + ] + + db = DummyDB({"rolodex": {"success": 1, "errors": [], "total_rows": 1}}) + result = run_auto_import_for_upload(db, uploaded) + + # Only known type processed + assert [f["stored_filename"] for f in result["files"]] == ["rolodex_2.csv"] + # Unknown skipped and listed + assert result["skipped_unknowns"] == [{"filename": "Unknown.csv", "stored_filename": "unknown_1.csv"}] + + +def test_auto_import_stops_on_first_error_and_sets_stopped_on(): + uploaded = [ + {"filename": "A.csv", "stored_filename": "trnstype_a.csv", "file_path": "/tmp/a.csv", "import_type": "trnstype"}, + {"filename": "B.csv", "stored_filename": "files_b.csv", "file_path": "/tmp/b.csv", "import_type": "files"}, + {"filename": "C.csv", "stored_filename": "payments_c.csv", "file_path": "/tmp/c.csv", "import_type": "payments"}, + ] + + # First succeeds, second returns errors -> should stop before third + db = DummyDB({ + "trnstype": {"success": 1, "errors": [], "total_rows": 10}, + "files": {"success": 0, "errors": ["bad row"], "total_rows": 2}, + "payments": {"success": 1, "errors": [], "total_rows": 5}, + }) + + result = run_auto_import_for_upload(db, uploaded) + + processed = [f["stored_filename"] for f in result["files"]] + # The run order starts with trnstype then files; payments should not run + assert processed == ["trnstype_a.csv", "files_b.csv"] + assert result["stopped"] is True + assert result["stopped_on"] == "files_b.csv" + + diff --git a/tests/test_import_detection.py b/tests/test_import_detection.py new file mode 100644 index 0000000..65fbac4 --- /dev/null +++ b/tests/test_import_detection.py @@ -0,0 +1,66 @@ +import pytest + +from app.main import get_import_type_from_filename + + +@pytest.mark.parametrize( + "name,expected", + [ + ("TRNSTYPE.csv", "trnstype"), + ("TrnsLkup.csv", "trnslkup"), + ("FOOTERS.csv", "footers"), + ("FILESTAT.csv", "filestat"), + ("EMPLOYEE.csv", "employee"), + ("GRUPLKUP.csv", "gruplkup"), + ("GROUPLKUP.csv", "gruplkup"), + ("FILETYPE.csv", "filetype"), + ("FVARLKUP.csv", "fvarlkup"), + ("RVARLKUP.csv", "rvarlkup"), + ("ROLEX_V.csv", "rolex_v"), + ("ROLEXV.csv", "rolex_v"), + ("ROLODEX.csv", "rolodex"), + ("ROLEX.csv", "rolodex"), + ("FILES_R.csv", "files_r"), + ("FILESR.csv", "files_r"), + ("FILES_V.csv", "files_v"), + ("FILESV.csv", "files_v"), + ("FILENOTS.csv", "filenots"), + ("FILE_NOTS.csv", "filenots"), + ("FILES.csv", "files"), + ("FILE.csv", "files"), + ("PHONE.csv", "phone"), + ("LEDGER.csv", "ledger"), + ("DEPOSITS.csv", "deposits"), + ("DEPOSIT.csv", "deposits"), + ("PAYMENTS.csv", "payments"), + ("PAYMENT.csv", "payments"), + ("PLANINFO.csv", "planinfo"), + ("PLAN_INFO.csv", "planinfo"), + ("QDROS.csv", "qdros"), + ("QDRO.csv", "qdros"), + ("MARRIAGE.csv", "pension_marriage"), + ("DEATH.csv", "pension_death"), + ("SCHEDULE.csv", "pension_schedule"), + ("SEPARATE.csv", "pension_separate"), + ("RESULTS.csv", "pension_results"), + ("PENSIONS.csv", "pensions"), + ("PENSION.csv", "pensions"), + ], +) +def test_get_import_type_from_filename_known(name, expected): + assert get_import_type_from_filename(name) == expected + + +@pytest.mark.parametrize( + "name", + [ + "UNKNOWN.csv", + "gibberish.xyz", + "", # empty + ], +) +def test_get_import_type_from_filename_unknown(name): + with pytest.raises(ValueError): + get_import_type_from_filename(name) + +