fixes and refactor

- Centralize the legacy-CSV encoding list as ENCODINGS and the batch import order as IMPORT_ORDER; all handlers now reference the shared constants.
- Register new model mappings: INX_LKUP.csv -> FormKeyword, RESULTS.csv -> PensionResult; fix the missing comma after the TRNSACTN.csv entry.
- Map observed legacy export headers for FILESTAT, FORM_INX, FORM_LST, PRINTERS, MARRIAGE, DEATH, SEPARATE, LIFETABL, NUMBERAL, and RESULTS.
- Upsert PRINTERS.csv rows by primary key and enforce a single default printer.
- Assign sequential per-form line numbers when importing FORM_LST.csv.
- Widen convert_value heuristics (send, printer toggles, na_/le_ prefixes, month, number).
- parse_date now returns Optional[date]; datetime.utcnow() replaced with timezone-aware datetime.now(timezone.utc).
- Drop stdout debug prints and the unused preprocess_csv helper.
@@ -7,7 +7,7 @@ import re
 import os
 from pathlib import Path
 from difflib import SequenceMatcher
-from datetime import datetime, date
+from datetime import datetime, date, timezone
 from decimal import Decimal
 from typing import List, Dict, Any, Optional, Tuple
 from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as UploadFileForm, Form, Query
@@ -19,8 +19,8 @@ from app.models.rolodex import Rolodex, Phone
 from app.models.files import File
 from app.models.ledger import Ledger
 from app.models.qdro import QDRO
-from app.models.pensions import Pension, PensionSchedule, MarriageHistory, DeathBenefit, SeparationAgreement, LifeTable, NumberTable
-from app.models.lookups import Employee, FileType, FileStatus, TransactionType, TransactionCode, State, GroupLookup, Footer, PlanInfo, FormIndex, FormList, PrinterSetup, SystemSetup
+from app.models.pensions import Pension, PensionSchedule, MarriageHistory, DeathBenefit, SeparationAgreement, LifeTable, NumberTable, PensionResult
+from app.models.lookups import Employee, FileType, FileStatus, TransactionType, TransactionCode, State, GroupLookup, Footer, PlanInfo, FormIndex, FormList, PrinterSetup, SystemSetup, FormKeyword
 from app.models.additional import Payment, Deposit, FileNote, FormVariable, ReportVariable
 from app.models.flexible import FlexibleImport
 from app.models.audit import ImportAudit, ImportAuditFile
@@ -28,6 +28,25 @@ from app.config import settings
 
 router = APIRouter(tags=["import"])
 
+# Common encodings to try for legacy CSV files (order matters)
+ENCODINGS = [
+    'utf-8-sig',
+    'utf-8',
+    'windows-1252',
+    'iso-8859-1',
+    'cp1252',
+]
+
+# Unified import order used across batch operations
+IMPORT_ORDER = [
+    "STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
+    "TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
+    "INX_LKUP.csv",
+    "ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
+    "QDROS.csv", "PENSIONS.csv", "LIFETABL.csv", "NUMBERAL.csv", "PLANINFO.csv", "RESULTS.csv", "PAYMENTS.csv", "DEPOSITS.csv",
+    "FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
+]
+
 
 # CSV to Model mapping
 CSV_MODEL_MAPPING = {
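
Note on the new ENCODINGS constant: each route previously carried its own inline encoding list, and the decode loops below now all iterate the shared constant. A minimal sketch of the fallback-decode pattern it feeds (the helper name is illustrative; the handlers inline this loop):

    ENCODINGS = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']

    def decode_legacy_csv(content: bytes) -> str:
        # First successful decode wins, so order matters: utf-8-sig strips a BOM
        # before plain utf-8 gets a chance to leave it embedded in the header row.
        for encoding in ENCODINGS:
            try:
                return content.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise ValueError("could not decode CSV content")

    print(decode_legacy_csv("Café,1,2\n".encode("windows-1252")))  # Café,1,2

Since iso-8859-1 maps every byte, it never raises, so the trailing 'cp1252' entry is effectively unreachable; harmless, but it could be dropped.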
@@ -56,7 +75,6 @@ CSV_MODEL_MAPPING = {
     "FOOTERS.csv": Footer,
     "PLANINFO.csv": PlanInfo,
-    # Legacy alternate names from export directories
     "SCHEDULE.csv": PensionSchedule,
     "FORM_INX.csv": FormIndex,
     "FORM_LST.csv": FormList,
     "PRINTERS.csv": PrinterSetup,
@@ -67,7 +85,9 @@ CSV_MODEL_MAPPING = {
     "FVARLKUP.csv": FormVariable,
     "RVARLKUP.csv": ReportVariable,
     "PAYMENTS.csv": Payment,
-    "TRNSACTN.csv": Ledger  # Maps to existing Ledger model (same structure)
+    "TRNSACTN.csv": Ledger,  # Maps to existing Ledger model (same structure)
+    "INX_LKUP.csv": FormKeyword,
+    "RESULTS.csv": PensionResult
 }
 
 # Field mappings for CSV columns to database fields
@@ -230,8 +250,12 @@ FIELD_MAPPINGS = {
         "Default_Rate": "default_rate"
     },
     "FILESTAT.csv": {
         "Status": "status_code",
+        "Status_Code": "status_code",
         "Definition": "description",
+        "Description": "description",
+        "Send": "send",
+        "Footer_Code": "footer_code",
         "Sort_Order": "sort_order"
     },
     "FOOTERS.csv": {
@@ -253,22 +277,44 @@ FIELD_MAPPINGS = {
         "Phone": "phone",
         "Notes": "notes"
     },
+    "INX_LKUP.csv": {
+        "Keyword": "keyword",
+        "Description": "description"
+    },
     "FORM_INX.csv": {
-        "Form_Id": "form_id",
-        "Form_Name": "form_name",
-        "Category": "category"
+        "Name": "form_id",
+        "Keyword": "keyword"
     },
     "FORM_LST.csv": {
-        "Form_Id": "form_id",
-        "Line_Number": "line_number",
-        "Content": "content"
+        "Name": "form_id",
+        "Memo": "content",
+        "Status": "status"
     },
     "PRINTERS.csv": {
+        # Legacy variants
         "Printer_Name": "printer_name",
         "Description": "description",
         "Driver": "driver",
         "Port": "port",
-        "Default_Printer": "default_printer"
+        "Default_Printer": "default_printer",
+        # Observed legacy headers from export
+        "Number": "number",
+        "Name": "printer_name",
+        "Page_Break": "page_break",
+        "Setup_St": "setup_st",
+        "Reset_St": "reset_st",
+        "B_Underline": "b_underline",
+        "E_Underline": "e_underline",
+        "B_Bold": "b_bold",
+        "E_Bold": "e_bold",
+        # Optional report toggles
+        "Phone_Book": "phone_book",
+        "Rolodex_Info": "rolodex_info",
+        "Envelope": "envelope",
+        "File_Cabinet": "file_cabinet",
+        "Accounts": "accounts",
+        "Statements": "statements",
+        "Calendar": "calendar",
     },
     "SETUP.csv": {
         "Setting_Key": "setting_key",
@@ -285,32 +331,98 @@ FIELD_MAPPINGS = {
     "MARRIAGE.csv": {
         "File_No": "file_no",
         "Version": "version",
-        "Marriage_Date": "marriage_date",
-        "Separation_Date": "separation_date",
-        "Divorce_Date": "divorce_date"
+        "Married_From": "married_from",
+        "Married_To": "married_to",
+        "Married_Years": "married_years",
+        "Service_From": "service_from",
+        "Service_To": "service_to",
+        "Service_Years": "service_years",
+        "Marital_%": "marital_percent"
     },
     "DEATH.csv": {
         "File_No": "file_no",
         "Version": "version",
-        "Benefit_Type": "benefit_type",
-        "Benefit_Amount": "benefit_amount",
-        "Beneficiary": "beneficiary"
+        "Lump1": "lump1",
+        "Lump2": "lump2",
+        "Growth1": "growth1",
+        "Growth2": "growth2",
+        "Disc1": "disc1",
+        "Disc2": "disc2"
     },
     "SEPARATE.csv": {
         "File_No": "file_no",
         "Version": "version",
-        "Agreement_Date": "agreement_date",
-        "Terms": "terms"
+        "Separation_Rate": "terms"
     },
     "LIFETABL.csv": {
-        "Age": "age",
-        "Male_Mortality": "male_mortality",
-        "Female_Mortality": "female_mortality"
+        "AGE": "age",
+        "LE_AA": "le_aa",
+        "NA_AA": "na_aa",
+        "LE_AM": "le_am",
+        "NA_AM": "na_am",
+        "LE_AF": "le_af",
+        "NA_AF": "na_af",
+        "LE_WA": "le_wa",
+        "NA_WA": "na_wa",
+        "LE_WM": "le_wm",
+        "NA_WM": "na_wm",
+        "LE_WF": "le_wf",
+        "NA_WF": "na_wf",
+        "LE_BA": "le_ba",
+        "NA_BA": "na_ba",
+        "LE_BM": "le_bm",
+        "NA_BM": "na_bm",
+        "LE_BF": "le_bf",
+        "NA_BF": "na_bf",
+        "LE_HA": "le_ha",
+        "NA_HA": "na_ha",
+        "LE_HM": "le_hm",
+        "NA_HM": "na_hm",
+        "LE_HF": "le_hf",
+        "NA_HF": "na_hf"
     },
+    "NUMBERAL.csv": {
+        "Table_Name": "table_name",
+        "Month": "month",
+        "NA_AA": "na_aa",
+        "NA_AM": "na_am",
+        "NA_AF": "na_af",
+        "NA_WA": "na_wa",
+        "NA_WM": "na_wm",
+        "NA_WF": "na_wf",
+        "NA_BA": "na_ba",
+        "NA_BM": "na_bm",
+        "NA_BF": "na_bf",
+        "NA_HA": "na_ha",
+        "NA_HM": "na_hm",
+        "NA_HF": "na_hf"
+    },
     "RESULTS.csv": {
         "Accrued": "accrued",
         "Start_Age": "start_age",
         "COLA": "cola",
         "Withdrawal": "withdrawal",
         "Pre_DR": "pre_dr",
         "Post_DR": "post_dr",
         "Tax_Rate": "tax_rate",
-        "Age": "age",
-        "Value": "value"
+        "Years_From": "years_from",
+        "Life_Exp": "life_exp",
+        "EV_Monthly": "ev_monthly",
+        "Payments": "payments",
+        "Pay_Out": "pay_out",
+        "Fund_Value": "fund_value",
+        "PV": "pv",
+        "Mortality": "mortality",
+        "PV_AM": "pv_am",
+        "PV_AMT": "pv_amt",
+        "PV_Pre_DB": "pv_pre_db",
+        "PV_Annuity": "pv_annuity",
+        "WV_AT": "wv_at",
+        "PV_Plan": "pv_plan",
+        "Years_Married": "years_married",
+        "Years_Service": "years_service",
+        "Marr_Per": "marr_per",
+        "Marr_Amt": "marr_amt"
     },
     # Additional CSV file mappings
     "DEPOSITS.csv": {
@@ -357,7 +469,7 @@ FIELD_MAPPINGS = {
 }
 
 
-def parse_date(date_str: str) -> Optional[datetime]:
+def parse_date(date_str: str) -> Optional[date]:
     """Parse date string in various formats"""
     if not date_str or date_str.strip() == "":
         return None
@@ -612,7 +724,11 @@ def convert_value(value: str, field_name: str) -> Any:
         return parsed_date
 
     # Boolean fields
-    if any(word in field_name.lower() for word in ["active", "default_printer", "billed", "transferable"]):
+    if any(word in field_name.lower() for word in [
+        "active", "default_printer", "billed", "transferable", "send",
+        # PrinterSetup legacy toggles
+        "phone_book", "rolodex_info", "envelope", "file_cabinet", "accounts", "statements", "calendar"
+    ]):
         if value.lower() in ["true", "1", "yes", "y", "on", "active"]:
             return True
         elif value.lower() in ["false", "0", "no", "n", "off", "inactive"]:
@@ -621,7 +737,11 @@ def convert_value(value: str, field_name: str) -> Any:
         return None
 
     # Numeric fields (float)
-    if any(word in field_name.lower() for word in ["rate", "hour", "bal", "fee", "amount", "owing", "transfer", "valu", "accrued", "vested", "cola", "tax", "percent", "benefit_amount", "mortality", "value"]):
+    if any(word in field_name.lower() for word in [
+        "rate", "hour", "bal", "fee", "amount", "owing", "transfer", "valu",
+        "accrued", "vested", "cola", "tax", "percent", "benefit_amount", "mortality",
+        "value"
+    ]) or field_name.lower().startswith(("na_", "le_")):
         try:
             # Remove currency symbols and commas
             cleaned_value = value.replace("$", "").replace(",", "").replace("%", "")
@@ -630,7 +750,9 @@ def convert_value(value: str, field_name: str) -> Any:
             return 0.0
 
     # Integer fields
-    if any(word in field_name.lower() for word in ["item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num"]):
+    if any(word in field_name.lower() for word in [
+        "item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num", "month", "number"
+    ]):
         try:
             return int(float(value))  # Handle cases like "1.0"
         except ValueError:
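
The widened convert_value heuristics are purely name-based. A condensed, runnable sketch of the new float detection (keyword list trimmed for brevity; the real list is above):

    def looks_like_float(field_name: str) -> bool:
        keywords = ["rate", "amount", "mortality", "value"]  # subset of the real list
        name = field_name.lower()
        return any(word in name for word in keywords) or name.startswith(("na_", "le_"))

    assert looks_like_float("LE_AM")       # life-table expectancy column
    assert looks_like_float("na_wf")       # survivor-count column
    assert not looks_like_float("keyword")

The startswith(("na_", "le_")) clause is what lets the LIFETABL/NUMBERAL columns added above parse as floats without enumerating all twenty-plus names.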
@@ -673,11 +795,18 @@ async def get_available_csv_files(current_user: User = Depends(get_current_user)
         "available_files": list(CSV_MODEL_MAPPING.keys()),
         "descriptions": {
             "ROLODEX.csv": "Customer/contact information",
+            "ROLEX_V.csv": "Customer/contact information (alias)",
             "PHONE.csv": "Phone numbers linked to customers",
             "FILES.csv": "Client files and cases",
+            "FILES_R.csv": "Client files and cases (alias)",
+            "FILES_V.csv": "Client files and cases (alias)",
             "LEDGER.csv": "Financial transactions per file",
             "QDROS.csv": "Legal documents and court orders",
             "PENSIONS.csv": "Pension calculation data",
+            "SCHEDULE.csv": "Vesting schedules for pensions",
+            "MARRIAGE.csv": "Marriage history data",
+            "DEATH.csv": "Death benefit calculations",
+            "SEPARATE.csv": "Separation agreements",
             "EMPLOYEE.csv": "Staff and employee information",
             "STATES.csv": "US States lookup table",
             "FILETYPE.csv": "File type categories",
@@ -688,7 +817,12 @@ async def get_available_csv_files(current_user: User = Depends(get_current_user)
             "FVARLKUP.csv": "Form template variables",
             "RVARLKUP.csv": "Report template variables",
             "PAYMENTS.csv": "Individual payments within deposits",
-            "TRNSACTN.csv": "Transaction details (maps to Ledger)"
+            "TRNSACTN.csv": "Transaction details (maps to Ledger)",
+            "INX_LKUP.csv": "Form keywords lookup",
+            "PLANINFO.csv": "Pension plan information",
+            "RESULTS.csv": "Pension computed results",
+            "LIFETABL.csv": "Life expectancy table by age, sex, and race (rich typed)",
+            "NUMBERAL.csv": "Monthly survivor counts by sex and race (rich typed)"
         },
         "auto_discovery": True
     }
@@ -724,7 +858,7 @@ async def import_csv_data(
     content = await file.read()
 
     # Try multiple encodings for legacy CSV files
-    encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+    encodings = ENCODINGS
     csv_content = None
     for encoding in encodings:
         try:
@@ -736,34 +870,7 @@ async def import_csv_data(
     if csv_content is None:
         raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
 
-    # Preprocess CSV content to fix common legacy issues
-    def preprocess_csv(content):
-        lines = content.split('\n')
-        cleaned_lines = []
-        i = 0
-
-        while i < len(lines):
-            line = lines[i]
-            # If line doesn't have the expected number of commas, it might be a broken multi-line field
-            if i == 0:  # Header line
-                cleaned_lines.append(line)
-                expected_comma_count = line.count(',')
-                i += 1
-                continue
-
-            # Check if this line has the expected number of commas
-            if line.count(',') < expected_comma_count:
-                # This might be a continuation of the previous line
-                # Try to merge with previous line
-                if cleaned_lines:
-                    cleaned_lines[-1] += " " + line.replace('\n', ' ').replace('\r', ' ')
-                else:
-                    cleaned_lines.append(line)
-            else:
-                cleaned_lines.append(line)
-            i += 1
-
-        return '\n'.join(cleaned_lines)
+    # Note: preprocess_csv helper removed as unused; robust parsing handled below
 
     # Custom robust parser for problematic legacy CSV files
     class MockCSVReader:
@@ -791,7 +898,7 @@ async def import_csv_data(
             header_reader = csv.reader(io.StringIO(lines[0]))
             headers = next(header_reader)
             headers = [h.strip() for h in headers]
-            print(f"DEBUG: Found {len(headers)} headers: {headers}")
+            # Debug logging removed in API path; rely on audit/logging if needed
             # Build dynamic header mapping for this file/model
             mapping_info = _build_dynamic_mapping(headers, model_class, file_type)
 
@@ -829,17 +936,21 @@ async def import_csv_data(
                     continue
 
             csv_reader = MockCSVReader(rows_data, headers)
-            print(f"SUCCESS: Parsed {len(rows_data)} rows (skipped {skipped_rows} malformed rows)")
+            # Parsing summary suppressed to avoid noisy stdout in API
 
         except Exception as e:
-            print(f"Custom parsing failed: {e}")
+            # Keep error minimal for client; internal logging can capture 'e'
             raise HTTPException(status_code=400, detail=f"Could not parse CSV file. The file appears to have serious formatting issues. Error: {str(e)}")
 
         imported_count = 0
+        created_count = 0
+        updated_count = 0
         errors = []
         flexible_saved = 0
        mapped_headers = mapping_info.get("mapped_headers", {})
        unmapped_headers = mapping_info.get("unmapped_headers", [])
+        # Special handling: assign line numbers per form for FORM_LST.csv
+        form_lst_line_counters: Dict[str, int] = {}
 
         # If replace_existing is True, delete all existing records and related flexible extras
         if replace_existing:
@@ -860,6 +971,16 @@ async def import_csv_data(
                 converted_value = convert_value(row[csv_field], db_field)
                 if converted_value is not None:
                     model_data[db_field] = converted_value
 
+            # Inject sequential line_number for FORM_LST rows grouped by form_id
+            if file_type == "FORM_LST.csv":
+                form_id_value = model_data.get("form_id")
+                if form_id_value:
+                    current = form_lst_line_counters.get(str(form_id_value), 0) + 1
+                    form_lst_line_counters[str(form_id_value)] = current
+                    # Only set if not provided
+                    if "line_number" not in model_data:
+                        model_data["line_number"] = current
+
             # Skip empty rows
             if not any(model_data.values()):
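
The FORM_LST counter logic, isolated as a runnable sketch (variable names match the diff; the wrapper function is illustrative):

    from typing import Dict

    form_lst_line_counters: Dict[str, int] = {}

    def next_line_number(form_id: str) -> int:
        # Counters are keyed by form_id, so each form's lines number from 1.
        current = form_lst_line_counters.get(form_id, 0) + 1
        form_lst_line_counters[form_id] = current
        return current

    assert [next_line_number("A"), next_line_number("A"), next_line_number("B")] == [1, 2, 1]

Because the legacy FORM_LST.csv carries no line numbers of its own (the old Line_Number mapping is gone), row order within the file becomes the line order.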
@@ -902,10 +1023,43 @@ async def import_csv_data(
                 if 'file_no' not in model_data or not model_data['file_no']:
                     continue  # Skip ledger records without file number
 
-            # Create model instance
-            instance = model_class(**model_data)
-            db.add(instance)
-            db.flush()  # Ensure PK is available
+            # Create or update model instance
+            instance = None
+            # Upsert behavior for printers
+            if model_class == PrinterSetup:
+                # Determine primary key field name
+                _, pk_names = _get_model_columns(model_class)
+                pk_field_name_local = pk_names[0] if len(pk_names) == 1 else None
+                pk_value_local = model_data.get(pk_field_name_local) if pk_field_name_local else None
+                if pk_field_name_local and pk_value_local:
+                    existing = db.query(model_class).filter(getattr(model_class, pk_field_name_local) == pk_value_local).first()
+                    if existing:
+                        # Update mutable fields
+                        for k, v in model_data.items():
+                            if k != pk_field_name_local:
+                                setattr(existing, k, v)
+                        instance = existing
+                        updated_count += 1
+                    else:
+                        instance = model_class(**model_data)
+                        db.add(instance)
+                        created_count += 1
+                else:
+                    # Fallback to insert if PK missing
+                    instance = model_class(**model_data)
+                    db.add(instance)
+                    created_count += 1
+                db.flush()
+                # Enforce single default
+                try:
+                    if bool(model_data.get("default_printer")):
+                        db.query(model_class).filter(getattr(model_class, pk_field_name_local) != getattr(instance, pk_field_name_local)).update({model_class.default_printer: False})
+                except Exception:
+                    pass
+            else:
+                instance = model_class(**model_data)
+                db.add(instance)
+                db.flush()  # Ensure PK is available
 
             # Capture PK details for flexible storage linkage (single-column PKs only)
             _, pk_names = _get_model_columns(model_class)
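
The printer branch is the only upsert path; every other model stays insert-only. The shape of the logic, modeled on an in-memory table so it runs standalone (a sketch of the semantics, not the SQLAlchemy code above):

    def upsert_printer(table: dict, data: dict) -> str:
        # 'number' stands in for the single-column primary key; a missing key
        # falls through to a plain insert, mirroring the fallback branch.
        pk = data.get("number")
        if pk in table:
            table[pk].update({k: v for k, v in data.items() if k != "number"})
            return "updated"
        table[pk] = dict(data)
        return "created"

    def enforce_single_default(table: dict, winner) -> None:
        # Mirrors the post-flush pass: only the most recent default survives.
        for pk, row in table.items():
            row["default_printer"] = (pk == winner)

    printers: dict = {}
    assert upsert_printer(printers, {"number": 1, "printer_name": "HP"}) == "created"
    assert upsert_printer(printers, {"number": 1, "printer_name": "HP LaserJet"}) == "updated"

Re-importing PRINTERS.csv is therefore idempotent: existing rows are refreshed in place instead of duplicated.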
@@ -980,6 +1134,10 @@ async def import_csv_data(
                 "flexible_saved_rows": flexible_saved,
             },
         }
+        # Include create/update breakdown for printers
+        if file_type == "PRINTERS.csv":
+            result["created_count"] = created_count
+            result["updated_count"] = updated_count
 
         if errors:
             result["warning"] = f"Import completed with {len(errors)} errors"
@@ -987,9 +1145,7 @@ async def import_csv_data(
         return result
 
     except Exception as e:
-        print(f"IMPORT ERROR DEBUG: {type(e).__name__}: {str(e)}")
-        import traceback
-        print(f"TRACEBACK: {traceback.format_exc()}")
+        # Suppress stdout debug prints in API layer
         db.rollback()
         raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
 
@@ -1071,7 +1227,7 @@ async def validate_csv_file(
     content = await file.read()
 
     # Try multiple encodings for legacy CSV files
-    encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+    encodings = ENCODINGS
     csv_content = None
     for encoding in encodings:
         try:
@@ -1083,18 +1239,6 @@ async def validate_csv_file(
     if csv_content is None:
         raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
 
-    # Parse CSV with fallback to robust line-by-line parsing
-    def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
-        try:
-            reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
-            headers_local = reader.fieldnames or []
-            rows_local = []
-            for r in reader:
-                rows_local.append(r)
-            return rows_local, headers_local
-        except Exception:
-            return parse_csv_robust(text)
-
     rows_list, csv_headers = parse_csv_with_fallback(csv_content)
     model_class = CSV_MODEL_MAPPING[file_type]
     mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
@@ -1142,9 +1286,7 @@ async def validate_csv_file(
         }
 
     except Exception as e:
-        print(f"VALIDATION ERROR DEBUG: {type(e).__name__}: {str(e)}")
-        import traceback
-        print(f"VALIDATION TRACEBACK: {traceback.format_exc()}")
+        # Suppress stdout debug prints in API layer
         raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")
 
 
@@ -1199,7 +1341,7 @@ async def batch_validate_csv_files(
     content = await file.read()
 
     # Try multiple encodings for legacy CSV files (include BOM-friendly utf-8-sig)
-    encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+    encodings = ENCODINGS
     csv_content = None
     for encoding in encodings:
         try:
@@ -1302,13 +1444,7 @@ async def batch_import_csv_files(
         raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
 
     # Define optimal import order based on dependencies
-    import_order = [
-        "STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
-        "TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
-        "ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
-        "QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
-        "FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
-    ]
+    import_order = IMPORT_ORDER
 
     # Sort uploaded files by optimal import order
     file_map = {f.filename: f for f in files}
@@ -1365,7 +1501,7 @@ async def batch_import_csv_files(
             saved_path = str(file_path)
         except Exception:
             saved_path = None
-        encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+        encodings = ENCODINGS
         csv_content = None
         for encoding in encodings:
             try:
@@ -1466,7 +1602,7 @@ async def batch_import_csv_files(
             saved_path = None
 
         # Try multiple encodings for legacy CSV files
-        encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+        encodings = ENCODINGS
         csv_content = None
         for encoding in encodings:
             try:
@@ -1505,6 +1641,8 @@ async def batch_import_csv_files(
         imported_count = 0
         errors = []
         flexible_saved = 0
+        # Special handling: assign line numbers per form for FORM_LST.csv
+        form_lst_line_counters: Dict[str, int] = {}
 
         # If replace_existing is True and this is the first file of this type
         if replace_existing:
@@ -1523,6 +1661,15 @@ async def batch_import_csv_files(
                     converted_value = convert_value(row[csv_field], db_field)
                     if converted_value is not None:
                         model_data[db_field] = converted_value
 
+                # Inject sequential line_number for FORM_LST rows grouped by form_id
+                if file_type == "FORM_LST.csv":
+                    form_id_value = model_data.get("form_id")
+                    if form_id_value:
+                        current = form_lst_line_counters.get(str(form_id_value), 0) + 1
+                        form_lst_line_counters[str(form_id_value)] = current
+                        if "line_number" not in model_data:
+                            model_data["line_number"] = current
+
                 if not any(model_data.values()):
                     continue
@@ -1697,7 +1844,7 @@ async def batch_import_csv_files(
         "completed_with_errors" if summary["successful_files"] > 0 else "failed"
     )
     audit_row.message = f"Batch import completed: {audit_row.successful_files}/{audit_row.total_files} files"
-    audit_row.finished_at = datetime.utcnow()
+    audit_row.finished_at = datetime.now(timezone.utc)
     audit_row.details = {
         "files": [
             {"file_type": r.get("file_type"), "status": r.get("status"), "imported_count": r.get("imported_count", 0), "errors": r.get("errors", 0)}
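
datetime.utcnow() returns a naive datetime (tzinfo is None) and is deprecated as of Python 3.12; the replacement yields an aware UTC timestamp:

    from datetime import datetime, timezone

    aware = datetime.now(timezone.utc)
    assert aware.tzinfo is not None  # serializes with an explicit UTC offset

This is also why the first hunk adds timezone to the datetime import.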
@@ -1844,13 +1991,7 @@ async def rerun_failed_files(
         raise HTTPException(status_code=400, detail="No saved files available to rerun. Upload again.")
 
     # Import order for sorting
-    import_order = [
-        "STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
-        "TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
-        "ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
-        "QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
-        "FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
-    ]
+    import_order = IMPORT_ORDER
     order_index = {name: i for i, name in enumerate(import_order)}
     items.sort(key=lambda x: order_index.get(x[0], len(import_order) + 1))
 
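
With import_order now aliasing IMPORT_ORDER, the sort key built from it behaves like this (simplified to bare filenames and a three-entry order list for illustration; the real code sorts tuples via x[0]):

    IMPORT_ORDER = ["STATES.csv", "PHONE.csv", "FILES.csv"]  # truncated example
    order_index = {name: i for i, name in enumerate(IMPORT_ORDER)}
    items = ["FILES.csv", "UNKNOWN.csv", "STATES.csv"]
    items.sort(key=lambda name: order_index.get(name, len(IMPORT_ORDER) + 1))
    assert items == ["STATES.csv", "FILES.csv", "UNKNOWN.csv"]

Unknown filenames fall past the end of the known order rather than raising, so dependency-free extras import last.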
@@ -1898,7 +2039,7 @@ async def rerun_failed_files(
 
         if file_type not in CSV_MODEL_MAPPING:
             # Flexible-only path
-            encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+            encodings = ENCODINGS
             csv_content = None
             for enc in encodings:
                 try:
@@ -1964,7 +2105,7 @@ async def rerun_failed_files(
 
         # Known model path
         model_class = CSV_MODEL_MAPPING[file_type]
-        encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
+        encodings = ENCODINGS
         csv_content = None
         for enc in encodings:
             try:
@@ -1996,6 +2137,8 @@ async def rerun_failed_files(
         unmapped_headers = mapping_info["unmapped_headers"]
         imported_count = 0
         errors: List[Dict[str, Any]] = []
+        # Special handling: assign line numbers per form for FORM_LST.csv
+        form_lst_line_counters: Dict[str, int] = {}
 
         if replace_existing:
             db.query(model_class).delete()
@@ -2013,6 +2156,14 @@ async def rerun_failed_files(
                 converted_value = convert_value(row[csv_field], db_field)
                 if converted_value is not None:
                     model_data[db_field] = converted_value
+            # Inject sequential line_number for FORM_LST rows grouped by form_id
+            if file_type == "FORM_LST.csv":
+                form_id_value = model_data.get("form_id")
+                if form_id_value:
+                    current = form_lst_line_counters.get(str(form_id_value), 0) + 1
+                    form_lst_line_counters[str(form_id_value)] = current
+                    if "line_number" not in model_data:
+                        model_data["line_number"] = current
             if not any(model_data.values()):
                 continue
             required_fields = _get_required_fields(model_class)
@@ -2147,7 +2298,7 @@ async def rerun_failed_files(
         "completed_with_errors" if summary["successful_files"] > 0 else "failed"
     )
     rerun_audit.message = f"Rerun completed: {rerun_audit.successful_files}/{rerun_audit.total_files} files"
-    rerun_audit.finished_at = datetime.utcnow()
+    rerun_audit.finished_at = datetime.now(timezone.utc)
     rerun_audit.details = {"rerun_of": audit_id}
     db.add(rerun_audit)
     db.commit()
@@ -2183,7 +2334,7 @@ async def upload_flexible_only(
     db.commit()
 
     content = await file.read()
-    encodings = ["utf-8-sig", "utf-8", "windows-1252", "iso-8859-1", "cp1252"]
+    encodings = ENCODINGS
     csv_content = None
     for encoding in encodings:
         try: