This commit is contained in:
HotSwapp
2025-08-18 20:20:04 -05:00
parent 89b2bc0aa2
commit bac8cc4bd5
114 changed files with 30258 additions and 1341 deletions


@@ -3,6 +3,7 @@ Data import API endpoints for CSV file uploads with auto-discovery mapping.
"""
import csv
import io
import zipfile
import re
import os
from pathlib import Path
@@ -11,6 +12,7 @@ from datetime import datetime, date, timezone
from decimal import Decimal
from typing import List, Dict, Any, Optional, Tuple
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as UploadFileForm, Form, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from app.database.base import get_db
from app.auth.security import get_current_user
@@ -40,8 +42,8 @@ ENCODINGS = [
# Unified import order used across batch operations
IMPORT_ORDER = [
"STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
"TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
"STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FOOTERS.csv", "FILESTAT.csv",
"TRNSTYPE.csv", "TRNSLKUP.csv", "SETUP.csv", "PRINTERS.csv",
"INX_LKUP.csv",
"ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
"QDROS.csv", "PENSIONS.csv", "SCHEDULE.csv", "MARRIAGE.csv", "DEATH.csv", "SEPARATE.csv", "LIFETABL.csv", "NUMBERAL.csv", "PLANINFO.csv", "RESULTS.csv", "PAYMENTS.csv", "DEPOSITS.csv",
@@ -91,8 +93,83 @@ CSV_MODEL_MAPPING = {
"RESULTS.csv": PensionResult
}
# Minimal CSV template definitions (headers + one sample row) used for template downloads
CSV_IMPORT_TEMPLATES: Dict[str, Dict[str, List[str]]] = {
"FILES.csv": {
"headers": ["File_No", "Id", "Empl_Num", "File_Type", "Opened", "Status", "Rate_Per_Hour"],
"sample": ["F-001", "CLIENT-1", "EMP01", "CIVIL", "2024-01-01", "ACTIVE", "150"],
},
"LEDGER.csv": {
"headers": ["File_No", "Date", "Empl_Num", "T_Code", "T_Type", "Amount"],
"sample": ["F-001", "2024-01-15", "EMP01", "FEE", "1", "500.00"],
},
"PAYMENTS.csv": {
"headers": ["Deposit_Date", "Amount"],
"sample": ["2024-01-15", "1500.00"],
},
# Additional templates for convenience
"TRNSACTN.csv": {
# Same structure as LEDGER.csv
"headers": ["File_No", "Date", "Empl_Num", "T_Code", "T_Type", "Amount"],
"sample": ["F-002", "2024-02-10", "EMP02", "FEE", "1", "250.00"],
},
"DEPOSITS.csv": {
"headers": ["Deposit_Date", "Total"],
"sample": ["2024-02-10", "1500.00"],
},
"ROLODEX.csv": {
# Minimal common contact fields
"headers": ["Id", "Last", "First", "A1", "City", "Abrev", "Zip", "Email"],
"sample": ["CLIENT-1", "Smith", "John", "123 Main St", "Denver", "CO", "80202", "john.smith@example.com"],
},
}
def _generate_csv_template_bytes(file_type: str) -> bytes:
"""Return CSV template content for the given file type as bytes.
Raises HTTPException if unsupported.
"""
key = (file_type or "").strip()
if key not in CSV_IMPORT_TEMPLATES:
raise HTTPException(status_code=400, detail=f"Unsupported template type: {file_type}. Choose one of: {list(CSV_IMPORT_TEMPLATES.keys())}")
cfg = CSV_IMPORT_TEMPLATES[key]
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(cfg["headers"])
writer.writerow(cfg["sample"])
output.seek(0)
return output.getvalue().encode("utf-8")
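# Illustrative sketch (assumes this module's helpers are used as-is): build the FILES.csv
# template in memory and inspect its header row without going through the API layer.
def _example_template_headers() -> List[str]:
    template_bytes = _generate_csv_template_bytes("FILES.csv")
    header_line = template_bytes.decode("utf-8").splitlines()[0]
    # ["File_No", "Id", "Empl_Num", "File_Type", "Opened", "Status", "Rate_Per_Hour"]
    return header_line.split(",")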
# Field mappings for CSV columns to database fields
# Legacy header synonyms used as hints only (not required). Auto-discovery will work without exact matches.
REQUIRED_MODEL_FIELDS: Dict[str, List[str]] = {
# Files: core identifiers and billing/status fields used throughout the app
"FILES.csv": [
"file_no",
"id",
"empl_num",
"file_type",
"opened",
"status",
"rate_per_hour",
],
# Ledger: core transaction fields
"LEDGER.csv": [
"file_no",
"date",
"empl_num",
"t_code",
"t_type",
"amount",
],
# Payments: deposit date and amount are the only strictly required model fields
"PAYMENTS.csv": [
"deposit_date",
"amount",
],
}
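# These minimal field sets are enforced by _validate_required_headers() in the batch
# validation, batch import, and rerun endpoints below.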
FIELD_MAPPINGS = {
"ROLODEX.csv": {
"Id": "id",
@@ -191,7 +268,14 @@ FIELD_MAPPINGS = {
"Draft_Apr": "draft_apr",
"Final_Out": "final_out",
"Judge": "judge",
"Form_Name": "form_name"
"Form_Name": "form_name",
# Extended workflow/document fields (present in new exports or manual CSVs)
"Status": "status",
"Content": "content",
"Notes": "notes",
"Approval_Status": "approval_status",
"Approved_Date": "approved_date",
"Filed_Date": "filed_date"
},
"PENSIONS.csv": {
"File_No": "file_no",
@@ -218,9 +302,17 @@ FIELD_MAPPINGS = {
},
"EMPLOYEE.csv": {
"Empl_Num": "empl_num",
"Rate_Per_Hour": "rate_per_hour"
# "Empl_Id": not a field in Employee model, using empl_num as identifier
# Model has additional fields (first_name, last_name, title, etc.) not in CSV
"Empl_Id": "initials", # Map employee ID to initials field
"Rate_Per_Hour": "rate_per_hour",
# Optional extended fields when present in enhanced exports
"First": "first_name",
"First_Name": "first_name",
"Last": "last_name",
"Last_Name": "last_name",
"Title": "title",
"Email": "email",
"Phone": "phone",
"Active": "active"
},
"STATES.csv": {
"Abrev": "abbreviation",
@@ -228,8 +320,8 @@ FIELD_MAPPINGS = {
},
"GRUPLKUP.csv": {
"Code": "group_code",
"Description": "description"
# "Title": field not present in model, skipping
"Description": "description",
"Title": "title"
},
"TRNSLKUP.csv": {
"T_Code": "t_code",
@@ -240,10 +332,9 @@ FIELD_MAPPINGS = {
},
"TRNSTYPE.csv": {
"T_Type": "t_type",
"T_Type_L": "description"
# "Header": maps to debit_credit but needs data transformation
# "Footer": doesn't align with active boolean field
# These fields may need custom handling or model updates
"T_Type_L": "debit_credit", # D=Debit, C=Credit
"Header": "description",
"Footer": "footer_code"
},
"FILETYPE.csv": {
"File_Type": "type_code",
@@ -343,6 +434,10 @@ FIELD_MAPPINGS = {
"DEATH.csv": {
"File_No": "file_no",
"Version": "version",
"Beneficiary_Name": "beneficiary_name",
"Benefit_Amount": "benefit_amount",
"Benefit_Type": "benefit_type",
"Notes": "notes",
"Lump1": "lump1",
"Lump2": "lump2",
"Growth1": "growth1",
@@ -353,6 +448,9 @@ FIELD_MAPPINGS = {
"SEPARATE.csv": {
"File_No": "file_no",
"Version": "version",
"Agreement_Date": "agreement_date",
"Terms": "terms",
"Notes": "notes",
"Separation_Rate": "terms"
},
"LIFETABL.csv": {
@@ -466,6 +564,40 @@ FIELD_MAPPINGS = {
"Amount": "amount",
"Billed": "billed",
"Note": "note"
},
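# NOTE: "EMPLOYEE.csv" and "QDROS.csv" below repeat keys that already appear earlier in
# FIELD_MAPPINGS; in a Python dict literal the last occurrence wins, so these entries
# silently override the extended mappings defined above.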
"EMPLOYEE.csv": {
"Empl_Num": "empl_num",
"Empl_Id": "initials", # Map employee ID to initials field
"Rate_Per_Hour": "rate_per_hour",
# Note: first_name, last_name, title, active, email, phone will need manual entry or separate import
# as they're not present in the legacy CSV structure
},
"QDROS.csv": {
"File_No": "file_no",
"Version": "version",
"Plan_Id": "plan_id",
"^1": "field1",
"^2": "field2",
"^Part": "part",
"^AltP": "altp",
"^Pet": "pet",
"^Res": "res",
"Case_Type": "case_type",
"Case_Code": "case_code",
"Section": "section",
"Case_Number": "case_number",
"Judgment_Date": "judgment_date",
"Valuation_Date": "valuation_date",
"Married_On": "married_on",
"Percent_Awarded": "percent_awarded",
"Ven_City": "ven_city",
"Ven_Cnty": "ven_cnty",
"Ven_St": "ven_st",
"Draft_Out": "draft_out",
"Draft_Apr": "draft_apr",
"Final_Out": "final_out",
"Judge": "judge",
"Form_Name": "form_name"
}
}
@@ -691,6 +823,21 @@ def _build_dynamic_mapping(headers: List[str], model_class, file_type: str) -> D
}
def _validate_required_headers(file_type: str, mapped_headers: Dict[str, str]) -> Dict[str, Any]:
"""Check that minimal required model fields for a given CSV type are present in mapped headers.
Returns dict with: required_fields, missing_fields, ok.
"""
required_fields = REQUIRED_MODEL_FIELDS.get(file_type, [])
present_fields = set((mapped_headers or {}).values())
missing_fields = [f for f in required_fields if f not in present_fields]
return {
"required_fields": required_fields,
"missing_fields": missing_fields,
"ok": len(missing_fields) == 0,
}
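# Illustrative sketch: header validation for a LEDGER.csv upload whose Amount column is
# missing. The `mapped` dict mirrors the mapped_headers shape produced by
# _build_dynamic_mapping() (CSV header -> model field).
def _example_missing_amount_header() -> Dict[str, Any]:
    mapped = {
        "File_No": "file_no",
        "Date": "date",
        "Empl_Num": "empl_num",
        "T_Code": "t_code",
        "T_Type": "t_type",
    }
    result = _validate_required_headers("LEDGER.csv", mapped)
    # result["missing_fields"] == ["amount"] and result["ok"] is False
    return result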
def _get_required_fields(model_class) -> List[str]:
"""Infer required (non-nullable) fields for a model to avoid DB errors.
@@ -721,7 +868,7 @@ def convert_value(value: str, field_name: str) -> Any:
# Date fields
if any(word in field_name.lower() for word in [
"date", "dob", "birth", "opened", "closed", "judgment", "valuation", "married", "vests_on", "service"
"date", "dob", "birth", "opened", "closed", "judgment", "valuation", "married", "vests_on", "service", "approved", "filed", "agreement"
]):
parsed_date = parse_date(value)
return parsed_date
@@ -752,6 +899,15 @@ def convert_value(value: str, field_name: str) -> Any:
except ValueError:
return 0.0
# Normalize debit_credit textual variants
if field_name.lower() == "debit_credit":
normalized = value.strip().upper()
if normalized in ["D", "DEBIT"]:
return "D"
if normalized in ["C", "CREDIT"]:
return "C"
return normalized[:1] if normalized else None
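# e.g. "debit" -> "D", " Credit " -> "C"; any other non-empty value keeps only its first character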
# Integer fields
if any(word in field_name.lower() for word in [
"item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num", "month", "number"
@@ -786,6 +942,69 @@ def validate_foreign_keys(model_data: dict, model_class, db: Session) -> list[st
rolodex_id = model_data["id"]
if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
errors.append(f"Owner Rolodex ID '{rolodex_id}' not found")
# Check File -> Footer relationship (default footer on file)
if model_class == File and "footer_code" in model_data:
footer = model_data.get("footer_code")
if footer:
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
if not exists:
errors.append(f"Footer code '{footer}' not found for File")
# Check FileStatus -> Footer (default footer exists)
if model_class == FileStatus and "footer_code" in model_data:
footer = model_data.get("footer_code")
if footer:
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
if not exists:
errors.append(f"Footer code '{footer}' not found for FileStatus")
# Check TransactionType -> Footer (default footer exists)
if model_class == TransactionType and "footer_code" in model_data:
footer = model_data.get("footer_code")
if footer:
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
if not exists:
errors.append(f"Footer code '{footer}' not found for TransactionType")
# Check Ledger -> TransactionType/TransactionCode cross references
if model_class == Ledger:
# Validate t_type exists
if "t_type" in model_data:
t_type_value = model_data.get("t_type")
if t_type_value and not db.query(TransactionType).filter(TransactionType.t_type == t_type_value).first():
errors.append(f"Transaction type '{t_type_value}' not found")
# Validate t_code exists and matches t_type if both provided
if "t_code" in model_data:
t_code_value = model_data.get("t_code")
if t_code_value:
code_row = db.query(TransactionCode).filter(TransactionCode.t_code == t_code_value).first()
if not code_row:
errors.append(f"Transaction code '{t_code_value}' not found")
else:
ledger_t_type = model_data.get("t_type")
if ledger_t_type and getattr(code_row, "t_type", None) and code_row.t_type != ledger_t_type:
errors.append(
f"Transaction code '{t_code_value}' t_type '{code_row.t_type}' does not match ledger t_type '{ledger_t_type}'"
)
# Check Payment -> File and Rolodex relationships
if model_class == Payment:
if "file_no" in model_data:
file_no_value = model_data.get("file_no")
if file_no_value and not db.query(File).filter(File.file_no == file_no_value).first():
errors.append(f"File number '{file_no_value}' not found for Payment")
if "client_id" in model_data:
client_id_value = model_data.get("client_id")
if client_id_value and not db.query(Rolodex).filter(Rolodex.id == client_id_value).first():
errors.append(f"Client ID '{client_id_value}' not found for Payment")
# Check QDRO -> PlanInfo (plan_id exists)
if model_class == QDRO and "plan_id" in model_data:
plan_id = model_data.get("plan_id")
if plan_id:
exists = db.query(PlanInfo).filter(PlanInfo.plan_id == plan_id).first()
if not exists:
errors.append(f"Plan ID '{plan_id}' not found for QDRO")
# Add more foreign key validations as needed
return errors
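# Illustrative sketch (assumes an already-open SQLAlchemy Session named `session`): the FK
# check only collects messages and never raises, so callers can decide whether to skip a
# row or divert it to FlexibleImport, as the import endpoints below do.
def _example_ledger_fk_check(session: Session) -> List[str]:
    row = {"file_no": "F-001", "t_type": "1", "t_code": "BOGUS", "amount": Decimal("10.00")}
    # Expect messages such as "Transaction code 'BOGUS' not found" when the referenced
    # lookup rows do not exist.
    return validate_foreign_keys(row, Ledger, session)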
@@ -831,6 +1050,96 @@ async def get_available_csv_files(current_user: User = Depends(get_current_user)
}
@router.get("/template/{file_type}")
async def download_csv_template(
file_type: str,
current_user: User = Depends(get_current_user)
):
"""Download a minimal CSV template with required headers and one sample row.
Supported templates are the keys of CSV_IMPORT_TEMPLATES (FILES.csv, LEDGER.csv, PAYMENTS.csv, TRNSACTN.csv, DEPOSITS.csv, ROLODEX.csv).
"""
key = (file_type or "").strip()
if key not in CSV_IMPORT_TEMPLATES:
raise HTTPException(status_code=400, detail=f"Unsupported template type: {file_type}. Choose one of: {list(CSV_IMPORT_TEMPLATES.keys())}")
content = _generate_csv_template_bytes(key)
from datetime import datetime as _dt
ts = _dt.now().strftime("%Y%m%d_%H%M%S")
safe_name = key.replace(".csv", "")
filename = f"{safe_name}_template_{ts}.csv"
return StreamingResponse(
iter([content]),
media_type="text/csv",
headers={"Content-Disposition": f"attachment; filename=\"{filename}\""},
)
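# Illustrative client sketch (assumptions: the router is mounted under /api/import as the
# bundle docstring below suggests, the API uses bearer-token auth, and the server runs on
# localhost:8000; adjust to your deployment):
#
#   import requests
#   token = "<access token>"
#   resp = requests.get(
#       "http://localhost:8000/api/import/template/FILES.csv",
#       headers={"Authorization": f"Bearer {token}"},
#       timeout=30,
#   )
#   resp.raise_for_status()
#   with open("FILES_template.csv", "wb") as fh:
#       fh.write(resp.content)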
@router.get("/templates/bundle")
async def download_csv_templates_bundle(
files: Optional[List[str]] = Query(None, description="Repeat for each CSV template, e.g., files=FILES.csv&files=LEDGER.csv"),
current_user: User = Depends(get_current_user)
):
"""Bundle selected CSV templates into a single ZIP.
Example: GET /api/import/templates/bundle?files=FILES.csv&files=LEDGER.csv
"""
requested = files or []
if not requested:
raise HTTPException(status_code=400, detail="Specify at least one 'files' query parameter")
# Normalize and validate
normalized: List[str] = []
for name in requested:
if not name:
continue
n = name.strip()
if not n.lower().endswith(".csv"):
n = f"{n}.csv"
# Normalize to the template-key format ("FILES.csv": uppercase stem, lowercase extension)
# so the membership check against CSV_IMPORT_TEMPLATES keys actually matches.
n = f"{n[:-4].upper()}.csv"
if n in CSV_IMPORT_TEMPLATES:
normalized.append(n)
else:
# Ignore unknowns rather than fail the whole bundle
continue
# Deduplicate while preserving order
seen = set()
selected = []
for n in normalized:
if n not in seen:
seen.add(n)
selected.append(n)
if not selected:
raise HTTPException(status_code=400, detail=f"No supported templates requested. Supported: {list(CSV_IMPORT_TEMPLATES.keys())}")
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
for fname in selected:
try:
content = _generate_csv_template_bytes(fname)
# Friendly name in zip: <BASENAME>_template.csv
base = fname[:-4].upper()
arcname = f"{base}_template.csv"
zf.writestr(arcname, content)
except HTTPException:
# Skip unsupported just in case
continue
zip_buffer.seek(0)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"csv_templates_{ts}.zip"
return StreamingResponse(
iter([zip_buffer.getvalue()]),
media_type="application/zip",
headers={
"Content-Disposition": f"attachment; filename=\"{filename}\""
},
)
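# Illustrative client sketch (same assumptions as above): request several templates as one
# ZIP. Passing a list for `files` makes requests repeat the query parameter, matching the
# ?files=FILES.csv&files=LEDGER.csv form shown in the docstring.
#
#   resp = requests.get(
#       "http://localhost:8000/api/import/templates/bundle",
#       params={"files": ["FILES.csv", "LEDGER.csv"]},
#       headers={"Authorization": f"Bearer {token}"},
#       timeout=30,
#   )
#   with open("csv_templates.zip", "wb") as fh:
#       fh.write(resp.content)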
@router.post("/upload/{file_type}")
async def import_csv_data(
file_type: str,
@@ -1060,6 +1369,26 @@ async def import_csv_data(
except Exception:
pass
else:
# FK validation for known relationships
fk_errors = validate_foreign_keys(model_data, model_class, db)
if fk_errors:
for msg in fk_errors:
errors.append({"row": row_num, "error": msg})
# Persist as flexible for traceability
db.add(
FlexibleImport(
file_type=file_type,
target_table=model_class.__tablename__,
primary_key_field=None,
primary_key_value=None,
extra_data={
"mapped": model_data,
"fk_errors": fk_errors,
},
)
)
flexible_saved += 1
continue
instance = model_class(**model_data)
db.add(instance)
db.flush() # Ensure PK is available
@@ -1136,6 +1465,9 @@ async def import_csv_data(
"unmapped_headers": unmapped_headers,
"flexible_saved_rows": flexible_saved,
},
"validation": {
"fk_errors": len([e for e in errors if isinstance(e, dict) and 'error' in e and 'not found' in str(e['error']).lower()])
}
}
# Include create/update breakdown for printers
if file_type == "PRINTERS.csv":
@@ -1368,6 +1700,10 @@ async def batch_validate_csv_files(
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
mapped_headers = mapping_info["mapped_headers"]
unmapped_headers = mapping_info["unmapped_headers"]
header_validation = _validate_required_headers(file_type, mapped_headers)
# Sample data validation
sample_rows = []
@@ -1394,12 +1730,13 @@ async def batch_validate_csv_files(
validation_results.append({
"file_type": file_type,
"valid": len(mapped_headers) > 0 and len(errors) == 0,
"valid": (len(mapped_headers) > 0 and len(errors) == 0 and header_validation.get("ok", True)),
"headers": {
"found": csv_headers,
"mapped": mapped_headers,
"unmapped": unmapped_headers
},
"header_validation": header_validation,
"sample_data": sample_rows[:5], # Limit sample data for batch operation
"validation_errors": errors[:5], # First 5 errors only
"total_errors": len(errors),
@@ -1493,17 +1830,34 @@ async def batch_import_csv_files(
if file_type not in CSV_MODEL_MAPPING:
# Fallback flexible-only import for unknown file structures
try:
await file.seek(0)
content = await file.read()
# Save original upload to disk for potential reruns
# Use async file operations for better performance
from app.services.async_file_operations import async_file_ops
# Stream save to disk for potential reruns and processing
saved_path = None
try:
file_path = audit_dir.joinpath(file_type)
with open(file_path, "wb") as fh:
fh.write(content)
saved_path = str(file_path)
except Exception:
saved_path = None
relative_path = f"import_audits/{audit_row.id}/{file_type}"
saved_file_path, file_size, checksum = await async_file_ops.stream_upload_file(
file, relative_path
)
saved_path = str(async_file_ops.base_upload_dir / relative_path)
# Stream read for processing
content = b""
async for chunk in async_file_ops.stream_read_file(relative_path):
content += chunk
except Exception as e:
# Fallback to traditional method
await file.seek(0)
content = await file.read()
try:
file_path = audit_dir.joinpath(file_type)
with open(file_path, "wb") as fh:
fh.write(content)
saved_path = str(file_path)
except Exception:
saved_path = None
encodings = ENCODINGS
csv_content = None
for encoding in encodings:
@@ -1640,10 +1994,12 @@ async def batch_import_csv_files(
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
mapped_headers = mapping_info["mapped_headers"]
unmapped_headers = mapping_info["unmapped_headers"]
header_validation = _validate_required_headers(file_type, mapped_headers)
imported_count = 0
errors = []
flexible_saved = 0
fk_error_summary: Dict[str, int] = {}
# Special handling: assign line numbers per form for FORM_LST.csv
form_lst_line_counters: Dict[str, int] = {}
@@ -1713,6 +2069,26 @@ async def batch_import_csv_files(
if 'file_no' not in model_data or not model_data['file_no']:
continue # Skip ledger records without file number
# FK validation for known relationships
fk_errors = validate_foreign_keys(model_data, model_class, db)
if fk_errors:
for msg in fk_errors:
errors.append({"row": row_num, "error": msg})
fk_error_summary[msg] = fk_error_summary.get(msg, 0) + 1
db.add(
FlexibleImport(
file_type=file_type,
target_table=model_class.__tablename__,
primary_key_field=None,
primary_key_value=None,
extra_data=make_json_safe({
"mapped": model_data,
"fk_errors": fk_errors,
}),
)
)
flexible_saved += 1
continue
instance = model_class(**model_data)
db.add(instance)
db.flush()
@@ -1779,10 +2155,15 @@ async def batch_import_csv_files(
results.append({
"file_type": file_type,
"status": "success" if len(errors) == 0 else "completed_with_errors",
"status": "success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
"imported_count": imported_count,
"errors": len(errors),
"message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
"header_validation": header_validation,
"validation": {
"fk_errors_total": sum(fk_error_summary.values()),
"fk_error_summary": fk_error_summary,
},
"auto_mapping": {
"mapped_headers": mapped_headers,
"unmapped_headers": unmapped_headers,
@@ -1793,7 +2174,7 @@ async def batch_import_csv_files(
db.add(ImportAuditFile(
audit_id=audit_row.id,
file_type=file_type,
status="success" if len(errors) == 0 else "completed_with_errors",
status="success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
imported_count=imported_count,
errors=len(errors),
message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
@@ -1801,6 +2182,9 @@ async def batch_import_csv_files(
"mapped_headers": list(mapped_headers.keys()),
"unmapped_count": len(unmapped_headers),
"flexible_saved_rows": flexible_saved,
"fk_errors_total": sum(fk_error_summary.values()),
"fk_error_summary": fk_error_summary,
"header_validation": header_validation,
**({"saved_path": saved_path} if saved_path else {}),
}
))
@@ -2138,6 +2522,7 @@ async def rerun_failed_files(
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
mapped_headers = mapping_info["mapped_headers"]
unmapped_headers = mapping_info["unmapped_headers"]
header_validation = _validate_required_headers(file_type, mapped_headers)
imported_count = 0
errors: List[Dict[str, Any]] = []
# Special handling: assign line numbers per form for FORM_LST.csv
@@ -2248,20 +2633,21 @@ async def rerun_failed_files(
total_errors += len(errors)
results.append({
"file_type": file_type,
"status": "success" if len(errors) == 0 else "completed_with_errors",
"status": "success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
"imported_count": imported_count,
"errors": len(errors),
"message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
"header_validation": header_validation,
})
try:
db.add(ImportAuditFile(
audit_id=rerun_audit.id,
file_type=file_type,
status="success" if len(errors) == 0 else "completed_with_errors",
status="success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
imported_count=imported_count,
errors=len(errors),
message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
details={"saved_path": saved_path} if saved_path else {}
details={**({"saved_path": saved_path} if saved_path else {}), "header_validation": header_validation}
))
db.commit()
except Exception: