"""
|
|
Data import API endpoints for CSV file uploads with auto-discovery mapping.
|
|
"""
|
|
import csv
|
|
import io
|
|
import zipfile
|
|
import re
|
|
import os
|
|
from pathlib import Path
|
|
from difflib import SequenceMatcher
|
|
from datetime import datetime, date, timezone
|
|
from decimal import Decimal
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as UploadFileForm, Form, Query, WebSocket
|
|
from fastapi.responses import StreamingResponse
|
|
from sqlalchemy.orm import Session
|
|
from app.database.base import get_db, SessionLocal
|
|
from app.auth.security import get_current_user
|
|
from app.models.user import User
|
|
from app.models.rolodex import Rolodex, Phone
|
|
from app.models.files import File
|
|
from app.models.ledger import Ledger
|
|
from app.models.qdro import QDRO
|
|
from app.models.pensions import Pension, PensionSchedule, MarriageHistory, DeathBenefit, SeparationAgreement, LifeTable, NumberTable, PensionResult
|
|
from app.models.lookups import Employee, FileType, FileStatus, TransactionType, TransactionCode, State, GroupLookup, Footer, PlanInfo, FormIndex, FormList, PrinterSetup, SystemSetup, FormKeyword
|
|
from app.models.additional import Payment, Deposit, FileNote, FormVariable, ReportVariable
|
|
from app.models.flexible import FlexibleImport
|
|
from app.models.audit import ImportAudit, ImportAuditFile
|
|
from app.config import settings
|
|
from app.utils.logging import import_logger
|
|
from app.middleware.websocket_middleware import get_websocket_manager
|
|
from app.services.websocket_pool import WebSocketMessage
|
|
|
|
router = APIRouter(tags=["import"])
|
|
|
|
# WebSocket manager for import progress notifications
|
|
websocket_manager = get_websocket_manager()
|
|
|
|
# Common encodings to try for legacy CSV files (order matters)
|
|
ENCODINGS = [
|
|
'utf-8-sig',
|
|
'utf-8',
|
|
'windows-1252',
|
|
'iso-8859-1',
|
|
'cp1252',
|
|
]
|
|
|
|
# Unified import order used across batch operations
|
|
IMPORT_ORDER = [
|
|
"STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FOOTERS.csv", "FILESTAT.csv",
|
|
"TRNSTYPE.csv", "TRNSLKUP.csv", "SETUP.csv", "PRINTERS.csv",
|
|
"INX_LKUP.csv",
|
|
"ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
|
|
"QDROS.csv", "PENSIONS.csv", "SCHEDULE.csv", "MARRIAGE.csv", "DEATH.csv", "SEPARATE.csv", "LIFETABL.csv", "NUMBERAL.csv", "PLANINFO.csv", "RESULTS.csv", "PAYMENTS.csv", "DEPOSITS.csv",
|
|
"FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
|
|
]
|
|
|
|
|
|
# CSV to Model mapping
|
|
CSV_MODEL_MAPPING = {
|
|
"ROLODEX.csv": Rolodex,
|
|
"ROLEX_V.csv": Rolodex, # Legacy/view alias
|
|
"PHONE.csv": Phone,
|
|
"FILES.csv": File,
|
|
"FILES_R.csv": File, # Legacy/report alias
|
|
"FILES_V.csv": File, # Legacy/view alias
|
|
"LEDGER.csv": Ledger,
|
|
"QDROS.csv": QDRO,
|
|
"PENSIONS.csv": Pension,
|
|
"SCHEDULE.csv": PensionSchedule,
|
|
"MARRIAGE.csv": MarriageHistory,
|
|
"DEATH.csv": DeathBenefit,
|
|
"SEPARATE.csv": SeparationAgreement,
|
|
"LIFETABL.csv": LifeTable,
|
|
"NUMBERAL.csv": NumberTable,
|
|
"EMPLOYEE.csv": Employee,
|
|
"FILETYPE.csv": FileType,
|
|
"FILESTAT.csv": FileStatus,
|
|
"TRNSTYPE.csv": TransactionType,
|
|
"TRNSLKUP.csv": TransactionCode,
|
|
"STATES.csv": State,
|
|
"GRUPLKUP.csv": GroupLookup,
|
|
"FOOTERS.csv": Footer,
|
|
"PLANINFO.csv": PlanInfo,
|
|
# Legacy alternate names from export directories
|
|
"FORM_INX.csv": FormIndex,
|
|
"FORM_LST.csv": FormList,
|
|
"PRINTERS.csv": PrinterSetup,
|
|
"SETUP.csv": SystemSetup,
|
|
# Additional models for complete legacy coverage
|
|
"DEPOSITS.csv": Deposit,
|
|
"FILENOTS.csv": FileNote,
|
|
"FVARLKUP.csv": FormVariable,
|
|
"RVARLKUP.csv": ReportVariable,
|
|
"PAYMENTS.csv": Payment,
|
|
"TRNSACTN.csv": Ledger, # Maps to existing Ledger model (same structure)
|
|
"INX_LKUP.csv": FormKeyword,
|
|
"RESULTS.csv": PensionResult
|
|
}
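
# Illustrative sketch (not executed): how IMPORT_ORDER and CSV_MODEL_MAPPING are meant to
# be combined by a batch driver. `load_csv_bytes` is hypothetical; the import endpoints
# later in this module operate on uploaded files instead.
#
#   for csv_name in IMPORT_ORDER:
#       model_class = CSV_MODEL_MAPPING.get(csv_name)
#       if model_class is None:
#           continue  # no model registered for this legacy file
#       raw_bytes = load_csv_bytes(csv_name)  # hypothetical source of the file content
#       # ... decode using the ENCODINGS fallback, map headers, insert rows ...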


# -----------------------------
# Progress aggregation helpers
# -----------------------------
def _aggregate_batch_progress(db: Session, audit: ImportAudit) -> Dict[str, Any]:
    """Compute progress summary for the given audit row."""
    processed_files = db.query(ImportAuditFile).filter(ImportAuditFile.audit_id == audit.id).count()
    successful_files = db.query(ImportAuditFile).filter(
        ImportAuditFile.audit_id == audit.id,
        ImportAuditFile.status.in_(["success", "completed_with_errors", "skipped"])
    ).count()
    failed_files = db.query(ImportAuditFile).filter(
        ImportAuditFile.audit_id == audit.id,
        ImportAuditFile.status == "failed"
    ).count()

    total_files = audit.total_files or 0
    percent_complete: float = 0.0
    if total_files > 0:
        try:
            percent_complete = (processed_files / total_files) * 100.0
        except Exception:
            percent_complete = 0.0

    data = {
        "audit_id": audit.id,
        "status": audit.status,
        "total_files": total_files,
        "processed_files": processed_files,
        "successful_files": successful_files,
        "failed_files": failed_files,
        "started_at": audit.started_at.isoformat() if audit.started_at else None,
        "finished_at": audit.finished_at.isoformat() if audit.finished_at else None,
        "percent": percent_complete,
        "message": audit.message,
    }

    try:
        last_file = (
            db.query(ImportAuditFile)
            .filter(ImportAuditFile.audit_id == audit.id)
            .order_by(ImportAuditFile.id.desc())
            .first()
        )
        if last_file:
            data["last_file"] = {
                "file_type": last_file.file_type,
                "status": last_file.status,
                "imported_count": last_file.imported_count,
                "errors": last_file.errors,
                "message": last_file.message,
                "created_at": last_file.created_at.isoformat() if last_file.created_at else None,
            }
    except Exception:
        pass

    return data


async def _broadcast_import_progress(db: Session, audit_id: int) -> None:
    """Broadcast current progress for audit_id to its WebSocket topic."""
    audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
    if not audit:
        return
    payload = _aggregate_batch_progress(db, audit)
    topic = f"import_batch_progress_{audit_id}"
    try:
        await websocket_manager.broadcast_to_topic(
            topic=topic,
            message_type="progress",
            data=payload,
        )
    except Exception:
        # Non-fatal; continue API flow
        pass


# Minimal CSV template definitions (headers + one sample row) used for template downloads
CSV_IMPORT_TEMPLATES: Dict[str, Dict[str, List[str]]] = {
    "FILES.csv": {
        "headers": ["File_No", "Id", "Empl_Num", "File_Type", "Opened", "Status", "Rate_Per_Hour"],
        "sample": ["F-001", "CLIENT-1", "EMP01", "CIVIL", "2024-01-01", "ACTIVE", "150"],
    },
    "LEDGER.csv": {
        "headers": ["File_No", "Date", "Empl_Num", "T_Code", "T_Type", "Amount"],
        "sample": ["F-001", "2024-01-15", "EMP01", "FEE", "1", "500.00"],
    },
    "PAYMENTS.csv": {
        "headers": ["Deposit_Date", "Amount"],
        "sample": ["2024-01-15", "1500.00"],
    },
    # Additional templates for convenience
    "TRNSACTN.csv": {
        # Same structure as LEDGER.csv
        "headers": ["File_No", "Date", "Empl_Num", "T_Code", "T_Type", "Amount"],
        "sample": ["F-002", "2024-02-10", "EMP02", "FEE", "1", "250.00"],
    },
    "DEPOSITS.csv": {
        "headers": ["Deposit_Date", "Total"],
        "sample": ["2024-02-10", "1500.00"],
    },
    "ROLODEX.csv": {
        # Minimal common contact fields
        "headers": ["Id", "Last", "First", "A1", "City", "Abrev", "Zip", "Email"],
        "sample": ["CLIENT-1", "Smith", "John", "123 Main St", "Denver", "CO", "80202", "john.smith@example.com"],
    },
}


def _generate_csv_template_bytes(file_type: str) -> bytes:
    """Return CSV template content for the given file type as bytes.

    Raises HTTPException if unsupported.
    """
    key = (file_type or "").strip()
    if key not in CSV_IMPORT_TEMPLATES:
        raise HTTPException(status_code=400, detail=f"Unsupported template type: {file_type}. Choose one of: {list(CSV_IMPORT_TEMPLATES.keys())}")

    cfg = CSV_IMPORT_TEMPLATES[key]
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(cfg["headers"])
    writer.writerow(cfg["sample"])
    output.seek(0)
    return output.getvalue().encode("utf-8")
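
# Example (illustrative, not executed): generating template bytes for a supported type.
#
#   >>> data = _generate_csv_template_bytes("FILES.csv")
#   >>> data.splitlines()[0]
#   b'File_No,Id,Empl_Num,File_Type,Opened,Status,Rate_Per_Hour'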


# Field mappings for CSV columns to database fields
# Legacy header synonyms used as hints only (not required). Auto-discovery will work without exact matches.
REQUIRED_MODEL_FIELDS: Dict[str, List[str]] = {
    # Files: core identifiers and billing/status fields used throughout the app
    "FILES.csv": [
        "file_no",
        "id",
        "empl_num",
        "file_type",
        "opened",
        "status",
        "rate_per_hour",
    ],
    # Ledger: core transaction fields
    "LEDGER.csv": [
        "file_no",
        "date",
        "empl_num",
        "t_code",
        "t_type",
        "amount",
    ],
    # Payments: deposit date and amount are the only strictly required model fields
    "PAYMENTS.csv": [
        "deposit_date",
        "amount",
    ],
}

FIELD_MAPPINGS = {
|
|
"ROLODEX.csv": {
|
|
"Id": "id",
|
|
"Prefix": "prefix",
|
|
"First": "first",
|
|
"Middle": "middle",
|
|
"Last": "last",
|
|
"Suffix": "suffix",
|
|
"Title": "title",
|
|
"A1": "a1",
|
|
"A2": "a2",
|
|
"A3": "a3",
|
|
"City": "city",
|
|
"Abrev": "abrev",
|
|
"St": None, # Full state name - skip this field as model only has abrev
|
|
"Zip": "zip",
|
|
"Email": "email",
|
|
"DOB": "dob",
|
|
"SS#": "ss_number",
|
|
"Legal_Status": "legal_status",
|
|
"Group": "group",
|
|
"Memo": "memo"
|
|
},
|
|
"PHONE.csv": {
|
|
"Id": "rolodex_id",
|
|
"Phone": "phone",
|
|
"Location": "location"
|
|
},
|
|
"FILES.csv": {
|
|
"File_No": "file_no",
|
|
"Id": "id",
|
|
"File_Type": "file_type",
|
|
"Regarding": "regarding",
|
|
"Opened": "opened",
|
|
"Closed": "closed",
|
|
"Empl_Num": "empl_num",
|
|
"Rate_Per_Hour": "rate_per_hour",
|
|
"Status": "status",
|
|
"Footer_Code": "footer_code",
|
|
"Opposing": "opposing",
|
|
"Hours": "hours",
|
|
"Hours_P": "hours_p",
|
|
"Trust_Bal": "trust_bal",
|
|
"Trust_Bal_P": "trust_bal_p",
|
|
"Hourly_Fees": "hourly_fees",
|
|
"Hourly_Fees_P": "hourly_fees_p",
|
|
"Flat_Fees": "flat_fees",
|
|
"Flat_Fees_P": "flat_fees_p",
|
|
"Disbursements": "disbursements",
|
|
"Disbursements_P": "disbursements_p",
|
|
"Credit_Bal": "credit_bal",
|
|
"Credit_Bal_P": "credit_bal_p",
|
|
"Total_Charges": "total_charges",
|
|
"Total_Charges_P": "total_charges_p",
|
|
"Amount_Owing": "amount_owing",
|
|
"Amount_Owing_P": "amount_owing_p",
|
|
"Transferable": "transferable",
|
|
"Memo": "memo"
|
|
},
|
|
"LEDGER.csv": {
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
},
|
|
"QDROS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"^1": "field1",
|
|
"^2": "field2",
|
|
"^Part": "part",
|
|
"^AltP": "altp",
|
|
"^Pet": "pet",
|
|
"^Res": "res",
|
|
"Case_Type": "case_type",
|
|
"Case_Code": "case_code",
|
|
"Section": "section",
|
|
"Case_Number": "case_number",
|
|
"Judgment_Date": "judgment_date",
|
|
"Valuation_Date": "valuation_date",
|
|
"Married_On": "married_on",
|
|
"Percent_Awarded": "percent_awarded",
|
|
"Ven_City": "ven_city",
|
|
"Ven_Cnty": "ven_cnty",
|
|
"Ven_St": "ven_st",
|
|
"Draft_Out": "draft_out",
|
|
"Draft_Apr": "draft_apr",
|
|
"Final_Out": "final_out",
|
|
"Judge": "judge",
|
|
"Form_Name": "form_name",
|
|
# Extended workflow/document fields (present in new exports or manual CSVs)
|
|
"Status": "status",
|
|
"Content": "content",
|
|
"Notes": "notes",
|
|
"Approval_Status": "approval_status",
|
|
"Approved_Date": "approved_date",
|
|
"Filed_Date": "filed_date"
|
|
},
|
|
"PENSIONS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Title": "title",
|
|
"First": "first",
|
|
"Last": "last",
|
|
"Birth": "birth",
|
|
"Race": "race",
|
|
"Sex": "sex",
|
|
"Info": "info",
|
|
"Valu": "valu",
|
|
"Accrued": "accrued",
|
|
"Vested_Per": "vested_per",
|
|
"Start_Age": "start_age",
|
|
"COLA": "cola",
|
|
"Max_COLA": "max_cola",
|
|
"Withdrawal": "withdrawal",
|
|
"Pre_DR": "pre_dr",
|
|
"Post_DR": "post_dr",
|
|
"Tax_Rate": "tax_rate"
|
|
},
|
|
"EMPLOYEE.csv": {
|
|
"Empl_Num": "empl_num",
|
|
"Empl_Id": "initials", # Map employee ID to initials field
|
|
"Rate_Per_Hour": "rate_per_hour",
|
|
# Optional extended fields when present in enhanced exports
|
|
"First": "first_name",
|
|
"First_Name": "first_name",
|
|
"Last": "last_name",
|
|
"Last_Name": "last_name",
|
|
"Title": "title",
|
|
"Email": "email",
|
|
"Phone": "phone",
|
|
"Active": "active"
|
|
},
|
|
"STATES.csv": {
|
|
"Abrev": "abbreviation",
|
|
"St": "name"
|
|
},
|
|
"GRUPLKUP.csv": {
|
|
"Code": "group_code",
|
|
"Description": "description",
|
|
"Title": "title"
|
|
},
|
|
"TRNSLKUP.csv": {
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
# "T_Type_L": not a field in TransactionCode model
|
|
"Amount": "default_rate",
|
|
"Description": "description"
|
|
},
|
|
"TRNSTYPE.csv": {
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "debit_credit", # D=Debit, C=Credit
|
|
"Header": "description",
|
|
"Footer": "footer_code"
|
|
},
|
|
"FILETYPE.csv": {
|
|
"File_Type": "type_code",
|
|
"Description": "description",
|
|
"Default_Rate": "default_rate"
|
|
},
|
|
"FILESTAT.csv": {
|
|
"Status": "status_code",
|
|
"Status_Code": "status_code",
|
|
"Definition": "description",
|
|
"Description": "description",
|
|
"Send": "send",
|
|
"Footer_Code": "footer_code",
|
|
"Sort_Order": "sort_order"
|
|
},
|
|
"FOOTERS.csv": {
|
|
"F_Code": "footer_code",
|
|
"F_Footer": "content"
|
|
# Description is optional - not required for footers
|
|
},
|
|
"PLANINFO.csv": {
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Plan_Type": "plan_type",
|
|
"Sponsor": "sponsor",
|
|
"Administrator": "administrator",
|
|
"Address1": "address1",
|
|
"Address2": "address2",
|
|
"City": "city",
|
|
"State": "state",
|
|
"Zip_Code": "zip_code",
|
|
"Phone": "phone",
|
|
"Notes": "notes"
|
|
},
|
|
"INX_LKUP.csv": {
|
|
"Keyword": "keyword",
|
|
"Description": "description"
|
|
},
|
|
"FORM_INX.csv": {
|
|
"Name": "form_id",
|
|
"Keyword": "keyword"
|
|
},
|
|
"FORM_LST.csv": {
|
|
"Name": "form_id",
|
|
"Memo": "content",
|
|
"Status": "status"
|
|
},
|
|
"PRINTERS.csv": {
|
|
# Legacy variants
|
|
"Printer_Name": "printer_name",
|
|
"Description": "description",
|
|
"Driver": "driver",
|
|
"Port": "port",
|
|
"Default_Printer": "default_printer",
|
|
# Observed legacy headers from export
|
|
"Number": "number",
|
|
"Name": "printer_name",
|
|
"Page_Break": "page_break",
|
|
"Setup_St": "setup_st",
|
|
"Reset_St": "reset_st",
|
|
"B_Underline": "b_underline",
|
|
"E_Underline": "e_underline",
|
|
"B_Bold": "b_bold",
|
|
"E_Bold": "e_bold",
|
|
# Optional report toggles
|
|
"Phone_Book": "phone_book",
|
|
"Rolodex_Info": "rolodex_info",
|
|
"Envelope": "envelope",
|
|
"File_Cabinet": "file_cabinet",
|
|
"Accounts": "accounts",
|
|
"Statements": "statements",
|
|
"Calendar": "calendar",
|
|
},
|
|
"SETUP.csv": {
|
|
"Setting_Key": "setting_key",
|
|
"Setting_Value": "setting_value",
|
|
"Description": "description",
|
|
"Setting_Type": "setting_type"
|
|
},
|
|
"SCHEDULE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Vests_On": "vests_on",
|
|
"Vests_At": "vests_at"
|
|
},
|
|
"MARRIAGE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Married_From": "married_from",
|
|
"Married_To": "married_to",
|
|
"Married_Years": "married_years",
|
|
"Service_From": "service_from",
|
|
"Service_To": "service_to",
|
|
"Service_Years": "service_years",
|
|
"Marital_%": "marital_percent"
|
|
},
|
|
"DEATH.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Beneficiary_Name": "beneficiary_name",
|
|
"Benefit_Amount": "benefit_amount",
|
|
"Benefit_Type": "benefit_type",
|
|
"Notes": "notes",
|
|
"Lump1": "lump1",
|
|
"Lump2": "lump2",
|
|
"Growth1": "growth1",
|
|
"Growth2": "growth2",
|
|
"Disc1": "disc1",
|
|
"Disc2": "disc2"
|
|
},
|
|
"SEPARATE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Agreement_Date": "agreement_date",
|
|
"Terms": "terms",
|
|
"Notes": "notes",
|
|
"Separation_Rate": "terms"
|
|
},
|
|
"LIFETABL.csv": {
|
|
"AGE": "age",
|
|
"LE_AA": "le_aa",
|
|
"NA_AA": "na_aa",
|
|
"LE_AM": "le_am",
|
|
"NA_AM": "na_am",
|
|
"LE_AF": "le_af",
|
|
"NA_AF": "na_af",
|
|
"LE_WA": "le_wa",
|
|
"NA_WA": "na_wa",
|
|
"LE_WM": "le_wm",
|
|
"NA_WM": "na_wm",
|
|
"LE_WF": "le_wf",
|
|
"NA_WF": "na_wf",
|
|
"LE_BA": "le_ba",
|
|
"NA_BA": "na_ba",
|
|
"LE_BM": "le_bm",
|
|
"NA_BM": "na_bm",
|
|
"LE_BF": "le_bf",
|
|
"NA_BF": "na_bf",
|
|
"LE_HA": "le_ha",
|
|
"NA_HA": "na_ha",
|
|
"LE_HM": "le_hm",
|
|
"NA_HM": "na_hm",
|
|
"LE_HF": "le_hf",
|
|
"NA_HF": "na_hf"
|
|
},
|
|
"NUMBERAL.csv": {
|
|
"Month": "month",
|
|
"NA_AA": "na_aa",
|
|
"NA_AM": "na_am",
|
|
"NA_AF": "na_af",
|
|
"NA_WA": "na_wa",
|
|
"NA_WM": "na_wm",
|
|
"NA_WF": "na_wf",
|
|
"NA_BA": "na_ba",
|
|
"NA_BM": "na_bm",
|
|
"NA_BF": "na_bf",
|
|
"NA_HA": "na_ha",
|
|
"NA_HM": "na_hm",
|
|
"NA_HF": "na_hf"
|
|
},
|
|
"RESULTS.csv": {
|
|
"Accrued": "accrued",
|
|
"Start_Age": "start_age",
|
|
"COLA": "cola",
|
|
"Withdrawal": "withdrawal",
|
|
"Pre_DR": "pre_dr",
|
|
"Post_DR": "post_dr",
|
|
"Tax_Rate": "tax_rate",
|
|
"Age": "age",
|
|
"Years_From": "years_from",
|
|
"Life_Exp": "life_exp",
|
|
"EV_Monthly": "ev_monthly",
|
|
"Payments": "payments",
|
|
"Pay_Out": "pay_out",
|
|
"Fund_Value": "fund_value",
|
|
"PV": "pv",
|
|
"Mortality": "mortality",
|
|
"PV_AM": "pv_am",
|
|
"PV_AMT": "pv_amt",
|
|
"PV_Pre_DB": "pv_pre_db",
|
|
"PV_Annuity": "pv_annuity",
|
|
"WV_AT": "wv_at",
|
|
"PV_Plan": "pv_plan",
|
|
"Years_Married": "years_married",
|
|
"Years_Service": "years_service",
|
|
"Marr_Per": "marr_per",
|
|
"Marr_Amt": "marr_amt"
|
|
},
|
|
# Additional CSV file mappings
|
|
"DEPOSITS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"Total": "total"
|
|
},
|
|
"FILENOTS.csv": {
|
|
"File_No": "file_no",
|
|
"Memo_Date": "memo_date",
|
|
"Memo_Note": "memo_note"
|
|
},
|
|
"FVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query",
|
|
"Response": "response"
|
|
},
|
|
"RVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query"
|
|
},
|
|
"PAYMENTS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"File_No": "file_no",
|
|
"Id": "client_id",
|
|
"Regarding": "regarding",
|
|
"Amount": "amount",
|
|
"Note": "note"
|
|
},
|
|
"TRNSACTN.csv": {
|
|
# Maps to Ledger model - same structure as LEDGER.csv
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
    },
    # NOTE: duplicate "EMPLOYEE.csv" and "QDROS.csv" entries were removed here. A Python
    # dict keeps only the last occurrence of a key, so the duplicates silently overrode
    # the richer definitions earlier in this mapping (which include the optional extended
    # field hints); the earlier definitions are the ones intended to apply.
}


def parse_date(date_str: str) -> Optional[date]:
    """Parse date string in various formats"""
    if not date_str or date_str.strip() == "":
        return None

    date_formats = [
        "%Y-%m-%d",
        "%m/%d/%Y",
        "%d/%m/%Y",
        "%m-%d-%Y",
        "%d-%m-%Y",
        "%Y/%m/%d"
    ]

    for fmt in date_formats:
        try:
            return datetime.strptime(date_str.strip(), fmt).date()
        except ValueError:
            continue

    return None
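
# Examples (illustrative):
#   parse_date("2024-01-15")  -> date(2024, 1, 15)
#   parse_date("01/15/2024")  -> date(2024, 1, 15)   # matched by "%m/%d/%Y"
#   parse_date("15.01.2024")  -> None                # no matching format
# Ambiguous day/month strings resolve to the first matching format, so "%m/%d/%Y" wins.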


def make_json_safe(value: Any) -> Any:
    """Recursively convert values to JSON-serializable types.

    - date/datetime -> ISO string
    - Decimal -> float
    - dict/list -> recurse
    """
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, Decimal):
        try:
            return float(value)
        except Exception:
            return str(value)
    if isinstance(value, dict):
        return {k: make_json_safe(v) for k, v in value.items()}
    if isinstance(value, list):
        return [make_json_safe(v) for v in value]
    return value
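
# Example (illustrative):
#   make_json_safe({"opened": date(2024, 1, 1), "rate": Decimal("150.00"), "tags": [1, 2]})
#   -> {"opened": "2024-01-01", "rate": 150.0, "tags": [1, 2]}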


def parse_csv_robust(csv_content: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Parse CSV text robustly by handling broken newlines in unquoted fields.

    Returns tuple of (rows_as_dicts, headers)
    """
    lines = (csv_content or "").strip().split('\n')
    if not lines or (len(lines) == 1 and not lines[0].strip()):
        return [], []

    # Parse headers using the csv module to respect quoting
    header_reader = csv.reader(io.StringIO(lines[0]))
    headers = next(header_reader)
    headers = [h.strip() for h in headers]

    rows_data: List[Dict[str, str]] = []
    for line_num, line in enumerate(lines[1:], start=2):
        # Skip empty lines
        if not line.strip():
            continue
        try:
            # Parse each line independently; avoids multiline parse explosions
            line_reader = csv.reader(io.StringIO(line))
            fields = next(line_reader)
            fields = [f.strip() for f in fields]

            # If clearly malformed (too few fields), skip
            if len(fields) < max(1, len(headers) // 2):
                continue

            # Pad or truncate to header length
            while len(fields) < len(headers):
                fields.append("")
            fields = fields[:len(headers)]

            row_dict = dict(zip(headers, fields))
            rows_data.append(row_dict)
        except Exception:
            # Skip malformed row
            continue

    return rows_data, headers


def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Try csv.DictReader first; on failure, fall back to robust parser."""
    try:
        reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        headers_local = reader.fieldnames or []
        rows_local: List[Dict[str, str]] = []
        for r in reader:
            rows_local.append(r)
        return rows_local, headers_local
    except Exception:
        return parse_csv_robust(text)
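
# Example (illustrative):
#   rows, headers = parse_csv_with_fallback('File_No,Amount\nF-001,"1,500.00"\n')
#   headers -> ['File_No', 'Amount']
#   rows    -> [{'File_No': 'F-001', 'Amount': '1,500.00'}]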


def _normalize_label(label: str) -> str:
    """Normalize a header/field label for fuzzy comparison."""
    if not label:
        return ""
    # Lowercase, replace separators with underscores, remove non-alphanumerics, expand common short forms
    lowered = label.strip().lower()
    # Replace separators
    lowered = re.sub(r"[\s\-]+", "_", lowered)
    # Remove non-word characters except underscore
    lowered = re.sub(r"[^a-z0-9_]", "", lowered)
    # Expand a few common abbreviations
    replacements = {
        "num": "number",
        "no": "number",
        "amt": "amount",
        "addr": "address",
        "st": "state",
        "dob": "dateofbirth",
        "ss": "ssnumber",
    }
    tokens = [replacements.get(t, t) for t in lowered.split("_") if t]
    return "".join(tokens)


def _get_model_columns(model_class) -> Tuple[Dict[str, Any], List[str]]:
    """Return model columns mapping name->Column and list of primary key column names."""
    columns = {}
    pk_names = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        columns[col.name] = col
        if col.primary_key:
            pk_names.append(col.name)
    return columns, pk_names


def _build_dynamic_mapping(headers: List[str], model_class, file_type: str) -> Dict[str, Any]:
    """Create a mapping from CSV headers to model fields using synonyms and fuzzy similarity.

    Returns a dict with keys: mapping (csv_header->db_field), suggestions, unmapped_headers, mapped_headers
    """
    model_columns, _ = _get_model_columns(model_class)
    model_field_names = list(model_columns.keys())

    # Start with legacy mapping hints when available
    legacy_map = FIELD_MAPPINGS.get(file_type, {}) or {}

    mapping: Dict[str, Optional[str]] = {}
    suggestions: Dict[str, List[Tuple[str, float]]] = {}
    used_db_fields: set[str] = set()

    # 1) Exact legacy header key usage
    for header in headers:
        if header in legacy_map and legacy_map[header] is not None:
            candidate = legacy_map[header]
            if candidate in model_field_names and candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 2) Direct exact match against model fields (case-insensitive and normalized)
    normalized_model = {name: _normalize_label(name) for name in model_field_names}
    normalized_to_model = {v: k for k, v in normalized_model.items()}

    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        if normalized_header in normalized_to_model:
            candidate = normalized_to_model[normalized_header]
            if candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 3) Fuzzy best-match based on normalized strings
    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        best_candidate = None
        best_score = 0.0
        candidate_list: List[Tuple[str, float]] = []
        for model_field in model_field_names:
            if model_field in used_db_fields:
                continue
            nm = normalized_model[model_field]
            if not nm or not normalized_header:
                score = 0.0
            else:
                # Combine ratio and partial containment heuristic
                ratio = SequenceMatcher(None, normalized_header, nm).ratio()
                containment = 1.0 if (normalized_header in nm or nm in normalized_header) else 0.0
                score = max(ratio, 0.85 if containment else 0.0)
            candidate_list.append((model_field, score))
            if score > best_score:
                best_score = score
                best_candidate = model_field
        # Keep top 3 suggestions for UI
        suggestions[header] = sorted(candidate_list, key=lambda x: x[1], reverse=True)[:3]
        # Apply only if score above threshold
        if best_candidate and best_score >= 0.82:
            mapping[header] = best_candidate
            used_db_fields.add(best_candidate)

    # 4) Any header explicitly mapped to None in legacy map is considered intentionally skipped
    for header in headers:
        if header not in mapping and header in legacy_map and legacy_map[header] is None:
            mapping[header] = None

    mapped_headers = {h: f for h, f in mapping.items() if f is not None}
    unmapped_headers = [h for h in headers if h not in mapping or mapping[h] is None]

    return {
        "mapping": mapping,
        "mapped_headers": mapped_headers,
        "unmapped_headers": unmapped_headers,
        "suggestions": suggestions,
    }
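
# Example (illustrative, assuming the File model exposes file_no/id/opened columns):
#   info = _build_dynamic_mapping(["File_No", "Id", "Opened", "Legacy_Junk"], File, "FILES.csv")
#   info["mapped_headers"]   -> {"File_No": "file_no", "Id": "id", "Opened": "opened"}
#   info["unmapped_headers"] -> ["Legacy_Junk"]  # typically no fuzzy match above the 0.82 threshold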


def _validate_required_headers(file_type: str, mapped_headers: Dict[str, str]) -> Dict[str, Any]:
    """Check that minimal required model fields for a given CSV type are present in mapped headers.

    Returns dict with: required_fields, missing_fields, ok.
    """
    required_fields = REQUIRED_MODEL_FIELDS.get(file_type, [])
    present_fields = set((mapped_headers or {}).values())
    missing_fields = [f for f in required_fields if f not in present_fields]
    return {
        "required_fields": required_fields,
        "missing_fields": missing_fields,
        "ok": len(missing_fields) == 0,
    }
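
# Example (illustrative):
#   _validate_required_headers("LEDGER.csv", {"File_No": "file_no", "Date": "date"})
#   -> {"required_fields": [...], "missing_fields": ["empl_num", "t_code", "t_type", "amount"], "ok": False}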


def _get_required_fields(model_class) -> List[str]:
    """Infer required (non-nullable) fields for a model to avoid DB errors.

    Excludes primary keys (which might be autoincrement or provided) and timestamp mixins.
    """
    required = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        if col.primary_key:
            # If PK is a string or composite, we cannot assume optional; handle separately
            continue
        try:
            is_required = not getattr(col, "nullable", True)
        except Exception:
            is_required = False
        if is_required:
            required.append(col.name)
    return required


def convert_value(value: str, field_name: str) -> Any:
    """Convert string value to appropriate type based on field name"""
    if not value or value.strip() == "" or value.strip().lower() in ["null", "none", "n/a"]:
        return None

    value = value.strip()

    # Date fields
    # Note: duration fields such as married_years/years_married contain "married"/"service"
    # but are not dates; they are excluded here and handled by the numeric branch below.
    if "years" not in field_name.lower() and any(word in field_name.lower() for word in [
        "date", "dob", "birth", "opened", "closed", "judgment", "valuation", "married", "vests_on", "service", "approved", "filed", "agreement"
    ]):
        parsed_date = parse_date(value)
        return parsed_date

    # Boolean fields
    if any(word in field_name.lower() for word in [
        "active", "default_printer", "billed", "transferable", "send",
        # PrinterSetup legacy toggles
        "phone_book", "rolodex_info", "envelope", "file_cabinet", "accounts", "statements", "calendar"
    ]):
        if value.lower() in ["true", "1", "yes", "y", "on", "active"]:
            return True
        elif value.lower() in ["false", "0", "no", "n", "off", "inactive"]:
            return False
        else:
            return None

    # Numeric fields (float)
    if any(word in field_name.lower() for word in [
        "rate", "hour", "bal", "fee", "amount", "owing", "transfer", "valu",
        "accrued", "vested", "cola", "tax", "percent", "benefit_amount", "mortality",
        "value", "years"
    ]) or field_name.lower().startswith(("na_", "le_")):
        try:
            # Remove currency symbols and commas
            cleaned_value = value.replace("$", "").replace(",", "").replace("%", "")
            return float(cleaned_value)
        except ValueError:
            return 0.0

    # Normalize debit_credit textual variants
    if field_name.lower() == "debit_credit":
        normalized = value.strip().upper()
        if normalized in ["D", "DEBIT"]:
            return "D"
        if normalized in ["C", "CREDIT"]:
            return "C"
        return normalized[:1] if normalized else None

    # Integer fields
    # Note: case_number is excluded because court case numbers are frequently alphanumeric
    # and must not be silently coerced to 0.
    if field_name.lower() != "case_number" and any(word in field_name.lower() for word in [
        "item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num", "month", "number"
    ]):
        try:
            return int(float(value))  # Handle cases like "1.0"
        except ValueError:
            # For employee numbers, return None to skip the record rather than 0
            if "empl_num" in field_name.lower():
                return None
            return 0

    # String fields - limit length to prevent database errors
    if len(value) > 500:  # Reasonable limit for most string fields
        return value[:500]

    return value
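
# Examples (illustrative):
#   convert_value("$1,250.00", "amount")   -> 1250.0
#   convert_value("01/15/2024", "opened")  -> date(2024, 1, 15)
#   convert_value("Y", "billed")           -> True
#   convert_value("", "note")              -> None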
|
|
|
|
|
|
def validate_foreign_keys(model_data: dict, model_class, db: Session) -> list[str]:
|
|
"""Validate foreign key relationships before inserting data"""
|
|
errors = []
|
|
|
|
# Check Phone -> Rolodex relationship
|
|
if model_class == Phone and "rolodex_id" in model_data:
|
|
rolodex_id = model_data["rolodex_id"]
|
|
if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
|
|
errors.append(f"Rolodex ID '{rolodex_id}' not found")
|
|
|
|
# Check File -> Rolodex relationship
|
|
if model_class == File and "id" in model_data:
|
|
rolodex_id = model_data["id"]
|
|
if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
|
|
errors.append(f"Owner Rolodex ID '{rolodex_id}' not found")
|
|
# Check File -> Footer relationship (default footer on file)
|
|
if model_class == File and "footer_code" in model_data:
|
|
footer = model_data.get("footer_code")
|
|
if footer:
|
|
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
|
|
if not exists:
|
|
errors.append(f"Footer code '{footer}' not found for File")
|
|
|
|
# Check FileStatus -> Footer (default footer exists)
|
|
if model_class == FileStatus and "footer_code" in model_data:
|
|
footer = model_data.get("footer_code")
|
|
if footer:
|
|
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
|
|
if not exists:
|
|
errors.append(f"Footer code '{footer}' not found for FileStatus")
|
|
|
|
# Check TransactionType -> Footer (default footer exists)
|
|
if model_class == TransactionType and "footer_code" in model_data:
|
|
footer = model_data.get("footer_code")
|
|
if footer:
|
|
exists = db.query(Footer).filter(Footer.footer_code == footer).first()
|
|
if not exists:
|
|
errors.append(f"Footer code '{footer}' not found for TransactionType")
|
|
|
|
# Check Ledger -> TransactionType/TransactionCode cross references
|
|
if model_class == Ledger:
|
|
# Validate t_type exists
|
|
if "t_type" in model_data:
|
|
t_type_value = model_data.get("t_type")
|
|
if t_type_value and not db.query(TransactionType).filter(TransactionType.t_type == t_type_value).first():
|
|
errors.append(f"Transaction type '{t_type_value}' not found")
|
|
# Validate t_code exists and matches t_type if both provided
|
|
if "t_code" in model_data:
|
|
t_code_value = model_data.get("t_code")
|
|
if t_code_value:
|
|
code_row = db.query(TransactionCode).filter(TransactionCode.t_code == t_code_value).first()
|
|
if not code_row:
|
|
errors.append(f"Transaction code '{t_code_value}' not found")
|
|
else:
|
|
ledger_t_type = model_data.get("t_type")
|
|
if ledger_t_type and getattr(code_row, "t_type", None) and code_row.t_type != ledger_t_type:
|
|
errors.append(
|
|
f"Transaction code '{t_code_value}' t_type '{code_row.t_type}' does not match ledger t_type '{ledger_t_type}'"
|
|
)
|
|
|
|
# Check Payment -> File and Rolodex relationships
|
|
if model_class == Payment:
|
|
if "file_no" in model_data:
|
|
file_no_value = model_data.get("file_no")
|
|
if file_no_value and not db.query(File).filter(File.file_no == file_no_value).first():
|
|
errors.append(f"File number '{file_no_value}' not found for Payment")
|
|
if "client_id" in model_data:
|
|
client_id_value = model_data.get("client_id")
|
|
if client_id_value and not db.query(Rolodex).filter(Rolodex.id == client_id_value).first():
|
|
errors.append(f"Client ID '{client_id_value}' not found for Payment")
|
|
|
|
# Check QDRO -> PlanInfo (plan_id exists)
|
|
if model_class == QDRO and "plan_id" in model_data:
|
|
plan_id = model_data.get("plan_id")
|
|
if plan_id:
|
|
exists = db.query(PlanInfo).filter(PlanInfo.plan_id == plan_id).first()
|
|
if not exists:
|
|
errors.append(f"Plan ID '{plan_id}' not found for QDRO")
|
|
|
|
# Add more foreign key validations as needed
|
|
return errors
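
# Example (illustrative, assuming no Rolodex row with id 'MISSING-1' exists): a Phone row
# pointing at a missing contact yields a readable error instead of a database
# IntegrityError at flush time.
#   validate_foreign_keys({"rolodex_id": "MISSING-1", "phone": "555-0100"}, Phone, db)
#   -> ["Rolodex ID 'MISSING-1' not found"]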
|
|
|
|
|
|
@router.get("/available-files")
|
|
async def get_available_csv_files(current_user: User = Depends(get_current_user)):
|
|
"""Get list of available CSV files for import"""
|
|
return {
|
|
"available_files": list(CSV_MODEL_MAPPING.keys()),
|
|
"descriptions": {
|
|
"ROLODEX.csv": "Customer/contact information",
|
|
"ROLEX_V.csv": "Customer/contact information (alias)",
|
|
"PHONE.csv": "Phone numbers linked to customers",
|
|
"FILES.csv": "Client files and cases",
|
|
"FILES_R.csv": "Client files and cases (alias)",
|
|
"FILES_V.csv": "Client files and cases (alias)",
|
|
"LEDGER.csv": "Financial transactions per file",
|
|
"QDROS.csv": "Legal documents and court orders",
|
|
"PENSIONS.csv": "Pension calculation data",
|
|
"SCHEDULE.csv": "Vesting schedules for pensions",
|
|
"MARRIAGE.csv": "Marriage history data",
|
|
"DEATH.csv": "Death benefit calculations",
|
|
"SEPARATE.csv": "Separation agreements",
|
|
"EMPLOYEE.csv": "Staff and employee information",
|
|
"STATES.csv": "US States lookup table",
|
|
"FILETYPE.csv": "File type categories",
|
|
"FILESTAT.csv": "File status codes",
|
|
"FOOTERS.csv": "Document footers and signatures",
|
|
"DEPOSITS.csv": "Daily bank deposit summaries",
|
|
"FILENOTS.csv": "File notes and case memos",
|
|
"FVARLKUP.csv": "Form template variables",
|
|
"RVARLKUP.csv": "Report template variables",
|
|
"PAYMENTS.csv": "Individual payments within deposits",
|
|
"TRNSACTN.csv": "Transaction details (maps to Ledger)",
|
|
"INX_LKUP.csv": "Form keywords lookup",
|
|
"PLANINFO.csv": "Pension plan information",
|
|
"RESULTS.csv": "Pension computed results",
|
|
"LIFETABL.csv": "Life expectancy table by age, sex, and race (rich typed)",
|
|
"NUMBERAL.csv": "Monthly survivor counts by sex and race (rich typed)"
|
|
},
|
|
"auto_discovery": True
|
|
}
|
|
|
|
|
|
@router.get("/template/{file_type}")
|
|
async def download_csv_template(
|
|
file_type: str,
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Download a minimal CSV template with required headers and one sample row.
|
|
|
|
Supported templates include: {list(CSV_IMPORT_TEMPLATES.keys())}
|
|
"""
|
|
key = (file_type or "").strip()
|
|
if key not in CSV_IMPORT_TEMPLATES:
|
|
raise HTTPException(status_code=400, detail=f"Unsupported template type: {file_type}. Choose one of: {list(CSV_IMPORT_TEMPLATES.keys())}")
|
|
|
|
content = _generate_csv_template_bytes(key)
|
|
|
|
from datetime import datetime as _dt
|
|
ts = _dt.now().strftime("%Y%m%d_%H%M%S")
|
|
safe_name = key.replace(".csv", "")
|
|
filename = f"{safe_name}_template_{ts}.csv"
|
|
return StreamingResponse(
|
|
iter([content]),
|
|
media_type="text/csv",
|
|
headers={"Content-Disposition": f"attachment; filename=\"{filename}\""},
|
|
)
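
# Example request (illustrative; assumes the router is mounted under /api/import, as in
# the bundle endpoint's docstring below):
#   GET /api/import/template/FILES.csv  ->  FILES_template_<timestamp>.csv attachment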
|
|
|
|
|
|
@router.get("/templates/bundle")
|
|
async def download_csv_templates_bundle(
|
|
files: Optional[List[str]] = Query(None, description="Repeat for each CSV template, e.g., files=FILES.csv&files=LEDGER.csv"),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Bundle selected CSV templates into a single ZIP.
|
|
|
|
Example: GET /api/import/templates/bundle?files=FILES.csv&files=LEDGER.csv
|
|
"""
|
|
requested = files or []
|
|
if not requested:
|
|
raise HTTPException(status_code=400, detail="Specify at least one 'files' query parameter")
|
|
|
|
# Normalize and validate
|
|
normalized: List[str] = []
|
|
for name in requested:
|
|
if not name:
|
|
continue
|
|
n = name.strip()
|
|
if not n.lower().endswith(".csv"):
|
|
n = f"{n}.csv"
|
|
n = n.upper()
|
|
if n in CSV_IMPORT_TEMPLATES:
|
|
normalized.append(n)
|
|
else:
|
|
# Ignore unknowns rather than fail the whole bundle
|
|
continue
|
|
|
|
# Deduplicate while preserving order
|
|
seen = set()
|
|
selected = []
|
|
for n in normalized:
|
|
if n not in seen:
|
|
seen.add(n)
|
|
selected.append(n)
|
|
|
|
if not selected:
|
|
raise HTTPException(status_code=400, detail=f"No supported templates requested. Supported: {list(CSV_IMPORT_TEMPLATES.keys())}")
|
|
|
|
zip_buffer = io.BytesIO()
|
|
with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
|
|
for fname in selected:
|
|
try:
|
|
content = _generate_csv_template_bytes(fname)
|
|
# Friendly name in zip: <BASENAME>_template.csv
|
|
base = fname.replace(".CSV", "").upper()
|
|
arcname = f"{base}_template.csv"
|
|
zf.writestr(arcname, content)
|
|
except HTTPException:
|
|
# Skip unsupported just in case
|
|
continue
|
|
|
|
zip_buffer.seek(0)
|
|
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
filename = f"csv_templates_{ts}.zip"
|
|
return StreamingResponse(
|
|
iter([zip_buffer.getvalue()]),
|
|
media_type="application/zip",
|
|
headers={
|
|
"Content-Disposition": f"attachment; filename=\"{filename}\""
|
|
},
|
|
)
|
|
|
|
|
|
@router.post("/upload/{file_type}")
|
|
async def import_csv_data(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import data from CSV file"""
|
|
|
|
# Validate file type
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Unsupported file type: {file_type}. Available types: {list(CSV_MODEL_MAPPING.keys())}"
|
|
)
|
|
|
|
# Validate file extension
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
# Legacy mapping hints used internally by auto-discovery; not used strictly
|
|
legacy_hint_map = FIELD_MAPPINGS.get(file_type, {})
|
|
|
|
try:
|
|
# Read CSV content
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
# Note: preprocess_csv helper removed as unused; robust parsing handled below
|
|
|
|
# Custom robust parser for problematic legacy CSV files
|
|
class MockCSVReader:
|
|
def __init__(self, data, fieldnames):
|
|
self.data = data
|
|
self.fieldnames = fieldnames
|
|
self.index = 0
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def __next__(self):
|
|
if self.index >= len(self.data):
|
|
raise StopIteration
|
|
row = self.data[self.index]
|
|
self.index += 1
|
|
return row
|
|
|
|
try:
|
|
lines = csv_content.strip().split('\n')
|
|
if not lines:
|
|
raise ValueError("Empty CSV file")
|
|
|
|
# Parse header using proper CSV parsing
|
|
header_reader = csv.reader(io.StringIO(lines[0]))
|
|
headers = next(header_reader)
|
|
headers = [h.strip() for h in headers]
|
|
# Debug logging removed in API path; rely on audit/logging if needed
|
|
# Build dynamic header mapping for this file/model
|
|
mapping_info = _build_dynamic_mapping(headers, model_class, file_type)
|
|
|
|
# Parse data rows with proper CSV parsing
|
|
rows_data = []
|
|
skipped_rows = 0
|
|
|
|
for line_num, line in enumerate(lines[1:], start=2):
|
|
# Skip empty lines
|
|
if not line.strip():
|
|
continue
|
|
|
|
try:
|
|
# Use proper CSV parsing to handle commas within quoted fields
|
|
line_reader = csv.reader(io.StringIO(line))
|
|
fields = next(line_reader)
|
|
fields = [f.strip() for f in fields]
|
|
|
|
# Skip rows that are clearly malformed (too few fields)
|
|
if len(fields) < len(headers) // 2: # Less than half the expected fields
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
# Pad or truncate to match header length
|
|
while len(fields) < len(headers):
|
|
fields.append('')
|
|
fields = fields[:len(headers)]
|
|
|
|
row_dict = dict(zip(headers, fields))
|
|
rows_data.append(row_dict)
|
|
|
|
                except Exception as row_error:
                    # 'fields' may be unbound here if csv.reader failed before assigning it;
                    # guard the logging context instead of risking a NameError.
                    row_context = dict(zip(headers, fields)) if 'fields' in locals() and len(fields) <= len(headers) else None
                    import_logger.log_import_error(line_num, str(row_error), row_context)
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
csv_reader = MockCSVReader(rows_data, headers)
|
|
# Parsing summary suppressed to avoid noisy stdout in API
|
|
|
|
except Exception as e:
|
|
# Keep error minimal for client; internal logging can capture 'e'
|
|
raise HTTPException(status_code=400, detail=f"Could not parse CSV file. The file appears to have serious formatting issues. Error: {str(e)}")
|
|
|
|
imported_count = 0
|
|
created_count = 0
|
|
updated_count = 0
|
|
errors = []
|
|
flexible_saved = 0
|
|
mapped_headers = mapping_info.get("mapped_headers", {})
|
|
unmapped_headers = mapping_info.get("unmapped_headers", [])
|
|
# Special handling: assign line numbers per form for FORM_LST.csv
|
|
form_lst_line_counters: Dict[str, int] = {}
|
|
|
|
# If replace_existing is True, delete all existing records and related flexible extras
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(csv_reader, start=2): # Start at 2 for header row
|
|
try:
|
|
# Convert CSV row to model data
|
|
model_data: Dict[str, Any] = {}
|
|
# Apply discovered mapping
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
|
|
# Inject sequential line_number for FORM_LST rows grouped by form_id
|
|
if file_type == "FORM_LST.csv":
|
|
form_id_value = model_data.get("form_id")
|
|
if form_id_value:
|
|
current = form_lst_line_counters.get(str(form_id_value), 0) + 1
|
|
form_lst_line_counters[str(form_id_value)] = current
|
|
# Only set if not provided
|
|
if "line_number" not in model_data:
|
|
model_data["line_number"] = current
|
|
|
|
# Skip empty rows
|
|
if not any(model_data.values()):
|
|
continue
|
|
|
|
# Fallback: if required non-nullable fields are missing, store row as flexible only
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
# Do not attempt to insert into strict model; continue to next row
|
|
continue
|
|
|
|
# Special validation for models with required fields
|
|
if model_class == Phone:
|
|
if 'phone' not in model_data or not model_data['phone']:
|
|
continue # Skip phone records without a phone number
|
|
|
|
if model_class == Rolodex:
|
|
if 'last' not in model_data or not model_data['last']:
|
|
continue # Skip rolodex records without a last name/company name
|
|
|
|
if model_class == Ledger:
|
|
# Skip ledger records without required fields
|
|
if 'empl_num' not in model_data or not model_data['empl_num']:
|
|
continue # Skip ledger records without employee number
|
|
if 'file_no' not in model_data or not model_data['file_no']:
|
|
continue # Skip ledger records without file number
|
|
|
|
# Create or update model instance
|
|
instance = None
|
|
# Upsert behavior for printers
|
|
if model_class == PrinterSetup:
|
|
# Determine primary key field name
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name_local = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value_local = model_data.get(pk_field_name_local) if pk_field_name_local else None
|
|
if pk_field_name_local and pk_value_local:
|
|
existing = db.query(model_class).filter(getattr(model_class, pk_field_name_local) == pk_value_local).first()
|
|
if existing:
|
|
# Update mutable fields
|
|
for k, v in model_data.items():
|
|
if k != pk_field_name_local:
|
|
setattr(existing, k, v)
|
|
instance = existing
|
|
updated_count += 1
|
|
else:
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
created_count += 1
|
|
else:
|
|
# Fallback to insert if PK missing
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
created_count += 1
|
|
db.flush()
|
|
# Enforce single default
|
|
try:
|
|
if bool(model_data.get("default_printer")):
|
|
db.query(model_class).filter(getattr(model_class, pk_field_name_local) != getattr(instance, pk_field_name_local)).update({model_class.default_printer: False})
|
|
except Exception:
|
|
pass
|
|
else:
|
|
# FK validation for known relationships
|
|
fk_errors = validate_foreign_keys(model_data, model_class, db)
|
|
if fk_errors:
|
|
for msg in fk_errors:
|
|
errors.append({"row": row_num, "error": msg})
|
|
# Persist as flexible for traceability
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"fk_errors": fk_errors,
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
continue
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush() # Ensure PK is available
|
|
|
|
# Capture PK details for flexible storage linkage (single-column PKs only)
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
|
|
# Save unmapped fields into flexible storage (privacy-first, per-row JSON)
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=extra_data,
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
imported_count += 1
|
|
|
|
# Commit every 100 records to avoid memory issues
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
|
|
except Exception as e:
|
|
# Rollback the transaction for this record
|
|
db.rollback()
|
|
# As a robustness measure, persist row in flexible storage instead of counting as error
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
except Exception as flex_e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
|
|
"data": row,
|
|
})
|
|
continue
|
|
|
|
# Final commit
|
|
db.commit()
|
|
|
|
result = {
|
|
"file_type": file_type,
|
|
"imported_count": imported_count,
|
|
"errors": errors[:10], # Limit errors to first 10
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"mapped_headers": mapped_headers,
|
|
"unmapped_headers": unmapped_headers,
|
|
"flexible_saved_rows": flexible_saved,
|
|
},
|
|
"validation": {
|
|
"fk_errors": len([e for e in errors if isinstance(e, dict) and 'error' in e and 'not found' in str(e['error']).lower()])
|
|
}
|
|
}
|
|
# Include create/update breakdown for printers
|
|
if file_type == "PRINTERS.csv":
|
|
result["created_count"] = created_count
|
|
result["updated_count"] = updated_count
|
|
|
|
if errors:
|
|
result["warning"] = f"Import completed with {len(errors)} errors"
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
# Suppress stdout debug prints in API layer
|
|
db.rollback()
|
|
raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
|
|
|
|
|
|
@router.get("/status")
|
|
async def get_import_status(db: Session = Depends(get_db), current_user: User = Depends(get_current_user)):
|
|
"""Get current import status and record counts"""
|
|
|
|
status = {}
|
|
|
|
for file_type, model_class in CSV_MODEL_MAPPING.items():
|
|
try:
|
|
count = db.query(model_class).count()
|
|
status[file_type] = {
|
|
"table_name": model_class.__tablename__,
|
|
"record_count": count
|
|
}
|
|
except Exception as e:
|
|
status[file_type] = {
|
|
"table_name": model_class.__tablename__,
|
|
"record_count": 0,
|
|
"error": str(e)
|
|
}
|
|
|
|
return status
|
|
|
|
|
|
@router.delete("/clear/{file_type}")
|
|
async def clear_table_data(
|
|
file_type: str,
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Clear all data from a specific table"""
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(status_code=400, detail=f"Unknown file type: {file_type}")
|
|
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
|
|
try:
|
|
deleted_count = db.query(model_class).count()
|
|
db.query(model_class).delete()
|
|
# Also clear any flexible rows linked to this target table and file type
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
return {
|
|
"file_type": file_type,
|
|
"table_name": model_class.__tablename__,
|
|
"deleted_count": deleted_count
|
|
}
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
raise HTTPException(status_code=500, detail=f"Clear operation failed: {str(e)}")
|
|
|
|
|
|
@router.post("/validate/{file_type}")
|
|
async def validate_csv_file(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate CSV file structure without importing"""
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}")
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
# Use auto-discovery mapping for validation
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues on mapped fields
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
return {
|
|
"file_type": file_type,
|
|
# Consider valid if we can map at least one column; we don't require exact header match
|
|
"valid": len(mapped_headers) > 0 and len(errors) == 0,
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers,
|
|
},
|
|
"sample_data": sample_rows,
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
}
|
|
|
|
except Exception as e:
|
|
# Suppress stdout debug prints in API layer
|
|
raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")
|
|
|
|
|
|
@router.get("/progress/{import_id}")
|
|
async def get_import_progress(
|
|
import_id: str,
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Get import progress status (placeholder for future implementation)"""
|
|
# This would be used for long-running imports with background tasks
|
|
return {
|
|
"import_id": import_id,
|
|
"status": "not_implemented",
|
|
"message": "Real-time progress tracking not yet implemented"
|
|
}
|
|
|
|
|
|
@router.get("/current-batch")
|
|
async def get_current_batch(
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return the most recent running batch import for the current user, if any."""
|
|
try:
|
|
row = (
|
|
db.query(ImportAudit)
|
|
.filter(ImportAudit.status == "running")
|
|
.filter(ImportAudit.initiated_by_user_id == getattr(current_user, "id", None))
|
|
.order_by(ImportAudit.started_at.desc())
|
|
.first()
|
|
)
|
|
if not row:
|
|
return {"running": False}
|
|
return {
|
|
"running": True,
|
|
"audit_id": row.id,
|
|
"started_at": row.started_at.isoformat() if row.started_at else None,
|
|
"total_files": row.total_files,
|
|
"message": row.message,
|
|
}
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=f"Failed to get current batch: {str(e)}")
|
|
|
|
|
|
@router.get("/batch-progress/{audit_id}")
|
|
async def get_batch_progress(
|
|
audit_id: int,
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return real-time progress for a batch import using audit tables as the source of truth."""
|
|
audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
|
|
if not audit:
|
|
raise HTTPException(status_code=404, detail="Batch not found")
|
|
|
|
# Authorization: allow only the initiating user or admins to view progress
|
|
try:
|
|
from app.utils.enhanced_auth import is_admin_user
|
|
is_admin = is_admin_user(current_user)
|
|
except Exception:
|
|
is_admin = False
|
|
if not is_admin and getattr(current_user, "id", None) != getattr(audit, "initiated_by_user_id", None):
|
|
raise HTTPException(status_code=403, detail="Not authorized to view this batch progress")
|
|
|
|
# Aggregate per-file results to compute progress
|
|
processed_files = db.query(ImportAuditFile).filter(ImportAuditFile.audit_id == audit.id).count()
|
|
successful_files = db.query(ImportAuditFile).filter(
|
|
ImportAuditFile.audit_id == audit.id,
|
|
ImportAuditFile.status.in_(["success", "completed_with_errors", "skipped"])
|
|
).count()
|
|
failed_files = db.query(ImportAuditFile).filter(
|
|
ImportAuditFile.audit_id == audit.id,
|
|
ImportAuditFile.status == "failed"
|
|
).count()
|
|
|
|
total_files = audit.total_files or 0
|
|
percent_complete: float = 0.0
|
|
if total_files > 0:
|
|
try:
|
|
percent_complete = round((processed_files / total_files) * 100, 1)
|
|
except Exception:
|
|
percent_complete = 0.0
|
|
|
|
data = {
|
|
"audit_id": audit.id,
|
|
"status": audit.status,
|
|
"total_files": total_files,
|
|
"processed_files": processed_files,
|
|
"successful_files": successful_files,
|
|
"failed_files": failed_files,
|
|
"started_at": audit.started_at.isoformat() if audit.started_at else None,
|
|
"finished_at": audit.finished_at.isoformat() if audit.finished_at else None,
|
|
"percent": percent_complete,
|
|
"message": audit.message,
|
|
}
|
|
|
|
# Include a brief summary of last processed file if desired (best-effort)
|
|
try:
|
|
last_file = (
|
|
db.query(ImportAuditFile)
|
|
.filter(ImportAuditFile.audit_id == audit.id)
|
|
.order_by(ImportAuditFile.id.desc())
|
|
.first()
|
|
)
|
|
if last_file:
|
|
data["last_file"] = {
|
|
"file_type": last_file.file_type,
|
|
"status": last_file.status,
|
|
"imported_count": last_file.imported_count,
|
|
"errors": last_file.errors,
|
|
"message": last_file.message,
|
|
"created_at": last_file.created_at.isoformat() if last_file.created_at else None,
|
|
}
|
|
except Exception:
|
|
pass
|
|
|
|
return data
|
|
|
|
|
|
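# Batch validation mirrors the single-file validator but never writes to the database: each
# file gets header mapping, a required-header check, and type-conversion checks on a small
# sample of rows. A file is reported valid only when at least one column maps, no sampled
# value fails conversion, and the required-header check passes.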
@router.post("/batch-validate")
|
|
async def batch_validate_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate multiple CSV files without importing"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
validation_results = []
|
|
|
|
for file in files:
|
|
file_type = file.filename
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Unsupported file type: {file_type}"
|
|
})
|
|
continue
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "File must be a CSV file"
|
|
})
|
|
continue
|
|
|
|
model_class = CSV_MODEL_MAPPING.get(file_type)
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files (include BOM-friendly utf-8-sig)
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
|
|
# Handle CSV parsing issues with legacy files
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
|
|
# Check headers and build dynamic mapping
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
header_validation = _validate_required_headers(file_type, mapped_headers)
|
|
header_validation = _validate_required_headers(file_type, mapped_headers)
|
|
header_validation = _validate_required_headers(file_type, mapped_headers)
|
|
header_validation = _validate_required_headers(file_type, mapped_headers)
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": (len(mapped_headers) > 0 and len(errors) == 0 and header_validation.get("ok", True)),
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers
|
|
},
|
|
"header_validation": header_validation,
|
|
"sample_data": sample_rows[:5], # Limit sample data for batch operation
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
})
|
|
|
|
# Reset file pointer for potential future use
|
|
await file.seek(0)
|
|
|
|
except Exception as e:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Validation failed: {str(e)}"
|
|
})
|
|
|
|
# Summary statistics
|
|
total_files = len(validation_results)
|
|
valid_files = len([r for r in validation_results if r["valid"]])
|
|
invalid_files = total_files - valid_files
|
|
|
|
return {
|
|
"batch_validation_results": validation_results,
|
|
"summary": {
|
|
"total_files": total_files,
|
|
"valid_files": valid_files,
|
|
"invalid_files": invalid_files,
|
|
"all_valid": invalid_files == 0
|
|
}
|
|
}
|
|
|
|
|
|
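# Batch upload processes files in dependency order (IMPORT_ORDER), so lookup tables such as
# STATES.csv and EMPLOYEE.csv land before the records that reference them (ROLODEX.csv,
# FILES.csv, LEDGER.csv, ...). Files not listed in IMPORT_ORDER are appended at the end and,
# when no model mapping exists for them, fall back to flexible storage.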
@router.post("/batch-upload")
|
|
async def batch_import_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import multiple CSV files in optimal order"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
# Define optimal import order based on dependencies
|
|
import_order = IMPORT_ORDER
|
|
|
|
# Sort uploaded files by optimal import order
|
|
file_map = {f.filename: f for f in files}
|
|
ordered_files = []
|
|
|
|
for file_type in import_order:
|
|
if file_type in file_map:
|
|
ordered_files.append((file_type, file_map[file_type]))
|
|
del file_map[file_type]
|
|
|
|
# Add any remaining files not in the predefined order
|
|
for filename, file in file_map.items():
|
|
ordered_files.append((filename, file))
|
|
|
|
results = []
|
|
total_imported = 0
|
|
total_errors = 0
|
|
|
|
# Create import audit row (running)
|
|
audit_row = ImportAudit(
|
|
status="running",
|
|
total_files=len(files),
|
|
successful_files=0,
|
|
failed_files=0,
|
|
total_imported=0,
|
|
total_errors=0,
|
|
initiated_by_user_id=getattr(current_user, "id", None),
|
|
initiated_by_username=getattr(current_user, "username", None),
|
|
message="Batch import started",
|
|
)
|
|
db.add(audit_row)
|
|
db.commit()
|
|
db.refresh(audit_row)
|
|
# Broadcast initial snapshot
|
|
try:
|
|
await _broadcast_import_progress(db, audit_row.id)
|
|
except Exception:
|
|
pass
|
|
|
|
# Directory to persist uploaded files for this audit (for reruns)
|
|
audit_dir = Path(settings.upload_dir).joinpath("import_audits", str(audit_row.id))
|
|
try:
|
|
audit_dir.mkdir(parents=True, exist_ok=True)
|
|
except Exception:
|
|
pass
|
|
|
|
for file_type, file in ordered_files:
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
# Fallback flexible-only import for unknown file structures
|
|
try:
|
|
# Use async file operations for better performance
|
|
from app.services.async_file_operations import async_file_ops
|
|
|
|
# Stream save to disk for potential reruns and processing
|
|
saved_path = None
|
|
try:
|
|
relative_path = f"import_audits/{audit_row.id}/{file_type}"
|
|
saved_file_path, file_size, checksum = await async_file_ops.stream_upload_file(
|
|
file, relative_path
|
|
)
|
|
saved_path = str(async_file_ops.base_upload_dir / relative_path)
|
|
|
|
# Stream read for processing
|
|
content = b""
|
|
async for chunk in async_file_ops.stream_read_file(relative_path):
|
|
content += chunk
|
|
|
|
except Exception as e:
|
|
# Fallback to traditional method
|
|
await file.seek(0)
|
|
content = await file.read()
|
|
try:
|
|
file_path = audit_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as fh:
|
|
fh.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
rows_list, headers = parse_csv_with_fallback(csv_content)
|
|
flexible_count = 0
|
|
for row in rows_list:
|
|
# Save entire row as flexible JSON
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=None,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
|
|
)
|
|
)
|
|
flexible_count += 1
|
|
if flexible_count % 200 == 0:
|
|
db.commit()
|
|
db.commit()
|
|
total_imported += flexible_count
|
|
# Persist per-file result row
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success",
|
|
"imported_count": flexible_count,
|
|
"errors": 0,
|
|
"message": f"Stored {flexible_count} rows as flexible data (no known model)",
|
|
"auto_mapping": {
|
|
"mapped_headers": {},
|
|
"unmapped_headers": list(headers),
|
|
"flexible_saved_rows": flexible_count,
|
|
},
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="success",
|
|
imported_count=flexible_count,
|
|
errors=0,
|
|
message=f"Stored {flexible_count} rows as flexible data",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
try:
|
|
await _broadcast_import_progress(db, audit_row.id)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": f"Flexible import failed: {str(e)}"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Flexible import failed: {str(e)}",
|
|
details={}
|
|
))
|
|
db.commit()
|
|
try:
|
|
await _broadcast_import_progress(db, audit_row.id)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
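        # Known-model path: rows are mapped onto the SQLAlchemy model via the dynamic header
        # mapping; anything that cannot be imported cleanly (missing required fields, FK
        # errors, row-level exceptions) is preserved in FlexibleImport instead of being lost.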
        try:
            # Reset file pointer
            await file.seek(0)

            # Import this file using auto-discovery mapping
            model_class = CSV_MODEL_MAPPING[file_type]

            content = await file.read()
            # Save original upload to disk for potential reruns
            saved_path = None
            try:
                file_path = audit_dir.joinpath(file_type)
                with open(file_path, "wb") as fh:
                    fh.write(content)
                saved_path = str(file_path)
            except Exception:
                saved_path = None

            # Try multiple encodings for legacy CSV files
            encodings = ENCODINGS
            csv_content = None
            for encoding in encodings:
                try:
                    csv_content = content.decode(encoding)
                    break
                except UnicodeDecodeError:
                    continue

            if csv_content is None:
                results.append({
                    "file_type": file_type,
                    "status": "failed",
                    "message": "Could not decode CSV file encoding"
                })
                try:
                    db.add(ImportAuditFile(
                        audit_id=audit_row.id,
                        file_type=file_type,
                        status="failed",
                        imported_count=0,
                        errors=1,
                        message="Could not decode CSV file encoding",
                        details={"saved_path": saved_path} if saved_path else {}
                    ))
                    db.commit()
                    try:
                        await _broadcast_import_progress(db, audit_row.id)
                    except Exception:
                        pass
                except Exception:
                    db.rollback()
                continue

            # Handle CSV parsing issues with legacy files
            rows_list, csv_headers = parse_csv_with_fallback(csv_content)
            mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
            mapped_headers = mapping_info["mapped_headers"]
            unmapped_headers = mapping_info["unmapped_headers"]
            header_validation = _validate_required_headers(file_type, mapped_headers)

            imported_count = 0
            errors = []
            flexible_saved = 0
            fk_error_summary: Dict[str, int] = {}
            # Special handling: assign line numbers per form for FORM_LST.csv
            form_lst_line_counters: Dict[str, int] = {}

            # If replace_existing is True, clear existing rows for this model (and its flexible leftovers) before importing
            if replace_existing:
                db.query(model_class).delete()
                db.query(FlexibleImport).filter(
                    FlexibleImport.file_type == file_type,
                    FlexibleImport.target_table == model_class.__tablename__,
                ).delete()
                db.commit()

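            # Per-row ladder (summary of the loop below): convert mapped values, inject
            # line_number for FORM_LST.csv, skip effectively-empty rows, divert rows with
            # missing required fields or FK violations to FlexibleImport, otherwise insert
            # the model instance and stash any unmapped columns as flexible extras.
            # Commits happen every 100 successful rows to bound transaction size.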
            for row_num, row in enumerate(rows_list, start=2):
                try:
                    model_data = {}
                    for csv_field, db_field in mapped_headers.items():
                        if csv_field in row and db_field is not None:
                            converted_value = convert_value(row[csv_field], db_field)
                            if converted_value is not None:
                                model_data[db_field] = converted_value

                    # Inject sequential line_number for FORM_LST rows grouped by form_id
                    if file_type == "FORM_LST.csv":
                        form_id_value = model_data.get("form_id")
                        if form_id_value:
                            current = form_lst_line_counters.get(str(form_id_value), 0) + 1
                            form_lst_line_counters[str(form_id_value)] = current
                            if "line_number" not in model_data:
                                model_data["line_number"] = current

                    if not any(model_data.values()):
                        continue

                    # Fallback: if required non-nullable fields are missing, store row as flexible only
                    required_fields = _get_required_fields(model_class)
                    missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
                    if missing_required:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data=make_json_safe({
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "missing_required": missing_required,
                                }),
                            )
                        )
                        flexible_saved += 1
                        continue

                    # Special validation for models with required fields
                    if model_class == Phone:
                        if 'phone' not in model_data or not model_data['phone']:
                            continue  # Skip phone records without a phone number

                    if model_class == Rolodex:
                        if 'last' not in model_data or not model_data['last']:
                            continue  # Skip rolodex records without a last name/company name

                    if model_class == Ledger:
                        # Skip ledger records without required fields
                        if 'empl_num' not in model_data or not model_data['empl_num']:
                            continue  # Skip ledger records without employee number
                        if 'file_no' not in model_data or not model_data['file_no']:
                            continue  # Skip ledger records without file number

                    # FK validation for known relationships
                    fk_errors = validate_foreign_keys(model_data, model_class, db)
                    if fk_errors:
                        for msg in fk_errors:
                            errors.append({"row": row_num, "error": msg})
                            fk_error_summary[msg] = fk_error_summary.get(msg, 0) + 1
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data=make_json_safe({
                                    "mapped": model_data,
                                    "fk_errors": fk_errors,
                                }),
                            )
                        )
                        flexible_saved += 1
                        continue

                    instance = model_class(**model_data)
                    db.add(instance)
                    db.flush()

                    # Link flexible extras
                    _, pk_names = _get_model_columns(model_class)
                    pk_field_name = pk_names[0] if len(pk_names) == 1 else None
                    pk_value = None
                    if pk_field_name:
                        try:
                            pk_value = getattr(instance, pk_field_name)
                        except Exception:
                            pk_value = None
                    extra_data = {}
                    for csv_field in unmapped_headers:
                        if csv_field in row and row[csv_field] not in (None, ""):
                            extra_data[csv_field] = row[csv_field]
                    if extra_data:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=pk_field_name,
                                primary_key_value=str(pk_value) if pk_value is not None else None,
                                extra_data=make_json_safe(extra_data),
                            )
                        )
                        flexible_saved += 1
                    imported_count += 1

                    if imported_count % 100 == 0:
                        db.commit()

                except Exception as e:
                    # Rollback the transaction for this record
                    db.rollback()
                    # Persist row in flexible storage instead of counting as error only
                    try:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data=make_json_safe({
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "error": str(e),
                                }),
                            )
                        )
                        flexible_saved += 1
                    except Exception as flex_e:
                        errors.append({
                            "row": row_num,
                            "error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
                        })
                    continue

            db.commit()

            total_imported += imported_count
            total_errors += len(errors)

            results.append({
                "file_type": file_type,
                "status": "success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
                "imported_count": imported_count,
                "errors": len(errors),
                "message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
                "header_validation": header_validation,
                "validation": {
                    "fk_errors_total": sum(fk_error_summary.values()),
                    "fk_error_summary": fk_error_summary,
                },
                "auto_mapping": {
                    "mapped_headers": mapped_headers,
                    "unmapped_headers": unmapped_headers,
                    "flexible_saved_rows": flexible_saved,
                },
            })
            try:
                db.add(ImportAuditFile(
                    audit_id=audit_row.id,
                    file_type=file_type,
                    status="success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
                    imported_count=imported_count,
                    errors=len(errors),
                    message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
                    details={
                        "mapped_headers": list(mapped_headers.keys()),
                        "unmapped_count": len(unmapped_headers),
                        "flexible_saved_rows": flexible_saved,
                        "fk_errors_total": sum(fk_error_summary.values()),
                        "fk_error_summary": fk_error_summary,
                        "header_validation": header_validation,
                        **({"saved_path": saved_path} if saved_path else {}),
                    }
                ))
                db.commit()
                try:
                    await _broadcast_import_progress(db, audit_row.id)
                except Exception:
                    pass
            except Exception:
                db.rollback()

        except Exception as e:
            db.rollback()
            results.append({
                "file_type": file_type,
                "status": "failed",
                "message": f"Import failed: {str(e)}"
            })
            try:
                db.add(ImportAuditFile(
                    audit_id=audit_row.id,
                    file_type=file_type,
                    status="failed",
                    imported_count=0,
                    errors=1,
                    message=f"Import failed: {str(e)}",
                    details={"saved_path": saved_path} if saved_path else {}
                ))
                db.commit()
                try:
                    await _broadcast_import_progress(db, audit_row.id)
                except Exception:
                    pass
            except Exception:
                db.rollback()

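    # Roll-up rules: the batch is "success" only when no file failed and no row errors were
    # recorded; "completed_with_errors" when at least one file succeeded; otherwise "failed".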
    summary = {
        "total_files": len(files),
        "successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
        "failed_files": len([r for r in results if r["status"] == "failed"]),
        "total_imported": total_imported,
        "total_errors": total_errors
    }

    # Finalize audit row
    try:
        audit_row.successful_files = summary["successful_files"]
        audit_row.failed_files = summary["failed_files"]
        audit_row.total_imported = summary["total_imported"]
        audit_row.total_errors = summary["total_errors"]
        audit_row.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
            "completed_with_errors" if summary["successful_files"] > 0 else "failed"
        )
        audit_row.message = f"Batch import completed: {audit_row.successful_files}/{audit_row.total_files} files"
        audit_row.finished_at = datetime.now(timezone.utc)
        audit_row.details = {
            "files": [
                {"file_type": r.get("file_type"), "status": r.get("status"), "imported_count": r.get("imported_count", 0), "errors": r.get("errors", 0)}
                for r in results
            ]
        }
        db.add(audit_row)
        db.commit()
        try:
            await _broadcast_import_progress(db, audit_row.id)
        except Exception:
            pass
    except Exception:
        db.rollback()

    return {
        "batch_results": results,
        "summary": summary
    }


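# WebSocket subscribers receive progress snapshots for a single batch, published on the
# per-batch topic f"import_batch_progress_{audit_id}". A client would typically connect to
# ws(s)://<host><router prefix>/batch-progress/ws/<audit_id> with its auth token; the exact
# prefix depends on how this router is mounted (assumption, not shown in this module).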
@router.websocket("/batch-progress/ws/{audit_id}")
async def ws_import_batch_progress(websocket: WebSocket, audit_id: int):
    """WebSocket: subscribe to real-time updates for an import audit using the pool."""
    # Authenticate first (without accepting) to enforce authorization before subscribing
    user = await websocket_manager.authenticate_websocket(websocket)
    if not user:
        # Authentication failure handled here to avoid accepting unauthorized connections
        try:
            await websocket.close(code=4401, reason="Authentication failed")
        except Exception:
            pass
        return

    # Authorization: only the initiating user or admins may subscribe to this audit stream
    db = SessionLocal()
    try:
        audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
        if not audit:
            try:
                await websocket.close(code=4404, reason="Batch not found")
            except Exception:
                pass
            return

        is_admin = bool(getattr(user, "is_admin", False))
        if not is_admin and getattr(user, "id", None) != getattr(audit, "initiated_by_user_id", None):
            import_logger.warning(
                "Unauthorized WS subscription attempt for import batch",
                audit_id=audit_id,
                user_id=getattr(user, "id", None),
                username=getattr(user, "username", None),
            )
            try:
                await websocket.close(code=4403, reason="Not authorized to subscribe to this batch")
            except Exception:
                pass
            return
    finally:
        db.close()

    topic = f"import_batch_progress_{audit_id}"

    async def handle_ws_message(connection_id: str, message: WebSocketMessage):
        # No-op for now; reserved for future client messages
        import_logger.debug(
            "Import WS msg",
            connection_id=connection_id,
            audit_id=audit_id,
            type=message.type,
        )

    connection_id = await websocket_manager.handle_connection(
        websocket=websocket,
        topics={topic},
        require_auth=True,
        metadata={"audit_id": audit_id, "endpoint": "import_batch_progress"},
        message_handler=handle_ws_message,
    )

    if connection_id:
        db = SessionLocal()
        try:
            audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
            payload = _aggregate_batch_progress(db, audit) if audit else None
            if payload:
                initial_message = WebSocketMessage(
                    type="progress",
                    topic=topic,
                    data=payload,
                )
                await websocket_manager.pool._send_to_connection(connection_id, initial_message)
        finally:
            db.close()


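# Listing endpoint for the import history view. Illustrative call (path prefix depends on how
# this router is mounted): GET /recent-batches?limit=10&offset=0&status=failed&start=2024-01-01T00:00:00
# returns {"recent": [...], "total": N, "limit": 10, "offset": 0}.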
@router.get("/recent-batches")
|
|
async def recent_batch_imports(
|
|
limit: int = Query(5, ge=1, le=50),
|
|
offset: int = Query(0, ge=0),
|
|
status: Optional[str] = Query(None, description="Filter by status: running|success|completed_with_errors|failed"),
|
|
start: Optional[str] = Query(None, description="ISO datetime start for started_at filter"),
|
|
end: Optional[str] = Query(None, description="ISO datetime end for started_at filter"),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return recent batch import audit rows (most recent first) with optional filters and pagination."""
|
|
q = db.query(ImportAudit)
|
|
if status and status.lower() != "all":
|
|
q = q.filter(ImportAudit.status == status)
|
|
# Date range filters on started_at
|
|
try:
|
|
if start:
|
|
start_dt = datetime.fromisoformat(start)
|
|
q = q.filter(ImportAudit.started_at >= start_dt)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if end:
|
|
end_dt = datetime.fromisoformat(end)
|
|
q = q.filter(ImportAudit.started_at <= end_dt)
|
|
except Exception:
|
|
pass
|
|
total = q.count()
|
|
rows = (
|
|
q.order_by(ImportAudit.started_at.desc())
|
|
.offset(offset)
|
|
.limit(limit)
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
}
|
|
return {"recent": [_row(r) for r in rows], "total": total, "limit": limit, "offset": offset}
|
|
|
|
|
|
@router.get("/recent-batches/{audit_id}")
|
|
async def get_batch_details(
|
|
audit_id: int,
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return a specific audit entry with per-file details."""
|
|
audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
|
|
if not audit:
|
|
raise HTTPException(status_code=404, detail="Audit entry not found")
|
|
files = (
|
|
db.query(ImportAuditFile)
|
|
.filter(ImportAuditFile.audit_id == audit.id)
|
|
.order_by(ImportAuditFile.id.asc())
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
"details": r.details or {},
|
|
}
|
|
def _file(f: ImportAuditFile):
|
|
return {
|
|
"id": f.id,
|
|
"file_type": f.file_type,
|
|
"status": f.status,
|
|
"imported_count": f.imported_count,
|
|
"errors": f.errors,
|
|
"message": f.message,
|
|
"details": f.details or {},
|
|
"created_at": f.created_at.isoformat() if f.created_at else None,
|
|
}
|
|
return {"audit": _row(audit), "files": [_file(f) for f in files]}
|
|
|
|
|
|
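# Rerun support relies on the copies saved under settings.upload_dir/import_audits/<audit id>
# during the original batch: each failed ImportAuditFile row is re-read from its recorded
# "saved_path" and replayed into a brand-new audit whose details carry {"rerun_of": <id>}.
# Files whose saved copy is missing are skipped; if none remain, the request is rejected.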
@router.post("/recent-batches/{audit_id}/rerun-failed")
|
|
async def rerun_failed_files(
|
|
audit_id: int,
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Re-run only failed files for a given audit. Creates a new audit entry for the rerun."""
|
|
prior = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
|
|
if not prior:
|
|
raise HTTPException(status_code=404, detail="Audit entry not found")
|
|
failed_files: List[ImportAuditFile] = (
|
|
db.query(ImportAuditFile)
|
|
.filter(ImportAuditFile.audit_id == audit_id, ImportAuditFile.status == "failed")
|
|
.all()
|
|
)
|
|
if not failed_files:
|
|
raise HTTPException(status_code=400, detail="No failed files to rerun for this audit")
|
|
|
|
# Build list of (file_type, path) that exist
|
|
items: List[Tuple[str, str]] = []
|
|
for f in failed_files:
|
|
saved_path = None
|
|
try:
|
|
saved_path = (f.details or {}).get("saved_path")
|
|
except Exception:
|
|
saved_path = None
|
|
if saved_path and os.path.exists(saved_path):
|
|
items.append((f.file_type, saved_path))
|
|
if not items:
|
|
raise HTTPException(status_code=400, detail="No saved files available to rerun. Upload again.")
|
|
|
|
# Import order for sorting
|
|
import_order = IMPORT_ORDER
|
|
order_index = {name: i for i, name in enumerate(import_order)}
|
|
items.sort(key=lambda x: order_index.get(x[0], len(import_order) + 1))
|
|
|
|
# Create new audit row for rerun
|
|
rerun_audit = ImportAudit(
|
|
status="running",
|
|
total_files=len(items),
|
|
successful_files=0,
|
|
failed_files=0,
|
|
total_imported=0,
|
|
total_errors=0,
|
|
initiated_by_user_id=getattr(current_user, "id", None),
|
|
initiated_by_username=getattr(current_user, "username", None),
|
|
message=f"Rerun failed files for audit #{audit_id}",
|
|
details={"rerun_of": audit_id},
|
|
)
|
|
db.add(rerun_audit)
|
|
db.commit()
|
|
db.refresh(rerun_audit)
|
|
|
|
# Directory to persist rerun files
|
|
rerun_dir = Path(settings.upload_dir).joinpath("import_audits", str(rerun_audit.id))
|
|
try:
|
|
rerun_dir.mkdir(parents=True, exist_ok=True)
|
|
except Exception:
|
|
pass
|
|
|
|
results: List[Dict[str, Any]] = []
|
|
total_imported = 0
|
|
total_errors = 0
|
|
|
|
for file_type, path in items:
|
|
try:
|
|
with open(path, "rb") as fh:
|
|
content = fh.read()
|
|
# Save a copy under the rerun audit
|
|
saved_path = None
|
|
try:
|
|
file_path = rerun_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as out:
|
|
out.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
# Flexible-only path
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for enc in encodings:
|
|
try:
|
|
csv_content = content.decode(enc)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=rerun_audit.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message="Could not decode CSV file encoding",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
rows_list, _headers = parse_csv_with_fallback(csv_content)
|
|
flexible_count = 0
|
|
for row in rows_list:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=None,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
|
|
)
|
|
)
|
|
flexible_count += 1
|
|
if flexible_count % 200 == 0:
|
|
db.commit()
|
|
db.commit()
|
|
total_imported += flexible_count
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success",
|
|
"imported_count": flexible_count,
|
|
"errors": 0,
|
|
"message": f"Stored {flexible_count} rows as flexible data (no known model)",
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=rerun_audit.id,
|
|
file_type=file_type,
|
|
status="success",
|
|
imported_count=flexible_count,
|
|
errors=0,
|
|
message=f"Stored {flexible_count} rows as flexible data",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
# Known model path
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for enc in encodings:
|
|
try:
|
|
csv_content = content.decode(enc)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=rerun_audit.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message="Could not decode CSV file encoding",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
|
csv_headers = csv_reader.fieldnames or []
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
header_validation = _validate_required_headers(file_type, mapped_headers)
|
|
imported_count = 0
|
|
errors: List[Dict[str, Any]] = []
|
|
# Special handling: assign line numbers per form for FORM_LST.csv
|
|
form_lst_line_counters: Dict[str, int] = {}
|
|
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(csv_reader, start=2):
|
|
try:
|
|
model_data: Dict[str, Any] = {}
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
# Inject sequential line_number for FORM_LST rows grouped by form_id
|
|
if file_type == "FORM_LST.csv":
|
|
form_id_value = model_data.get("form_id")
|
|
if form_id_value:
|
|
current = form_lst_line_counters.get(str(form_id_value), 0) + 1
|
|
form_lst_line_counters[str(form_id_value)] = current
|
|
if "line_number" not in model_data:
|
|
model_data["line_number"] = current
|
|
if not any(model_data.values()):
|
|
continue
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
},
|
|
)
|
|
)
|
|
continue
|
|
|
|
if model_class == Phone and (not model_data.get('phone')):
|
|
continue
|
|
if model_class == Rolodex and (not model_data.get('last')):
|
|
continue
|
|
if model_class == Ledger and (not model_data.get('empl_num') or not model_data.get('file_no')):
|
|
continue
|
|
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush()
|
|
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=extra_data,
|
|
)
|
|
)
|
|
imported_count += 1
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
except Exception as e:
|
|
db.rollback()
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
},
|
|
)
|
|
)
|
|
except Exception:
|
|
errors.append({"row": row_num, "error": str(e)})
|
|
continue
|
|
|
|
db.commit()
|
|
total_imported += imported_count
|
|
total_errors += len(errors)
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
|
|
"imported_count": imported_count,
|
|
"errors": len(errors),
|
|
"message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
"header_validation": header_validation,
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=rerun_audit.id,
|
|
file_type=file_type,
|
|
status="success" if (len(errors) == 0 and header_validation.get("ok", True)) else "completed_with_errors",
|
|
imported_count=imported_count,
|
|
errors=len(errors),
|
|
message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
details={**({"saved_path": saved_path} if saved_path else {}), "header_validation": header_validation}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({"file_type": file_type, "status": "failed", "message": f"Import failed: {str(e)}"})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=rerun_audit.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Import failed: {str(e)}",
|
|
details={}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
# Finalize rerun audit
|
|
summary = {
|
|
"total_files": len(items),
|
|
"successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
|
|
"failed_files": len([r for r in results if r["status"] == "failed"]),
|
|
"total_imported": total_imported,
|
|
"total_errors": total_errors,
|
|
}
|
|
try:
|
|
rerun_audit.successful_files = summary["successful_files"]
|
|
rerun_audit.failed_files = summary["failed_files"]
|
|
rerun_audit.total_imported = summary["total_imported"]
|
|
rerun_audit.total_errors = summary["total_errors"]
|
|
rerun_audit.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
|
|
"completed_with_errors" if summary["successful_files"] > 0 else "failed"
|
|
)
|
|
rerun_audit.message = f"Rerun completed: {rerun_audit.successful_files}/{rerun_audit.total_files} files"
|
|
rerun_audit.finished_at = datetime.now(timezone.utc)
|
|
rerun_audit.details = {"rerun_of": audit_id}
|
|
db.add(rerun_audit)
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
return {"batch_results": results, "summary": summary, "rerun_audit_id": rerun_audit.id}
|
|
|
|
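# Flexible-only upload: every row of an arbitrary CSV is stored as a FlexibleImport record
# with target_table=None and extra_data holding the non-empty columns, e.g. a row
# "NAME,CITY" / "Acme,Springfield" would be kept as {"NAME": "Acme", "CITY": "Springfield"}
# (illustrative values).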
@router.post("/upload-flexible")
|
|
async def upload_flexible_only(
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user),
|
|
):
|
|
"""Flexible-only single-file upload.
|
|
|
|
Accepts any CSV and stores each row as a `FlexibleImport` record with `target_table=None`.
|
|
"""
|
|
# Ensure CSV
|
|
if not file.filename or not file.filename.lower().endswith(".csv"):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
file_type = file.filename
|
|
|
|
try:
|
|
# Optionally clear prior flexible rows for this file_type
|
|
if replace_existing:
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == None, # noqa: E711
|
|
).delete()
|
|
db.commit()
|
|
|
|
content = await file.read()
|
|
encodings = ENCODINGS
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file encoding")
|
|
|
|
reader = csv.DictReader(io.StringIO(csv_content), delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
|
headers = reader.fieldnames or []
|
|
|
|
imported_count = 0
|
|
for row in reader:
|
|
payload = {k: v for k, v in (row or {}).items() if v not in (None, "")}
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=None,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=payload,
|
|
)
|
|
)
|
|
imported_count += 1
|
|
if imported_count % 200 == 0:
|
|
db.commit()
|
|
|
|
db.commit()
|
|
|
|
return {
|
|
"file_type": file_type,
|
|
"imported_count": imported_count,
|
|
"errors": [],
|
|
"total_errors": 0,
|
|
"auto_mapping": {
|
|
"mapped_headers": {},
|
|
"unmapped_headers": list(headers),
|
|
"flexible_saved_rows": imported_count,
|
|
},
|
|
"message": f"Stored {imported_count} rows as flexible data (no known model)",
|
|
}
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
db.rollback()
|
|
raise HTTPException(status_code=500, detail=f"Flexible upload failed: {str(e)}") |