"""
|
|
Data import API endpoints for CSV file uploads with auto-discovery mapping.
|
|
"""
|
|
import csv
|
|
import io
|
|
import re
|
|
import os
|
|
from pathlib import Path
|
|
from difflib import SequenceMatcher
|
|
from datetime import datetime, date
|
|
from decimal import Decimal
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as UploadFileForm, Form, Query
|
|
from sqlalchemy.orm import Session
|
|
from app.database.base import get_db
|
|
from app.auth.security import get_current_user
|
|
from app.models.user import User
|
|
from app.models.rolodex import Rolodex, Phone
|
|
from app.models.files import File
|
|
from app.models.ledger import Ledger
|
|
from app.models.qdro import QDRO
|
|
from app.models.pensions import Pension, PensionSchedule, MarriageHistory, DeathBenefit, SeparationAgreement, LifeTable, NumberTable
|
|
from app.models.lookups import Employee, FileType, FileStatus, TransactionType, TransactionCode, State, GroupLookup, Footer, PlanInfo, FormIndex, FormList, PrinterSetup, SystemSetup
|
|
from app.models.additional import Payment, Deposit, FileNote, FormVariable, ReportVariable
|
|
from app.models.flexible import FlexibleImport
|
|
from app.models.audit import ImportAudit, ImportAuditFile
|
|
from app.config import settings
|
|
|
|
router = APIRouter(tags=["import"])
|
|
|
|
|
|
# CSV to Model mapping
CSV_MODEL_MAPPING = {
    "ROLODEX.csv": Rolodex,
    "ROLEX_V.csv": Rolodex,  # Legacy/view alias
    "PHONE.csv": Phone,
    "FILES.csv": File,
    "FILES_R.csv": File,  # Legacy/report alias
    "FILES_V.csv": File,  # Legacy/view alias
    "LEDGER.csv": Ledger,
    "QDROS.csv": QDRO,
    "PENSIONS.csv": Pension,
    "SCHEDULE.csv": PensionSchedule,
    "MARRIAGE.csv": MarriageHistory,
    "DEATH.csv": DeathBenefit,
    "SEPARATE.csv": SeparationAgreement,
    "LIFETABL.csv": LifeTable,
    "NUMBERAL.csv": NumberTable,
    "EMPLOYEE.csv": Employee,
    "FILETYPE.csv": FileType,
    "FILESTAT.csv": FileStatus,
    "TRNSTYPE.csv": TransactionType,
    "TRNSLKUP.csv": TransactionCode,
    "STATES.csv": State,
    "GRUPLKUP.csv": GroupLookup,
    "FOOTERS.csv": Footer,
    "PLANINFO.csv": PlanInfo,
    # Legacy alternate names from export directories
    "FORM_INX.csv": FormIndex,
    "FORM_LST.csv": FormList,
    "PRINTERS.csv": PrinterSetup,
    "SETUP.csv": SystemSetup,
    # Additional models for complete legacy coverage
    "DEPOSITS.csv": Deposit,
    "FILENOTS.csv": FileNote,
    "FVARLKUP.csv": FormVariable,
    "RVARLKUP.csv": ReportVariable,
    "PAYMENTS.csv": Payment,
    "TRNSACTN.csv": Ledger,  # Maps to the existing Ledger model (same structure)
}

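# Illustrative sketch of how a legacy filename resolves to its SQLAlchemy model;
# the __tablename__ lookup shown here is what the importer later records as the
# FlexibleImport target table.
#   CSV_MODEL_MAPPING["PHONE.csv"]                -> Phone
#   CSV_MODEL_MAPPING["PHONE.csv"].__tablename__  -> target table for imported rows
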
# Field mappings for CSV columns to database fields
|
|
# Legacy header synonyms used as hints only (not required). Auto-discovery will work without exact matches.
|
|
FIELD_MAPPINGS = {
|
|
"ROLODEX.csv": {
|
|
"Id": "id",
|
|
"Prefix": "prefix",
|
|
"First": "first",
|
|
"Middle": "middle",
|
|
"Last": "last",
|
|
"Suffix": "suffix",
|
|
"Title": "title",
|
|
"A1": "a1",
|
|
"A2": "a2",
|
|
"A3": "a3",
|
|
"City": "city",
|
|
"Abrev": "abrev",
|
|
"St": None, # Full state name - skip this field as model only has abrev
|
|
"Zip": "zip",
|
|
"Email": "email",
|
|
"DOB": "dob",
|
|
"SS#": "ss_number",
|
|
"Legal_Status": "legal_status",
|
|
"Group": "group",
|
|
"Memo": "memo"
|
|
},
|
|
"PHONE.csv": {
|
|
"Id": "rolodex_id",
|
|
"Phone": "phone",
|
|
"Location": "location"
|
|
},
|
|
"FILES.csv": {
|
|
"File_No": "file_no",
|
|
"Id": "id",
|
|
"File_Type": "file_type",
|
|
"Regarding": "regarding",
|
|
"Opened": "opened",
|
|
"Closed": "closed",
|
|
"Empl_Num": "empl_num",
|
|
"Rate_Per_Hour": "rate_per_hour",
|
|
"Status": "status",
|
|
"Footer_Code": "footer_code",
|
|
"Opposing": "opposing",
|
|
"Hours": "hours",
|
|
"Hours_P": "hours_p",
|
|
"Trust_Bal": "trust_bal",
|
|
"Trust_Bal_P": "trust_bal_p",
|
|
"Hourly_Fees": "hourly_fees",
|
|
"Hourly_Fees_P": "hourly_fees_p",
|
|
"Flat_Fees": "flat_fees",
|
|
"Flat_Fees_P": "flat_fees_p",
|
|
"Disbursements": "disbursements",
|
|
"Disbursements_P": "disbursements_p",
|
|
"Credit_Bal": "credit_bal",
|
|
"Credit_Bal_P": "credit_bal_p",
|
|
"Total_Charges": "total_charges",
|
|
"Total_Charges_P": "total_charges_p",
|
|
"Amount_Owing": "amount_owing",
|
|
"Amount_Owing_P": "amount_owing_p",
|
|
"Transferable": "transferable",
|
|
"Memo": "memo"
|
|
},
|
|
"LEDGER.csv": {
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
},
|
|
"QDROS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"^1": "field1",
|
|
"^2": "field2",
|
|
"^Part": "part",
|
|
"^AltP": "altp",
|
|
"^Pet": "pet",
|
|
"^Res": "res",
|
|
"Case_Type": "case_type",
|
|
"Case_Code": "case_code",
|
|
"Section": "section",
|
|
"Case_Number": "case_number",
|
|
"Judgment_Date": "judgment_date",
|
|
"Valuation_Date": "valuation_date",
|
|
"Married_On": "married_on",
|
|
"Percent_Awarded": "percent_awarded",
|
|
"Ven_City": "ven_city",
|
|
"Ven_Cnty": "ven_cnty",
|
|
"Ven_St": "ven_st",
|
|
"Draft_Out": "draft_out",
|
|
"Draft_Apr": "draft_apr",
|
|
"Final_Out": "final_out",
|
|
"Judge": "judge",
|
|
"Form_Name": "form_name"
|
|
},
|
|
"PENSIONS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Title": "title",
|
|
"First": "first",
|
|
"Last": "last",
|
|
"Birth": "birth",
|
|
"Race": "race",
|
|
"Sex": "sex",
|
|
"Info": "info",
|
|
"Valu": "valu",
|
|
"Accrued": "accrued",
|
|
"Vested_Per": "vested_per",
|
|
"Start_Age": "start_age",
|
|
"COLA": "cola",
|
|
"Max_COLA": "max_cola",
|
|
"Withdrawal": "withdrawal",
|
|
"Pre_DR": "pre_dr",
|
|
"Post_DR": "post_dr",
|
|
"Tax_Rate": "tax_rate"
|
|
},
|
|
"EMPLOYEE.csv": {
|
|
"Empl_Num": "empl_num",
|
|
"Rate_Per_Hour": "rate_per_hour"
|
|
# "Empl_Id": not a field in Employee model, using empl_num as identifier
|
|
# Model has additional fields (first_name, last_name, title, etc.) not in CSV
|
|
},
|
|
"STATES.csv": {
|
|
"Abrev": "abbreviation",
|
|
"St": "name"
|
|
},
|
|
"GRUPLKUP.csv": {
|
|
"Code": "group_code",
|
|
"Description": "description"
|
|
# "Title": field not present in model, skipping
|
|
},
|
|
"TRNSLKUP.csv": {
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
# "T_Type_L": not a field in TransactionCode model
|
|
"Amount": "default_rate",
|
|
"Description": "description"
|
|
},
|
|
"TRNSTYPE.csv": {
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "description"
|
|
# "Header": maps to debit_credit but needs data transformation
|
|
# "Footer": doesn't align with active boolean field
|
|
# These fields may need custom handling or model updates
|
|
},
|
|
"FILETYPE.csv": {
|
|
"File_Type": "type_code",
|
|
"Description": "description",
|
|
"Default_Rate": "default_rate"
|
|
},
|
|
"FILESTAT.csv": {
|
|
"Status_Code": "status_code",
|
|
"Description": "description",
|
|
"Sort_Order": "sort_order"
|
|
},
|
|
"FOOTERS.csv": {
|
|
"F_Code": "footer_code",
|
|
"F_Footer": "content"
|
|
# Description is optional - not required for footers
|
|
},
|
|
"PLANINFO.csv": {
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Plan_Type": "plan_type",
|
|
"Sponsor": "sponsor",
|
|
"Administrator": "administrator",
|
|
"Address1": "address1",
|
|
"Address2": "address2",
|
|
"City": "city",
|
|
"State": "state",
|
|
"Zip_Code": "zip_code",
|
|
"Phone": "phone",
|
|
"Notes": "notes"
|
|
},
|
|
"FORM_INX.csv": {
|
|
"Form_Id": "form_id",
|
|
"Form_Name": "form_name",
|
|
"Category": "category"
|
|
},
|
|
"FORM_LST.csv": {
|
|
"Form_Id": "form_id",
|
|
"Line_Number": "line_number",
|
|
"Content": "content"
|
|
},
|
|
"PRINTERS.csv": {
|
|
"Printer_Name": "printer_name",
|
|
"Description": "description",
|
|
"Driver": "driver",
|
|
"Port": "port",
|
|
"Default_Printer": "default_printer"
|
|
},
|
|
"SETUP.csv": {
|
|
"Setting_Key": "setting_key",
|
|
"Setting_Value": "setting_value",
|
|
"Description": "description",
|
|
"Setting_Type": "setting_type"
|
|
},
|
|
"SCHEDULE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Vests_On": "vests_on",
|
|
"Vests_At": "vests_at"
|
|
},
|
|
"MARRIAGE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Marriage_Date": "marriage_date",
|
|
"Separation_Date": "separation_date",
|
|
"Divorce_Date": "divorce_date"
|
|
},
|
|
"DEATH.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Benefit_Type": "benefit_type",
|
|
"Benefit_Amount": "benefit_amount",
|
|
"Beneficiary": "beneficiary"
|
|
},
|
|
"SEPARATE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Agreement_Date": "agreement_date",
|
|
"Terms": "terms"
|
|
},
|
|
"LIFETABL.csv": {
|
|
"Age": "age",
|
|
"Male_Mortality": "male_mortality",
|
|
"Female_Mortality": "female_mortality"
|
|
},
|
|
"NUMBERAL.csv": {
|
|
"Table_Name": "table_name",
|
|
"Age": "age",
|
|
"Value": "value"
|
|
},
|
|
# Additional CSV file mappings
|
|
"DEPOSITS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"Total": "total"
|
|
},
|
|
"FILENOTS.csv": {
|
|
"File_No": "file_no",
|
|
"Memo_Date": "memo_date",
|
|
"Memo_Note": "memo_note"
|
|
},
|
|
"FVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query",
|
|
"Response": "response"
|
|
},
|
|
"RVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query"
|
|
},
|
|
"PAYMENTS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"File_No": "file_no",
|
|
"Id": "client_id",
|
|
"Regarding": "regarding",
|
|
"Amount": "amount",
|
|
"Note": "note"
|
|
},
|
|
"TRNSACTN.csv": {
|
|
# Maps to Ledger model - same structure as LEDGER.csv
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
}
|
|
}


def parse_date(date_str: str) -> Optional[date]:
    """Parse a date string in any of the common legacy formats; return None if unparseable."""
    if not date_str or date_str.strip() == "":
        return None

    date_formats = [
        "%Y-%m-%d",
        "%m/%d/%Y",
        "%d/%m/%Y",
        "%m-%d-%Y",
        "%d-%m-%Y",
        "%Y/%m/%d",
    ]

    for fmt in date_formats:
        try:
            return datetime.strptime(date_str.strip(), fmt).date()
        except ValueError:
            continue

    return None

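# Illustrative sketch (not executed at import time) of how parse_date behaves on
# typical legacy values, given the format list above:
#   parse_date("1999-12-31")  -> date(1999, 12, 31)
#   parse_date("12/31/1999")  -> date(1999, 12, 31)
#   parse_date("not a date")  -> None
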
def make_json_safe(value: Any) -> Any:
    """Recursively convert values to JSON-serializable types.

    - date/datetime -> ISO string
    - Decimal -> float
    - dict/list -> recurse
    """
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, Decimal):
        try:
            return float(value)
        except Exception:
            return str(value)
    if isinstance(value, dict):
        return {k: make_json_safe(v) for k, v in value.items()}
    if isinstance(value, list):
        return [make_json_safe(v) for v in value]
    return value

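# Illustrative sketch of the conversions that FlexibleImport JSON payloads rely on:
#   make_json_safe({"dob": date(2024, 1, 2), "amount": Decimal("10.50")})
#   -> {"dob": "2024-01-02", "amount": 10.5}
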
def parse_csv_robust(csv_content: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Parse CSV text robustly by handling broken newlines in unquoted fields.

    Returns tuple of (rows_as_dicts, headers).
    """
    lines = (csv_content or "").strip().split('\n')
    if not lines or (len(lines) == 1 and not lines[0].strip()):
        return [], []

    # Parse headers using the csv module to respect quoting
    header_reader = csv.reader(io.StringIO(lines[0]))
    headers = next(header_reader)
    headers = [h.strip() for h in headers]

    rows_data: List[Dict[str, str]] = []
    for line_num, line in enumerate(lines[1:], start=2):
        # Skip empty lines
        if not line.strip():
            continue
        try:
            # Parse each line independently; avoids multiline parse explosions
            line_reader = csv.reader(io.StringIO(line))
            fields = next(line_reader)
            fields = [f.strip() for f in fields]

            # If clearly malformed (too few fields), skip
            if len(fields) < max(1, len(headers) // 2):
                continue

            # Pad or truncate to header length
            while len(fields) < len(headers):
                fields.append("")
            fields = fields[:len(headers)]

            row_dict = dict(zip(headers, fields))
            rows_data.append(row_dict)
        except Exception:
            # Skip malformed row
            continue

    return rows_data, headers

def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Try csv.DictReader first; on failure, fall back to the robust parser."""
    try:
        reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        headers_local = reader.fieldnames or []
        rows_local: List[Dict[str, str]] = []
        for r in reader:
            rows_local.append(r)
        return rows_local, headers_local
    except Exception:
        return parse_csv_robust(text)

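# Illustrative sketch: both parsers return (rows, headers), so callers can swap
# them freely when legacy quoting breaks DictReader.
#   rows, headers = parse_csv_with_fallback("Id,Last\n1,Smith\n")
#   headers -> ["Id", "Last"];  rows[0] -> {"Id": "1", "Last": "Smith"}
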
def _normalize_label(label: str) -> str:
    """Normalize a header/field label for fuzzy comparison."""
    if not label:
        return ""
    # Lowercase, replace separators with underscores, remove non-alphanumerics,
    # then expand common short forms
    lowered = label.strip().lower()
    # Replace separators
    lowered = re.sub(r"[\s\-]+", "_", lowered)
    # Remove non-word characters except underscore
    lowered = re.sub(r"[^a-z0-9_]", "", lowered)
    # Expand a few common abbreviations
    replacements = {
        "num": "number",
        "no": "number",
        "amt": "amount",
        "addr": "address",
        "st": "state",
        "dob": "dateofbirth",
        "ss": "ssnumber",
    }
    tokens = [replacements.get(t, t) for t in lowered.split("_") if t]
    return "".join(tokens)

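# Illustrative sketch of the normalization that drives fuzzy header matching:
#   _normalize_label("SS#")            -> "ssnumber"
#   _normalize_label("File_No")        -> "filenumber"
#   _normalize_label("Rate Per Hour")  -> "rateperhour"
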
def _get_model_columns(model_class) -> Tuple[Dict[str, Any], List[str]]:
    """Return model columns mapping name->Column and list of primary key column names."""
    columns = {}
    pk_names = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        columns[col.name] = col
        if col.primary_key:
            pk_names.append(col.name)
    return columns, pk_names

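# Illustrative sketch: timestamp mixin columns are excluded, and the PK list is
# what links FlexibleImport rows back to imported records.
#   cols, pks = _get_model_columns(Rolodex)
#   "created_at" in cols -> False;  pks -> the single-column PK, e.g. ["id"]
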
def _build_dynamic_mapping(headers: List[str], model_class, file_type: str) -> Dict[str, Any]:
    """Create a mapping from CSV headers to model fields using synonyms and fuzzy similarity.

    Returns a dict with keys: mapping (csv_header->db_field), suggestions, unmapped_headers, mapped_headers.
    """
    model_columns, _ = _get_model_columns(model_class)
    model_field_names = list(model_columns.keys())

    # Start with legacy mapping hints when available
    legacy_map = FIELD_MAPPINGS.get(file_type, {}) or {}

    mapping: Dict[str, Optional[str]] = {}
    suggestions: Dict[str, List[Tuple[str, float]]] = {}
    used_db_fields: set[str] = set()

    # 1) Exact legacy header key usage
    for header in headers:
        if header in legacy_map and legacy_map[header] is not None:
            candidate = legacy_map[header]
            if candidate in model_field_names and candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 2) Direct exact match against model fields (case-insensitive and normalized)
    normalized_model = {name: _normalize_label(name) for name in model_field_names}
    normalized_to_model = {v: k for k, v in normalized_model.items()}

    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        if normalized_header in normalized_to_model:
            candidate = normalized_to_model[normalized_header]
            if candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 3) Fuzzy best-match based on normalized strings
    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        best_candidate = None
        best_score = 0.0
        candidate_list: List[Tuple[str, float]] = []
        for model_field in model_field_names:
            if model_field in used_db_fields:
                continue
            nm = normalized_model[model_field]
            if not nm or not normalized_header:
                score = 0.0
            else:
                # Combine ratio and a partial-containment heuristic
                ratio = SequenceMatcher(None, normalized_header, nm).ratio()
                containment = 1.0 if (normalized_header in nm or nm in normalized_header) else 0.0
                score = max(ratio, 0.85 if containment else 0.0)
            candidate_list.append((model_field, score))
            if score > best_score:
                best_score = score
                best_candidate = model_field
        # Keep top 3 suggestions for the UI
        suggestions[header] = sorted(candidate_list, key=lambda x: x[1], reverse=True)[:3]
        # Apply only if the score is above the threshold
        if best_candidate and best_score >= 0.82:
            mapping[header] = best_candidate
            used_db_fields.add(best_candidate)

    # 4) Any header explicitly mapped to None in the legacy map is considered intentionally skipped
    for header in headers:
        if header not in mapping and header in legacy_map and legacy_map[header] is None:
            mapping[header] = None

    mapped_headers = {h: f for h, f in mapping.items() if f is not None}
    unmapped_headers = [h for h in headers if h not in mapping or mapping[h] is None]

    return {
        "mapping": mapping,
        "mapped_headers": mapped_headers,
        "unmapped_headers": unmapped_headers,
        "suggestions": suggestions,
    }

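# Illustrative sketch (assumes the imported Rolodex model): discovery combines
# legacy hints, normalized exact matches, and fuzzy scoring, and reports the rest.
#   info = _build_dynamic_mapping(["Id", "Last", "SS#", "Custom_Col"], Rolodex, "ROLODEX.csv")
#   info["mapping"]           -> e.g. {"Id": "id", "Last": "last", "SS#": "ss_number"}
#   info["unmapped_headers"]  -> headers routed to FlexibleImport JSON storage
#   info["suggestions"]       -> top-3 scored candidates per undecided header
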
def _get_required_fields(model_class) -> List[str]:
    """Infer required (non-nullable) fields for a model to avoid DB errors.

    Excludes primary keys (which might be autoincrement or provided) and timestamp mixins.
    """
    required = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        if col.primary_key:
            # If the PK is a string or composite, we cannot assume it is optional; handled separately
            continue
        try:
            is_required = not getattr(col, "nullable", True)
        except Exception:
            is_required = False
        if is_required:
            required.append(col.name)
    return required

def convert_value(value: str, field_name: str) -> Any:
    """Convert a string value to the appropriate type based on the field name."""
    if not value or value.strip() == "" or value.strip().lower() in ["null", "none", "n/a"]:
        return None

    value = value.strip()

    # Date fields
    if any(word in field_name.lower() for word in ["date", "dob", "birth", "opened", "closed", "judgment", "valuation", "married", "vests_on"]):
        parsed_date = parse_date(value)
        return parsed_date

    # Boolean fields
    if any(word in field_name.lower() for word in ["active", "default_printer", "billed", "transferable"]):
        if value.lower() in ["true", "1", "yes", "y", "on", "active"]:
            return True
        elif value.lower() in ["false", "0", "no", "n", "off", "inactive"]:
            return False
        else:
            return None

    # Numeric fields (float)
    if any(word in field_name.lower() for word in ["rate", "hour", "bal", "fee", "amount", "owing", "transfer", "valu", "accrued", "vested", "cola", "tax", "percent", "benefit_amount", "mortality", "value"]):
        try:
            # Remove currency symbols, thousands separators, and percent signs
            cleaned_value = value.replace("$", "").replace(",", "").replace("%", "")
            return float(cleaned_value)
        except ValueError:
            return 0.0

    # Integer fields
    if any(word in field_name.lower() for word in ["item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num"]):
        try:
            return int(float(value))  # Handle cases like "1.0"
        except ValueError:
            # For employee numbers, return None to skip the record rather than defaulting to 0
            if "empl_num" in field_name.lower():
                return None
            return 0

    # String fields - limit length to prevent database errors
    if len(value) > 500:  # Reasonable limit for most string fields
        return value[:500]

    return value

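# Illustrative sketch of the name-driven coercions applied during import:
#   convert_value("$1,250.00", "amount")   -> 1250.0
#   convert_value("Y", "billed")           -> True
#   convert_value("12/31/1999", "opened")  -> date(1999, 12, 31)
#   convert_value("", "memo")              -> None
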
def validate_foreign_keys(model_data: dict, model_class, db: Session) -> List[str]:
    """Validate foreign key relationships before inserting data."""
    errors = []

    # Check the Phone -> Rolodex relationship
    if model_class == Phone and "rolodex_id" in model_data:
        rolodex_id = model_data["rolodex_id"]
        if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
            errors.append(f"Rolodex ID '{rolodex_id}' not found")

    # Check the File -> Rolodex relationship
    if model_class == File and "id" in model_data:
        rolodex_id = model_data["id"]
        if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
            errors.append(f"Owner Rolodex ID '{rolodex_id}' not found")

    # Add more foreign key validations as needed
    return errors

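# Illustrative sketch (assumes an open Session `db`): a Phone row that points at a
# missing Rolodex id is reported instead of failing deep inside the insert.
#   validate_foreign_keys({"rolodex_id": "X999", "phone": "555-0100"}, Phone, db)
#   -> ["Rolodex ID 'X999' not found"]
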
@router.get("/available-files")
|
|
async def get_available_csv_files(current_user: User = Depends(get_current_user)):
|
|
"""Get list of available CSV files for import"""
|
|
return {
|
|
"available_files": list(CSV_MODEL_MAPPING.keys()),
|
|
"descriptions": {
|
|
"ROLODEX.csv": "Customer/contact information",
|
|
"PHONE.csv": "Phone numbers linked to customers",
|
|
"FILES.csv": "Client files and cases",
|
|
"LEDGER.csv": "Financial transactions per file",
|
|
"QDROS.csv": "Legal documents and court orders",
|
|
"PENSIONS.csv": "Pension calculation data",
|
|
"EMPLOYEE.csv": "Staff and employee information",
|
|
"STATES.csv": "US States lookup table",
|
|
"FILETYPE.csv": "File type categories",
|
|
"FILESTAT.csv": "File status codes",
|
|
"FOOTERS.csv": "Document footers and signatures",
|
|
"DEPOSITS.csv": "Daily bank deposit summaries",
|
|
"FILENOTS.csv": "File notes and case memos",
|
|
"FVARLKUP.csv": "Form template variables",
|
|
"RVARLKUP.csv": "Report template variables",
|
|
"PAYMENTS.csv": "Individual payments within deposits",
|
|
"TRNSACTN.csv": "Transaction details (maps to Ledger)"
|
|
},
|
|
"auto_discovery": True
|
|
}
|
|
|
|
|
|
@router.post("/upload/{file_type}")
|
|
async def import_csv_data(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import data from CSV file"""
|
|
|
|
# Validate file type
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Unsupported file type: {file_type}. Available types: {list(CSV_MODEL_MAPPING.keys())}"
|
|
)
|
|
|
|
# Validate file extension
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
# Legacy mapping hints used internally by auto-discovery; not used strictly
|
|
legacy_hint_map = FIELD_MAPPINGS.get(file_type, {})
|
|
|
|
try:
|
|
# Read CSV content
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
# Preprocess CSV content to fix common legacy issues
|
|
def preprocess_csv(content):
|
|
lines = content.split('\n')
|
|
cleaned_lines = []
|
|
i = 0
|
|
|
|
while i < len(lines):
|
|
line = lines[i]
|
|
# If line doesn't have the expected number of commas, it might be a broken multi-line field
|
|
if i == 0: # Header line
|
|
cleaned_lines.append(line)
|
|
expected_comma_count = line.count(',')
|
|
i += 1
|
|
continue
|
|
|
|
# Check if this line has the expected number of commas
|
|
if line.count(',') < expected_comma_count:
|
|
# This might be a continuation of the previous line
|
|
# Try to merge with previous line
|
|
if cleaned_lines:
|
|
cleaned_lines[-1] += " " + line.replace('\n', ' ').replace('\r', ' ')
|
|
else:
|
|
cleaned_lines.append(line)
|
|
else:
|
|
cleaned_lines.append(line)
|
|
i += 1
|
|
|
|
return '\n'.join(cleaned_lines)
|
|
|
|
# Custom robust parser for problematic legacy CSV files
|
|
class MockCSVReader:
|
|
def __init__(self, data, fieldnames):
|
|
self.data = data
|
|
self.fieldnames = fieldnames
|
|
self.index = 0
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def __next__(self):
|
|
if self.index >= len(self.data):
|
|
raise StopIteration
|
|
row = self.data[self.index]
|
|
self.index += 1
|
|
return row
|
|
|
|
try:
|
|
lines = csv_content.strip().split('\n')
|
|
if not lines:
|
|
raise ValueError("Empty CSV file")
|
|
|
|
# Parse header using proper CSV parsing
|
|
header_reader = csv.reader(io.StringIO(lines[0]))
|
|
headers = next(header_reader)
|
|
headers = [h.strip() for h in headers]
|
|
print(f"DEBUG: Found {len(headers)} headers: {headers}")
|
|
# Build dynamic header mapping for this file/model
|
|
mapping_info = _build_dynamic_mapping(headers, model_class, file_type)
|
|
|
|
# Parse data rows with proper CSV parsing
|
|
rows_data = []
|
|
skipped_rows = 0
|
|
|
|
for line_num, line in enumerate(lines[1:], start=2):
|
|
# Skip empty lines
|
|
if not line.strip():
|
|
continue
|
|
|
|
try:
|
|
# Use proper CSV parsing to handle commas within quoted fields
|
|
line_reader = csv.reader(io.StringIO(line))
|
|
fields = next(line_reader)
|
|
fields = [f.strip() for f in fields]
|
|
|
|
# Skip rows that are clearly malformed (too few fields)
|
|
if len(fields) < len(headers) // 2: # Less than half the expected fields
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
# Pad or truncate to match header length
|
|
while len(fields) < len(headers):
|
|
fields.append('')
|
|
fields = fields[:len(headers)]
|
|
|
|
row_dict = dict(zip(headers, fields))
|
|
rows_data.append(row_dict)
|
|
|
|
except Exception as row_error:
|
|
print(f"Skipping malformed row {line_num}: {row_error}")
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
csv_reader = MockCSVReader(rows_data, headers)
|
|
print(f"SUCCESS: Parsed {len(rows_data)} rows (skipped {skipped_rows} malformed rows)")
|
|
|
|
except Exception as e:
|
|
print(f"Custom parsing failed: {e}")
|
|
raise HTTPException(status_code=400, detail=f"Could not parse CSV file. The file appears to have serious formatting issues. Error: {str(e)}")
|
|
|
|
imported_count = 0
|
|
errors = []
|
|
flexible_saved = 0
|
|
mapped_headers = mapping_info.get("mapped_headers", {})
|
|
unmapped_headers = mapping_info.get("unmapped_headers", [])
|
|
|
|
# If replace_existing is True, delete all existing records and related flexible extras
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(csv_reader, start=2): # Start at 2 for header row
|
|
try:
|
|
# Convert CSV row to model data
|
|
model_data: Dict[str, Any] = {}
|
|
# Apply discovered mapping
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
|
|
# Skip empty rows
|
|
if not any(model_data.values()):
|
|
continue
|
|
|
|
# Fallback: if required non-nullable fields are missing, store row as flexible only
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
# Do not attempt to insert into strict model; continue to next row
|
|
continue
|
|
|
|
# Special validation for models with required fields
|
|
if model_class == Phone:
|
|
if 'phone' not in model_data or not model_data['phone']:
|
|
continue # Skip phone records without a phone number
|
|
|
|
if model_class == Rolodex:
|
|
if 'last' not in model_data or not model_data['last']:
|
|
continue # Skip rolodex records without a last name/company name
|
|
|
|
if model_class == Ledger:
|
|
# Skip ledger records without required fields
|
|
if 'empl_num' not in model_data or not model_data['empl_num']:
|
|
continue # Skip ledger records without employee number
|
|
if 'file_no' not in model_data or not model_data['file_no']:
|
|
continue # Skip ledger records without file number
|
|
|
|
# Create model instance
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush() # Ensure PK is available
|
|
|
|
# Capture PK details for flexible storage linkage (single-column PKs only)
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
|
|
# Save unmapped fields into flexible storage (privacy-first, per-row JSON)
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=extra_data,
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
imported_count += 1
|
|
|
|
# Commit every 100 records to avoid memory issues
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
|
|
except Exception as e:
|
|
# Rollback the transaction for this record
|
|
db.rollback()
|
|
# As a robustness measure, persist row in flexible storage instead of counting as error
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
except Exception as flex_e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
|
|
"data": row,
|
|
})
|
|
continue
|
|
|
|
# Final commit
|
|
db.commit()
|
|
|
|
result = {
|
|
"file_type": file_type,
|
|
"imported_count": imported_count,
|
|
"errors": errors[:10], # Limit errors to first 10
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"mapped_headers": mapped_headers,
|
|
"unmapped_headers": unmapped_headers,
|
|
"flexible_saved_rows": flexible_saved,
|
|
},
|
|
}
|
|
|
|
if errors:
|
|
result["warning"] = f"Import completed with {len(errors)} errors"
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
print(f"IMPORT ERROR DEBUG: {type(e).__name__}: {str(e)}")
|
|
import traceback
|
|
print(f"TRACEBACK: {traceback.format_exc()}")
|
|
db.rollback()
|
|
raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")


@router.get("/status")
async def get_import_status(db: Session = Depends(get_db), current_user: User = Depends(get_current_user)):
    """Get current import status and record counts"""

    status = {}

    for file_type, model_class in CSV_MODEL_MAPPING.items():
        try:
            count = db.query(model_class).count()
            status[file_type] = {
                "table_name": model_class.__tablename__,
                "record_count": count
            }
        except Exception as e:
            status[file_type] = {
                "table_name": model_class.__tablename__,
                "record_count": 0,
                "error": str(e)
            }

    return status

@router.delete("/clear/{file_type}")
async def clear_table_data(
    file_type: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Clear all data from a specific table"""

    if file_type not in CSV_MODEL_MAPPING:
        raise HTTPException(status_code=400, detail=f"Unknown file type: {file_type}")

    model_class = CSV_MODEL_MAPPING[file_type]

    try:
        deleted_count = db.query(model_class).count()
        db.query(model_class).delete()
        # Also clear any flexible rows linked to this target table and file type
        db.query(FlexibleImport).filter(
            FlexibleImport.file_type == file_type,
            FlexibleImport.target_table == model_class.__tablename__,
        ).delete()
        db.commit()

        return {
            "file_type": file_type,
            "table_name": model_class.__tablename__,
            "deleted_count": deleted_count
        }

    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Clear operation failed: {str(e)}")

@router.post("/validate/{file_type}")
|
|
async def validate_csv_file(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate CSV file structure without importing"""
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}")
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
# Use auto-discovery mapping for validation
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
# Parse CSV with fallback to robust line-by-line parsing
|
|
def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
|
|
try:
|
|
reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
|
headers_local = reader.fieldnames or []
|
|
rows_local = []
|
|
for r in reader:
|
|
rows_local.append(r)
|
|
return rows_local, headers_local
|
|
except Exception:
|
|
return parse_csv_robust(text)
|
|
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues on mapped fields
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
return {
|
|
"file_type": file_type,
|
|
# Consider valid if we can map at least one column; we don't require exact header match
|
|
"valid": len(mapped_headers) > 0 and len(errors) == 0,
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers,
|
|
},
|
|
"sample_data": sample_rows,
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
}
|
|
|
|
except Exception as e:
|
|
print(f"VALIDATION ERROR DEBUG: {type(e).__name__}: {str(e)}")
|
|
import traceback
|
|
print(f"VALIDATION TRACEBACK: {traceback.format_exc()}")
|
|
raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")


@router.get("/progress/{import_id}")
async def get_import_progress(
    import_id: str,
    current_user: User = Depends(get_current_user)
):
    """Get import progress status (placeholder for future implementation)"""
    # This would be used for long-running imports with background tasks
    return {
        "import_id": import_id,
        "status": "not_implemented",
        "message": "Real-time progress tracking not yet implemented"
    }

@router.post("/batch-validate")
|
|
async def batch_validate_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate multiple CSV files without importing"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
validation_results = []
|
|
|
|
for file in files:
|
|
file_type = file.filename
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Unsupported file type: {file_type}"
|
|
})
|
|
continue
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "File must be a CSV file"
|
|
})
|
|
continue
|
|
|
|
model_class = CSV_MODEL_MAPPING.get(file_type)
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files (include BOM-friendly utf-8-sig)
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
|
|
# Handle CSV parsing issues with legacy files
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
|
|
# Check headers and build dynamic mapping
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": len(mapped_headers) > 0 and len(errors) == 0,
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers
|
|
},
|
|
"sample_data": sample_rows[:5], # Limit sample data for batch operation
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
})
|
|
|
|
# Reset file pointer for potential future use
|
|
await file.seek(0)
|
|
|
|
except Exception as e:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Validation failed: {str(e)}"
|
|
})
|
|
|
|
# Summary statistics
|
|
total_files = len(validation_results)
|
|
valid_files = len([r for r in validation_results if r["valid"]])
|
|
invalid_files = total_files - valid_files
|
|
|
|
return {
|
|
"batch_validation_results": validation_results,
|
|
"summary": {
|
|
"total_files": total_files,
|
|
"valid_files": valid_files,
|
|
"invalid_files": invalid_files,
|
|
"all_valid": invalid_files == 0
|
|
}
|
|
}
|
|
|
|
|
|
@router.post("/batch-upload")
|
|
async def batch_import_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import multiple CSV files in optimal order"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
# Define optimal import order based on dependencies
|
|
import_order = [
|
|
"STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
|
|
"TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
|
|
"ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
|
|
"QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
|
|
"FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
|
|
]
|
|
|
|
# Sort uploaded files by optimal import order
|
|
file_map = {f.filename: f for f in files}
|
|
ordered_files = []
|
|
|
|
for file_type in import_order:
|
|
if file_type in file_map:
|
|
ordered_files.append((file_type, file_map[file_type]))
|
|
del file_map[file_type]
|
|
|
|
# Add any remaining files not in the predefined order
|
|
for filename, file in file_map.items():
|
|
ordered_files.append((filename, file))
|
|
|
|
results = []
|
|
total_imported = 0
|
|
total_errors = 0
|
|
|
|
# Create import audit row (running)
|
|
audit_row = ImportAudit(
|
|
status="running",
|
|
total_files=len(files),
|
|
successful_files=0,
|
|
failed_files=0,
|
|
total_imported=0,
|
|
total_errors=0,
|
|
initiated_by_user_id=getattr(current_user, "id", None),
|
|
initiated_by_username=getattr(current_user, "username", None),
|
|
message="Batch import started",
|
|
)
|
|
db.add(audit_row)
|
|
db.commit()
|
|
db.refresh(audit_row)
|
|
|
|
# Directory to persist uploaded files for this audit (for reruns)
|
|
audit_dir = Path(settings.upload_dir).joinpath("import_audits", str(audit_row.id))
|
|
try:
|
|
audit_dir.mkdir(parents=True, exist_ok=True)
|
|
except Exception:
|
|
pass
|
|
|
|
for file_type, file in ordered_files:
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
# Fallback flexible-only import for unknown file structures
|
|
try:
|
|
await file.seek(0)
|
|
content = await file.read()
|
|
# Save original upload to disk for potential reruns
|
|
saved_path = None
|
|
try:
|
|
file_path = audit_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as fh:
|
|
fh.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
rows_list, headers = parse_csv_with_fallback(csv_content)
|
|
flexible_count = 0
|
|
for row in rows_list:
|
|
# Save entire row as flexible JSON
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=None,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
|
|
)
|
|
)
|
|
flexible_count += 1
|
|
if flexible_count % 200 == 0:
|
|
db.commit()
|
|
db.commit()
|
|
total_imported += flexible_count
|
|
# Persist per-file result row
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success",
|
|
"imported_count": flexible_count,
|
|
"errors": 0,
|
|
"message": f"Stored {flexible_count} rows as flexible data (no known model)",
|
|
"auto_mapping": {
|
|
"mapped_headers": {},
|
|
"unmapped_headers": list(headers),
|
|
"flexible_saved_rows": flexible_count,
|
|
},
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="success",
|
|
imported_count=flexible_count,
|
|
errors=0,
|
|
message=f"Stored {flexible_count} rows as flexible data",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": f"Flexible import failed: {str(e)}"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Flexible import failed: {str(e)}",
|
|
details={}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
try:
|
|
# Reset file pointer
|
|
await file.seek(0)
|
|
|
|
# Import this file using auto-discovery mapping
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
|
|
content = await file.read()
|
|
# Save original upload to disk for potential reruns
|
|
saved_path = None
|
|
try:
|
|
file_path = audit_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as fh:
|
|
fh.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": "Could not decode CSV file encoding"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message="Could not decode CSV file encoding",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
# Handle CSV parsing issues with legacy files
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
imported_count = 0
|
|
errors = []
|
|
flexible_saved = 0
|
|
|
|
# If replace_existing is True and this is the first file of this type
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
try:
|
|
model_data = {}
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
|
|
if not any(model_data.values()):
|
|
continue
|
|
|
|
# Fallback: if required non-nullable fields are missing, store row as flexible only
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
}),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
continue
|
|
|
|
# Special validation for models with required fields
|
|
if model_class == Phone:
|
|
if 'phone' not in model_data or not model_data['phone']:
|
|
continue # Skip phone records without a phone number
|
|
|
|
if model_class == Rolodex:
|
|
if 'last' not in model_data or not model_data['last']:
|
|
continue # Skip rolodex records without a last name/company name
|
|
|
|
if model_class == Ledger:
|
|
# Skip ledger records without required fields
|
|
if 'empl_num' not in model_data or not model_data['empl_num']:
|
|
continue # Skip ledger records without employee number
|
|
if 'file_no' not in model_data or not model_data['file_no']:
|
|
continue # Skip ledger records without file number
|
|
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush()
|
|
|
|
# Link flexible extras
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=make_json_safe(extra_data),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
imported_count += 1
|
|
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
|
|
except Exception as e:
|
|
# Rollback the transaction for this record
|
|
db.rollback()
|
|
# Persist row in flexible storage instead of counting as error only
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
}),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
except Exception as flex_e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
|
|
})
|
|
continue
|
|
|
|
db.commit()
|
|
|
|
total_imported += imported_count
|
|
total_errors += len(errors)
|
|
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success" if len(errors) == 0 else "completed_with_errors",
|
|
"imported_count": imported_count,
|
|
"errors": len(errors),
|
|
"message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
"auto_mapping": {
|
|
"mapped_headers": mapped_headers,
|
|
"unmapped_headers": unmapped_headers,
|
|
"flexible_saved_rows": flexible_saved,
|
|
},
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="success" if len(errors) == 0 else "completed_with_errors",
|
|
imported_count=imported_count,
|
|
errors=len(errors),
|
|
message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
details={
|
|
"mapped_headers": list(mapped_headers.keys()),
|
|
"unmapped_count": len(unmapped_headers),
|
|
"flexible_saved_rows": flexible_saved,
|
|
**({"saved_path": saved_path} if saved_path else {}),
|
|
}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": f"Import failed: {str(e)}"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Import failed: {str(e)}",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
summary = {
|
|
"total_files": len(files),
|
|
"successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
|
|
"failed_files": len([r for r in results if r["status"] == "failed"]),
|
|
"total_imported": total_imported,
|
|
"total_errors": total_errors
|
|
}
|
|
|
|
# Finalize audit row
|
|
try:
|
|
audit_row.successful_files = summary["successful_files"]
|
|
audit_row.failed_files = summary["failed_files"]
|
|
audit_row.total_imported = summary["total_imported"]
|
|
audit_row.total_errors = summary["total_errors"]
|
|
audit_row.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
|
|
"completed_with_errors" if summary["successful_files"] > 0 else "failed"
|
|
)
|
|
audit_row.message = f"Batch import completed: {audit_row.successful_files}/{audit_row.total_files} files"
|
|
audit_row.finished_at = datetime.utcnow()
|
|
audit_row.details = {
|
|
"files": [
|
|
{"file_type": r.get("file_type"), "status": r.get("status"), "imported_count": r.get("imported_count", 0), "errors": r.get("errors", 0)}
|
|
for r in results
|
|
]
|
|
}
|
|
db.add(audit_row)
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
return {
|
|
"batch_results": results,
|
|
"summary": summary
|
|
}
|
|
|
|
|
|
@router.get("/recent-batches")
|
|
async def recent_batch_imports(
|
|
limit: int = Query(5, ge=1, le=50),
|
|
offset: int = Query(0, ge=0),
|
|
status: Optional[str] = Query(None, description="Filter by status: running|success|completed_with_errors|failed"),
|
|
start: Optional[str] = Query(None, description="ISO datetime start for started_at filter"),
|
|
end: Optional[str] = Query(None, description="ISO datetime end for started_at filter"),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return recent batch import audit rows (most recent first) with optional filters and pagination."""
|
|
q = db.query(ImportAudit)
|
|
if status and status.lower() != "all":
|
|
q = q.filter(ImportAudit.status == status)
|
|
# Date range filters on started_at
|
|
try:
|
|
if start:
|
|
start_dt = datetime.fromisoformat(start)
|
|
q = q.filter(ImportAudit.started_at >= start_dt)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if end:
|
|
end_dt = datetime.fromisoformat(end)
|
|
q = q.filter(ImportAudit.started_at <= end_dt)
|
|
except Exception:
|
|
pass
|
|
total = q.count()
|
|
rows = (
|
|
q.order_by(ImportAudit.started_at.desc())
|
|
.offset(offset)
|
|
.limit(limit)
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
}
|
|
return {"recent": [_row(r) for r in rows], "total": total, "limit": limit, "offset": offset}
|
|
|
|
|
|
@router.get("/recent-batches/{audit_id}")
|
|
async def get_batch_details(
|
|
audit_id: int,
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return a specific audit entry with per-file details."""
|
|
audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
|
|
if not audit:
|
|
raise HTTPException(status_code=404, detail="Audit entry not found")
|
|
files = (
|
|
db.query(ImportAuditFile)
|
|
.filter(ImportAuditFile.audit_id == audit.id)
|
|
.order_by(ImportAuditFile.id.asc())
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
"details": r.details or {},
|
|
}
|
|
def _file(f: ImportAuditFile):
|
|
return {
|
|
"id": f.id,
|
|
"file_type": f.file_type,
|
|
"status": f.status,
|
|
"imported_count": f.imported_count,
|
|
"errors": f.errors,
|
|
"message": f.message,
|
|
"details": f.details or {},
|
|
"created_at": f.created_at.isoformat() if f.created_at else None,
|
|
}
|
|
return {"audit": _row(audit), "files": [_file(f) for f in files]}
|
|
|
|
|
|
@router.post("/recent-batches/{audit_id}/rerun-failed")
|
|
async def rerun_failed_files(
|
|
audit_id: int,
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Re-run only failed files for a given audit. Creates a new audit entry for the rerun."""
    prior = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
    if not prior:
        raise HTTPException(status_code=404, detail="Audit entry not found")
    failed_files: List[ImportAuditFile] = (
        db.query(ImportAuditFile)
        .filter(ImportAuditFile.audit_id == audit_id, ImportAuditFile.status == "failed")
        .all()
    )
    if not failed_files:
        raise HTTPException(status_code=400, detail="No failed files to rerun for this audit")

    # Build list of (file_type, path) that exist
    items: List[Tuple[str, str]] = []
    for f in failed_files:
        saved_path = None
        try:
            saved_path = (f.details or {}).get("saved_path")
        except Exception:
            saved_path = None
        if saved_path and os.path.exists(saved_path):
            items.append((f.file_type, saved_path))
    if not items:
        raise HTTPException(status_code=400, detail="No saved files available to rerun. Upload again.")

    # Import order for sorting
    import_order = [
        "STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
        "TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
        "ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
        "QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
        "FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
    ]
    order_index = {name: i for i, name in enumerate(import_order)}
    items.sort(key=lambda x: order_index.get(x[0], len(import_order) + 1))
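    # Sorting sketch: lookup/reference files come first and unknown names sort last. With a
    # hypothetical input of [("QDROS.csv", p1), ("STATES.csv", p2), ("MYSTERY.csv", p3)],
    # the order becomes STATES.csv (index 0), QDROS.csv (index 15), MYSTERY.csv (fallback key 26).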

    # Create new audit row for rerun
    rerun_audit = ImportAudit(
        status="running",
        total_files=len(items),
        successful_files=0,
        failed_files=0,
        total_imported=0,
        total_errors=0,
        initiated_by_user_id=getattr(current_user, "id", None),
        initiated_by_username=getattr(current_user, "username", None),
        message=f"Rerun failed files for audit #{audit_id}",
        details={"rerun_of": audit_id},
    )
    db.add(rerun_audit)
    db.commit()
    db.refresh(rerun_audit)

    # Directory to persist rerun files
    rerun_dir = Path(settings.upload_dir).joinpath("import_audits", str(rerun_audit.id))
    try:
        rerun_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    results: List[Dict[str, Any]] = []
    total_imported = 0
    total_errors = 0

    for file_type, path in items:
        try:
            with open(path, "rb") as fh:
                content = fh.read()
            # Save a copy under the rerun audit
            saved_path = None
            try:
                file_path = rerun_dir.joinpath(file_type)
                with open(file_path, "wb") as out:
                    out.write(content)
                saved_path = str(file_path)
            except Exception:
                saved_path = None

            if file_type not in CSV_MODEL_MAPPING:
                # Flexible-only path
                encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
                csv_content = None
                for enc in encodings:
                    try:
                        csv_content = content.decode(enc)
                        break
                    except UnicodeDecodeError:
                        continue
                if csv_content is None:
                    results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
                    try:
                        db.add(ImportAuditFile(
                            audit_id=rerun_audit.id,
                            file_type=file_type,
                            status="failed",
                            imported_count=0,
                            errors=1,
                            message="Could not decode CSV file encoding",
                            details={"saved_path": saved_path} if saved_path else {}
                        ))
                        db.commit()
                    except Exception:
                        db.rollback()
                    continue

                rows_list, _headers = parse_csv_with_fallback(csv_content)
                flexible_count = 0
                for row in rows_list:
                    db.add(
                        FlexibleImport(
                            file_type=file_type,
                            target_table=None,
                            primary_key_field=None,
                            primary_key_value=None,
                            extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
                        )
                    )
                    flexible_count += 1
                    if flexible_count % 200 == 0:
                        db.commit()
                db.commit()
                total_imported += flexible_count
                results.append({
                    "file_type": file_type,
                    "status": "success",
                    "imported_count": flexible_count,
                    "errors": 0,
                    "message": f"Stored {flexible_count} rows as flexible data (no known model)",
                })
                try:
                    db.add(ImportAuditFile(
                        audit_id=rerun_audit.id,
                        file_type=file_type,
                        status="success",
                        imported_count=flexible_count,
                        errors=0,
                        message=f"Stored {flexible_count} rows as flexible data",
                        details={"saved_path": saved_path} if saved_path else {}
                    ))
                    db.commit()
                except Exception:
                    db.rollback()
                continue

            # Known model path
            model_class = CSV_MODEL_MAPPING[file_type]
            # Try common encodings; legacy exports are frequently not UTF-8.
            encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
            csv_content = None
            for enc in encodings:
                try:
                    csv_content = content.decode(enc)
                    break
                except UnicodeDecodeError:
                    continue
            if csv_content is None:
                results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
                try:
                    db.add(ImportAuditFile(
                        audit_id=rerun_audit.id,
                        file_type=file_type,
                        status="failed",
                        imported_count=0,
                        errors=1,
                        message="Could not decode CSV file encoding",
                        details={"saved_path": saved_path} if saved_path else {}
                    ))
                    db.commit()
                except Exception:
                    db.rollback()
                continue

            csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_headers = csv_reader.fieldnames or []
            mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
            mapped_headers = mapping_info["mapped_headers"]
            unmapped_headers = mapping_info["unmapped_headers"]
            imported_count = 0
            errors: List[Dict[str, Any]] = []

            # Optionally wipe existing rows for this model (and its flexible leftovers) before re-importing.
            if replace_existing:
                db.query(model_class).delete()
                db.query(FlexibleImport).filter(
                    FlexibleImport.file_type == file_type,
                    FlexibleImport.target_table == model_class.__tablename__,
                ).delete()
                db.commit()

            # Row numbers start at 2 to account for the CSV header line.
            for row_num, row in enumerate(csv_reader, start=2):
                try:
                    model_data: Dict[str, Any] = {}
                    for csv_field, db_field in mapped_headers.items():
                        if csv_field in row and db_field is not None:
                            converted_value = convert_value(row[csv_field], db_field)
                            if converted_value is not None:
                                model_data[db_field] = converted_value
                    if not any(model_data.values()):
                        continue
                    required_fields = _get_required_fields(model_class)
                    missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
                    if missing_required:
                        # Rows missing required fields are preserved as flexible data instead of being dropped.
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data={
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "missing_required": missing_required,
                                },
                            )
                        )
                        continue

                    # Minimal per-model guards: skip rows missing obviously essential fields.
                    if model_class == Phone and (not model_data.get('phone')):
                        continue
                    if model_class == Rolodex and (not model_data.get('last')):
                        continue
                    if model_class == Ledger and (not model_data.get('empl_num') or not model_data.get('file_no')):
                        continue

                    instance = model_class(**model_data)
                    db.add(instance)
                    db.flush()

                    _, pk_names = _get_model_columns(model_class)
                    pk_field_name = pk_names[0] if len(pk_names) == 1 else None
                    pk_value = None
                    if pk_field_name:
                        try:
                            pk_value = getattr(instance, pk_field_name)
                        except Exception:
                            pk_value = None
                    # Any CSV columns the auto-mapper could not place are kept alongside the record.
                    extra_data = {}
                    for csv_field in unmapped_headers:
                        if csv_field in row and row[csv_field] not in (None, ""):
                            extra_data[csv_field] = row[csv_field]
                    if extra_data:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=pk_field_name,
                                primary_key_value=str(pk_value) if pk_value is not None else None,
                                extra_data=extra_data,
                            )
                        )
                    imported_count += 1
                    # Commit in batches of 100; a rollback on a failed row only discards rows added since the last commit.
                    if imported_count % 100 == 0:
                        db.commit()
                except Exception as e:
                    db.rollback()
                    # Salvage the failed row into FlexibleImport; it only counts as an error if even that fails.
                    try:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data={
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "error": str(e),
                                },
                            )
                        )
                    except Exception:
                        errors.append({"row": row_num, "error": str(e)})
                    continue

            db.commit()
            total_imported += imported_count
            total_errors += len(errors)
            results.append({
                "file_type": file_type,
                "status": "success" if len(errors) == 0 else "completed_with_errors",
                "imported_count": imported_count,
                "errors": len(errors),
                "message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
            })
            try:
                db.add(ImportAuditFile(
                    audit_id=rerun_audit.id,
                    file_type=file_type,
                    status="success" if len(errors) == 0 else "completed_with_errors",
                    imported_count=imported_count,
                    errors=len(errors),
                    message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
                    details={"saved_path": saved_path} if saved_path else {}
                ))
                db.commit()
            except Exception:
                db.rollback()
        except Exception as e:
            db.rollback()
            results.append({"file_type": file_type, "status": "failed", "message": f"Import failed: {str(e)}"})
            try:
                db.add(ImportAuditFile(
                    audit_id=rerun_audit.id,
                    file_type=file_type,
                    status="failed",
                    imported_count=0,
                    errors=1,
                    message=f"Import failed: {str(e)}",
                    details={}
                ))
                db.commit()
            except Exception:
                db.rollback()

    # Finalize rerun audit
    summary = {
        "total_files": len(items),
        "successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
        "failed_files": len([r for r in results if r["status"] == "failed"]),
        "total_imported": total_imported,
        "total_errors": total_errors,
    }
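    # Status roll-up sketch (hypothetical numbers): 3 files rerun, 1 failed, 140 rows imported,
    # 0 row-level errors -> failed_files=1 and successful_files=2, so the audit status below
    # becomes "completed_with_errors"; "success" requires no failed files and no row errors.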
    try:
        rerun_audit.successful_files = summary["successful_files"]
        rerun_audit.failed_files = summary["failed_files"]
        rerun_audit.total_imported = summary["total_imported"]
        rerun_audit.total_errors = summary["total_errors"]
        rerun_audit.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
            "completed_with_errors" if summary["successful_files"] > 0 else "failed"
        )
        rerun_audit.message = f"Rerun completed: {rerun_audit.successful_files}/{rerun_audit.total_files} files"
        rerun_audit.finished_at = datetime.utcnow()
        rerun_audit.details = {"rerun_of": audit_id}
        db.add(rerun_audit)
        db.commit()
    except Exception:
        db.rollback()

    return {"batch_results": results, "summary": summary, "rerun_audit_id": rerun_audit.id}


@router.post("/upload-flexible")
|
|
async def upload_flexible_only(
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user),
|
|
):
|
|
"""Flexible-only single-file upload.
|
|
|
|
Accepts any CSV and stores each row as a `FlexibleImport` record with `target_table=None`.
|
|
"""
    # Ensure CSV
    if not file.filename or not file.filename.lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="File must be a CSV file")

    file_type = file.filename

    try:
        # Optionally clear prior flexible rows for this file_type
        if replace_existing:
            db.query(FlexibleImport).filter(
                FlexibleImport.file_type == file_type,
                FlexibleImport.target_table == None,  # noqa: E711
            ).delete()
            db.commit()

        content = await file.read()
        encodings = ["utf-8-sig", "utf-8", "windows-1252", "iso-8859-1", "cp1252"]
        csv_content = None
        for encoding in encodings:
            try:
                csv_content = content.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if csv_content is None:
            raise HTTPException(status_code=400, detail="Could not decode CSV file encoding")

        reader = csv.DictReader(io.StringIO(csv_content), delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        headers = reader.fieldnames or []

        imported_count = 0
        for row in reader:
            payload = {k: v for k, v in (row or {}).items() if v not in (None, "")}
            db.add(
                FlexibleImport(
                    file_type=file_type,
                    target_table=None,
                    primary_key_field=None,
                    primary_key_value=None,
                    extra_data=payload,
                )
            )
            imported_count += 1
            if imported_count % 200 == 0:
                db.commit()

        db.commit()

        return {
            "file_type": file_type,
            "imported_count": imported_count,
            "errors": [],
            "total_errors": 0,
            "auto_mapping": {
                "mapped_headers": {},
                "unmapped_headers": list(headers),
                "flexible_saved_rows": imported_count,
            },
            "message": f"Stored {imported_count} rows as flexible data (no known model)",
        }
    except HTTPException:
        raise
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Flexible upload failed: {str(e)}")