"""
|
|
Data import API endpoints for CSV file uploads with auto-discovery mapping.
|
|
"""
|
|
import csv
|
|
import io
|
|
import re
|
|
import os
|
|
from pathlib import Path
|
|
from difflib import SequenceMatcher
|
|
from datetime import datetime, date
|
|
from decimal import Decimal
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as UploadFileForm, Form, Query
|
|
from sqlalchemy.orm import Session
|
|
from app.database.base import get_db
|
|
from app.auth.security import get_current_user
|
|
from app.models.user import User
|
|
from app.models.rolodex import Rolodex, Phone
|
|
from app.models.files import File
|
|
from app.models.ledger import Ledger
|
|
from app.models.qdro import QDRO
|
|
from app.models.pensions import Pension, PensionSchedule, MarriageHistory, DeathBenefit, SeparationAgreement, LifeTable, NumberTable
|
|
from app.models.lookups import Employee, FileType, FileStatus, TransactionType, TransactionCode, State, GroupLookup, Footer, PlanInfo, FormIndex, FormList, PrinterSetup, SystemSetup
|
|
from app.models.additional import Payment, Deposit, FileNote, FormVariable, ReportVariable
|
|
from app.models.flexible import FlexibleImport
|
|
from app.models.audit import ImportAudit, ImportAuditFile
|
|
from app.config import settings
|
|
|
|
router = APIRouter(tags=["import"])
|
|
|
|
|
|
# CSV to Model mapping
CSV_MODEL_MAPPING = {
    "ROLODEX.csv": Rolodex,
    "ROLEX_V.csv": Rolodex,  # Legacy/view alias
    "PHONE.csv": Phone,
    "FILES.csv": File,
    "FILES_R.csv": File,  # Legacy/report alias
    "FILES_V.csv": File,  # Legacy/view alias
    "LEDGER.csv": Ledger,
    "QDROS.csv": QDRO,
    "PENSIONS.csv": Pension,
    "SCHEDULE.csv": PensionSchedule,
    "MARRIAGE.csv": MarriageHistory,
    "DEATH.csv": DeathBenefit,
    "SEPARATE.csv": SeparationAgreement,
    "LIFETABL.csv": LifeTable,
    "NUMBERAL.csv": NumberTable,
    "EMPLOYEE.csv": Employee,
    "FILETYPE.csv": FileType,
    "FILESTAT.csv": FileStatus,
    "TRNSTYPE.csv": TransactionType,
    "TRNSLKUP.csv": TransactionCode,
    "STATES.csv": State,
    "GRUPLKUP.csv": GroupLookup,
    "FOOTERS.csv": Footer,
    "PLANINFO.csv": PlanInfo,
    # Legacy alternate names from export directories
    "FORM_INX.csv": FormIndex,
    "FORM_LST.csv": FormList,
    "PRINTERS.csv": PrinterSetup,
    "SETUP.csv": SystemSetup,
    # Additional models for complete legacy coverage
    "DEPOSITS.csv": Deposit,
    "FILENOTS.csv": FileNote,
    "FVARLKUP.csv": FormVariable,
    "RVARLKUP.csv": ReportVariable,
    "PAYMENTS.csv": Payment,
    "TRNSACTN.csv": Ledger,  # Maps to the existing Ledger model (same structure)
}

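# Illustrative sketch of how a legacy filename resolves to its SQLAlchemy model;
# the __tablename__ lookup shown here is what the importer later records as the
# FlexibleImport target table.
#   CSV_MODEL_MAPPING["PHONE.csv"]                -> Phone
#   CSV_MODEL_MAPPING["PHONE.csv"].__tablename__  -> target table for imported rows
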
# Field mappings for CSV columns to database fields
|
|
# Legacy header synonyms used as hints only (not required). Auto-discovery will work without exact matches.
|
|
FIELD_MAPPINGS = {
|
|
"ROLODEX.csv": {
|
|
"Id": "id",
|
|
"Prefix": "prefix",
|
|
"First": "first",
|
|
"Middle": "middle",
|
|
"Last": "last",
|
|
"Suffix": "suffix",
|
|
"Title": "title",
|
|
"A1": "a1",
|
|
"A2": "a2",
|
|
"A3": "a3",
|
|
"City": "city",
|
|
"Abrev": "abrev",
|
|
"St": None, # Full state name - skip this field as model only has abrev
|
|
"Zip": "zip",
|
|
"Email": "email",
|
|
"DOB": "dob",
|
|
"SS#": "ss_number",
|
|
"Legal_Status": "legal_status",
|
|
"Group": "group",
|
|
"Memo": "memo"
|
|
},
|
|
"PHONE.csv": {
|
|
"Id": "rolodex_id",
|
|
"Phone": "phone",
|
|
"Location": "location"
|
|
},
|
|
"FILES.csv": {
|
|
"File_No": "file_no",
|
|
"Id": "id",
|
|
"File_Type": "file_type",
|
|
"Regarding": "regarding",
|
|
"Opened": "opened",
|
|
"Closed": "closed",
|
|
"Empl_Num": "empl_num",
|
|
"Rate_Per_Hour": "rate_per_hour",
|
|
"Status": "status",
|
|
"Footer_Code": "footer_code",
|
|
"Opposing": "opposing",
|
|
"Hours": "hours",
|
|
"Hours_P": "hours_p",
|
|
"Trust_Bal": "trust_bal",
|
|
"Trust_Bal_P": "trust_bal_p",
|
|
"Hourly_Fees": "hourly_fees",
|
|
"Hourly_Fees_P": "hourly_fees_p",
|
|
"Flat_Fees": "flat_fees",
|
|
"Flat_Fees_P": "flat_fees_p",
|
|
"Disbursements": "disbursements",
|
|
"Disbursements_P": "disbursements_p",
|
|
"Credit_Bal": "credit_bal",
|
|
"Credit_Bal_P": "credit_bal_p",
|
|
"Total_Charges": "total_charges",
|
|
"Total_Charges_P": "total_charges_p",
|
|
"Amount_Owing": "amount_owing",
|
|
"Amount_Owing_P": "amount_owing_p",
|
|
"Transferable": "transferable",
|
|
"Memo": "memo"
|
|
},
|
|
"LEDGER.csv": {
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
},
|
|
"QDROS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"^1": "field1",
|
|
"^2": "field2",
|
|
"^Part": "part",
|
|
"^AltP": "altp",
|
|
"^Pet": "pet",
|
|
"^Res": "res",
|
|
"Case_Type": "case_type",
|
|
"Case_Code": "case_code",
|
|
"Section": "section",
|
|
"Case_Number": "case_number",
|
|
"Judgment_Date": "judgment_date",
|
|
"Valuation_Date": "valuation_date",
|
|
"Married_On": "married_on",
|
|
"Percent_Awarded": "percent_awarded",
|
|
"Ven_City": "ven_city",
|
|
"Ven_Cnty": "ven_cnty",
|
|
"Ven_St": "ven_st",
|
|
"Draft_Out": "draft_out",
|
|
"Draft_Apr": "draft_apr",
|
|
"Final_Out": "final_out",
|
|
"Judge": "judge",
|
|
"Form_Name": "form_name"
|
|
},
|
|
"PENSIONS.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Title": "title",
|
|
"First": "first",
|
|
"Last": "last",
|
|
"Birth": "birth",
|
|
"Race": "race",
|
|
"Sex": "sex",
|
|
"Info": "info",
|
|
"Valu": "valu",
|
|
"Accrued": "accrued",
|
|
"Vested_Per": "vested_per",
|
|
"Start_Age": "start_age",
|
|
"COLA": "cola",
|
|
"Max_COLA": "max_cola",
|
|
"Withdrawal": "withdrawal",
|
|
"Pre_DR": "pre_dr",
|
|
"Post_DR": "post_dr",
|
|
"Tax_Rate": "tax_rate"
|
|
},
|
|
"EMPLOYEE.csv": {
|
|
"Empl_Num": "empl_num",
|
|
"Rate_Per_Hour": "rate_per_hour"
|
|
# "Empl_Id": not a field in Employee model, using empl_num as identifier
|
|
# Model has additional fields (first_name, last_name, title, etc.) not in CSV
|
|
},
|
|
"STATES.csv": {
|
|
"Abrev": "abbreviation",
|
|
"St": "name"
|
|
},
|
|
"GRUPLKUP.csv": {
|
|
"Code": "group_code",
|
|
"Description": "description"
|
|
# "Title": field not present in model, skipping
|
|
},
|
|
"TRNSLKUP.csv": {
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
# "T_Type_L": not a field in TransactionCode model
|
|
"Amount": "default_rate",
|
|
"Description": "description"
|
|
},
|
|
"TRNSTYPE.csv": {
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "description"
|
|
# "Header": maps to debit_credit but needs data transformation
|
|
# "Footer": doesn't align with active boolean field
|
|
# These fields may need custom handling or model updates
|
|
},
|
|
"FILETYPE.csv": {
|
|
"File_Type": "type_code",
|
|
"Description": "description",
|
|
"Default_Rate": "default_rate"
|
|
},
|
|
"FILESTAT.csv": {
|
|
"Status_Code": "status_code",
|
|
"Description": "description",
|
|
"Sort_Order": "sort_order"
|
|
},
|
|
"FOOTERS.csv": {
|
|
"F_Code": "footer_code",
|
|
"F_Footer": "content"
|
|
# Description is optional - not required for footers
|
|
},
|
|
"PLANINFO.csv": {
|
|
"Plan_Id": "plan_id",
|
|
"Plan_Name": "plan_name",
|
|
"Plan_Type": "plan_type",
|
|
"Sponsor": "sponsor",
|
|
"Administrator": "administrator",
|
|
"Address1": "address1",
|
|
"Address2": "address2",
|
|
"City": "city",
|
|
"State": "state",
|
|
"Zip_Code": "zip_code",
|
|
"Phone": "phone",
|
|
"Notes": "notes"
|
|
},
|
|
"FORM_INX.csv": {
|
|
"Form_Id": "form_id",
|
|
"Form_Name": "form_name",
|
|
"Category": "category"
|
|
},
|
|
"FORM_LST.csv": {
|
|
"Form_Id": "form_id",
|
|
"Line_Number": "line_number",
|
|
"Content": "content"
|
|
},
|
|
"PRINTERS.csv": {
|
|
"Printer_Name": "printer_name",
|
|
"Description": "description",
|
|
"Driver": "driver",
|
|
"Port": "port",
|
|
"Default_Printer": "default_printer"
|
|
},
|
|
"SETUP.csv": {
|
|
"Setting_Key": "setting_key",
|
|
"Setting_Value": "setting_value",
|
|
"Description": "description",
|
|
"Setting_Type": "setting_type"
|
|
},
|
|
"SCHEDULE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Vests_On": "vests_on",
|
|
"Vests_At": "vests_at"
|
|
},
|
|
"MARRIAGE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Marriage_Date": "marriage_date",
|
|
"Separation_Date": "separation_date",
|
|
"Divorce_Date": "divorce_date"
|
|
},
|
|
"DEATH.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Benefit_Type": "benefit_type",
|
|
"Benefit_Amount": "benefit_amount",
|
|
"Beneficiary": "beneficiary"
|
|
},
|
|
"SEPARATE.csv": {
|
|
"File_No": "file_no",
|
|
"Version": "version",
|
|
"Agreement_Date": "agreement_date",
|
|
"Terms": "terms"
|
|
},
|
|
"LIFETABL.csv": {
|
|
"Age": "age",
|
|
"Male_Mortality": "male_mortality",
|
|
"Female_Mortality": "female_mortality"
|
|
},
|
|
"NUMBERAL.csv": {
|
|
"Table_Name": "table_name",
|
|
"Age": "age",
|
|
"Value": "value"
|
|
},
|
|
# Additional CSV file mappings
|
|
"DEPOSITS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"Total": "total"
|
|
},
|
|
"FILENOTS.csv": {
|
|
"File_No": "file_no",
|
|
"Memo_Date": "memo_date",
|
|
"Memo_Note": "memo_note"
|
|
},
|
|
"FVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query",
|
|
"Response": "response"
|
|
},
|
|
"RVARLKUP.csv": {
|
|
"Identifier": "identifier",
|
|
"Query": "query"
|
|
},
|
|
"PAYMENTS.csv": {
|
|
"Deposit_Date": "deposit_date",
|
|
"File_No": "file_no",
|
|
"Id": "client_id",
|
|
"Regarding": "regarding",
|
|
"Amount": "amount",
|
|
"Note": "note"
|
|
},
|
|
"TRNSACTN.csv": {
|
|
# Maps to Ledger model - same structure as LEDGER.csv
|
|
"File_No": "file_no",
|
|
"Date": "date",
|
|
"Item_No": "item_no",
|
|
"Empl_Num": "empl_num",
|
|
"T_Code": "t_code",
|
|
"T_Type": "t_type",
|
|
"T_Type_L": "t_type_l",
|
|
"Quantity": "quantity",
|
|
"Rate": "rate",
|
|
"Amount": "amount",
|
|
"Billed": "billed",
|
|
"Note": "note"
|
|
}
|
|
}


def parse_date(date_str: str) -> Optional[date]:
    """Parse a date string in any of the common legacy formats; return None if unparseable."""
    if not date_str or date_str.strip() == "":
        return None

    date_formats = [
        "%Y-%m-%d",
        "%m/%d/%Y",
        "%d/%m/%Y",
        "%m-%d-%Y",
        "%d-%m-%Y",
        "%Y/%m/%d",
    ]

    for fmt in date_formats:
        try:
            return datetime.strptime(date_str.strip(), fmt).date()
        except ValueError:
            continue

    return None

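# Illustrative sketch (not executed at import time) of how parse_date behaves on
# typical legacy values, given the format list above:
#   parse_date("1999-12-31")  -> date(1999, 12, 31)
#   parse_date("12/31/1999")  -> date(1999, 12, 31)
#   parse_date("not a date")  -> None
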
def make_json_safe(value: Any) -> Any:
    """Recursively convert values to JSON-serializable types.

    - date/datetime -> ISO string
    - Decimal -> float
    - dict/list -> recurse
    """
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, Decimal):
        try:
            return float(value)
        except Exception:
            return str(value)
    if isinstance(value, dict):
        return {k: make_json_safe(v) for k, v in value.items()}
    if isinstance(value, list):
        return [make_json_safe(v) for v in value]
    return value

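# Illustrative sketch of the conversions that FlexibleImport JSON payloads rely on:
#   make_json_safe({"dob": date(2024, 1, 2), "amount": Decimal("10.50")})
#   -> {"dob": "2024-01-02", "amount": 10.5}
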
def parse_csv_robust(csv_content: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Parse CSV text robustly by handling broken newlines in unquoted fields.

    Returns tuple of (rows_as_dicts, headers).
    """
    lines = (csv_content or "").strip().split('\n')
    if not lines or (len(lines) == 1 and not lines[0].strip()):
        return [], []

    # Parse headers using the csv module to respect quoting
    header_reader = csv.reader(io.StringIO(lines[0]))
    headers = next(header_reader)
    headers = [h.strip() for h in headers]

    rows_data: List[Dict[str, str]] = []
    for line_num, line in enumerate(lines[1:], start=2):
        # Skip empty lines
        if not line.strip():
            continue
        try:
            # Parse each line independently; avoids multiline parse explosions
            line_reader = csv.reader(io.StringIO(line))
            fields = next(line_reader)
            fields = [f.strip() for f in fields]

            # If clearly malformed (too few fields), skip
            if len(fields) < max(1, len(headers) // 2):
                continue

            # Pad or truncate to header length
            while len(fields) < len(headers):
                fields.append("")
            fields = fields[:len(headers)]

            row_dict = dict(zip(headers, fields))
            rows_data.append(row_dict)
        except Exception:
            # Skip malformed row
            continue

    return rows_data, headers

def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
    """Try csv.DictReader first; on failure, fall back to the robust parser."""
    try:
        reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        headers_local = reader.fieldnames or []
        rows_local: List[Dict[str, str]] = []
        for r in reader:
            rows_local.append(r)
        return rows_local, headers_local
    except Exception:
        return parse_csv_robust(text)

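# Illustrative sketch: both parsers return (rows, headers), so callers can swap
# them freely when legacy quoting breaks DictReader.
#   rows, headers = parse_csv_with_fallback("Id,Last\n1,Smith\n")
#   headers -> ["Id", "Last"];  rows[0] -> {"Id": "1", "Last": "Smith"}
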
def _normalize_label(label: str) -> str:
    """Normalize a header/field label for fuzzy comparison."""
    if not label:
        return ""
    # Lowercase, replace separators with underscores, remove non-alphanumerics,
    # then expand common short forms
    lowered = label.strip().lower()
    # Replace separators
    lowered = re.sub(r"[\s\-]+", "_", lowered)
    # Remove non-word characters except underscore
    lowered = re.sub(r"[^a-z0-9_]", "", lowered)
    # Expand a few common abbreviations
    replacements = {
        "num": "number",
        "no": "number",
        "amt": "amount",
        "addr": "address",
        "st": "state",
        "dob": "dateofbirth",
        "ss": "ssnumber",
    }
    tokens = [replacements.get(t, t) for t in lowered.split("_") if t]
    return "".join(tokens)

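# Illustrative sketch of the normalization that drives fuzzy header matching:
#   _normalize_label("SS#")            -> "ssnumber"
#   _normalize_label("File_No")        -> "filenumber"
#   _normalize_label("Rate Per Hour")  -> "rateperhour"
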
def _get_model_columns(model_class) -> Tuple[Dict[str, Any], List[str]]:
    """Return model columns mapping name->Column and list of primary key column names."""
    columns = {}
    pk_names = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        columns[col.name] = col
        if col.primary_key:
            pk_names.append(col.name)
    return columns, pk_names

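# Illustrative sketch: timestamp mixin columns are excluded, and the PK list is
# what links FlexibleImport rows back to imported records.
#   cols, pks = _get_model_columns(Rolodex)
#   "created_at" in cols -> False;  pks -> the single-column PK, e.g. ["id"]
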
def _build_dynamic_mapping(headers: List[str], model_class, file_type: str) -> Dict[str, Any]:
    """Create a mapping from CSV headers to model fields using synonyms and fuzzy similarity.

    Returns a dict with keys: mapping (csv_header->db_field), suggestions, unmapped_headers, mapped_headers.
    """
    model_columns, _ = _get_model_columns(model_class)
    model_field_names = list(model_columns.keys())

    # Start with legacy mapping hints when available
    legacy_map = FIELD_MAPPINGS.get(file_type, {}) or {}

    mapping: Dict[str, Optional[str]] = {}
    suggestions: Dict[str, List[Tuple[str, float]]] = {}
    used_db_fields: set[str] = set()

    # 1) Exact legacy header key usage
    for header in headers:
        if header in legacy_map and legacy_map[header] is not None:
            candidate = legacy_map[header]
            if candidate in model_field_names and candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 2) Direct exact match against model fields (case-insensitive and normalized)
    normalized_model = {name: _normalize_label(name) for name in model_field_names}
    normalized_to_model = {v: k for k, v in normalized_model.items()}

    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        if normalized_header in normalized_to_model:
            candidate = normalized_to_model[normalized_header]
            if candidate not in used_db_fields:
                mapping[header] = candidate
                used_db_fields.add(candidate)

    # 3) Fuzzy best-match based on normalized strings
    for header in headers:
        if header in mapping:
            continue
        normalized_header = _normalize_label(header)
        best_candidate = None
        best_score = 0.0
        candidate_list: List[Tuple[str, float]] = []
        for model_field in model_field_names:
            if model_field in used_db_fields:
                continue
            nm = normalized_model[model_field]
            if not nm or not normalized_header:
                score = 0.0
            else:
                # Combine ratio and a partial-containment heuristic
                ratio = SequenceMatcher(None, normalized_header, nm).ratio()
                containment = 1.0 if (normalized_header in nm or nm in normalized_header) else 0.0
                score = max(ratio, 0.85 if containment else 0.0)
            candidate_list.append((model_field, score))
            if score > best_score:
                best_score = score
                best_candidate = model_field
        # Keep top 3 suggestions for the UI
        suggestions[header] = sorted(candidate_list, key=lambda x: x[1], reverse=True)[:3]
        # Apply only if the score is above the threshold
        if best_candidate and best_score >= 0.82:
            mapping[header] = best_candidate
            used_db_fields.add(best_candidate)

    # 4) Any header explicitly mapped to None in the legacy map is considered intentionally skipped
    for header in headers:
        if header not in mapping and header in legacy_map and legacy_map[header] is None:
            mapping[header] = None

    mapped_headers = {h: f for h, f in mapping.items() if f is not None}
    unmapped_headers = [h for h in headers if h not in mapping or mapping[h] is None]

    return {
        "mapping": mapping,
        "mapped_headers": mapped_headers,
        "unmapped_headers": unmapped_headers,
        "suggestions": suggestions,
    }

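# Illustrative sketch (assumes the imported Rolodex model): discovery combines
# legacy hints, normalized exact matches, and fuzzy scoring, and reports the rest.
#   info = _build_dynamic_mapping(["Id", "Last", "SS#", "Custom_Col"], Rolodex, "ROLODEX.csv")
#   info["mapping"]           -> e.g. {"Id": "id", "Last": "last", "SS#": "ss_number"}
#   info["unmapped_headers"]  -> headers routed to FlexibleImport JSON storage
#   info["suggestions"]       -> top-3 scored candidates per undecided header
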
def _get_required_fields(model_class) -> List[str]:
    """Infer required (non-nullable) fields for a model to avoid DB errors.

    Excludes primary keys (which might be autoincrement or provided) and timestamp mixins.
    """
    required = []
    for col in model_class.__table__.columns:
        if col.name in {"created_at", "updated_at"}:
            continue
        if col.primary_key:
            # If the PK is a string or composite, we cannot assume it is optional; handled separately
            continue
        try:
            is_required = not getattr(col, "nullable", True)
        except Exception:
            is_required = False
        if is_required:
            required.append(col.name)
    return required

def convert_value(value: str, field_name: str) -> Any:
    """Convert a string value to the appropriate type based on the field name."""
    if not value or value.strip() == "" or value.strip().lower() in ["null", "none", "n/a"]:
        return None

    value = value.strip()

    # Date fields
    if any(word in field_name.lower() for word in ["date", "dob", "birth", "opened", "closed", "judgment", "valuation", "married", "vests_on"]):
        parsed_date = parse_date(value)
        return parsed_date

    # Boolean fields
    if any(word in field_name.lower() for word in ["active", "default_printer", "billed", "transferable"]):
        if value.lower() in ["true", "1", "yes", "y", "on", "active"]:
            return True
        elif value.lower() in ["false", "0", "no", "n", "off", "inactive"]:
            return False
        else:
            return None

    # Numeric fields (float)
    if any(word in field_name.lower() for word in ["rate", "hour", "bal", "fee", "amount", "owing", "transfer", "valu", "accrued", "vested", "cola", "tax", "percent", "benefit_amount", "mortality", "value"]):
        try:
            # Remove currency symbols, thousands separators, and percent signs
            cleaned_value = value.replace("$", "").replace(",", "").replace("%", "")
            return float(cleaned_value)
        except ValueError:
            return 0.0

    # Integer fields
    if any(word in field_name.lower() for word in ["item_no", "age", "start_age", "version", "line_number", "sort_order", "empl_num"]):
        try:
            return int(float(value))  # Handle cases like "1.0"
        except ValueError:
            # For employee numbers, return None to skip the record rather than defaulting to 0
            if "empl_num" in field_name.lower():
                return None
            return 0

    # String fields - limit length to prevent database errors
    if len(value) > 500:  # Reasonable limit for most string fields
        return value[:500]

    return value

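# Illustrative sketch of the name-driven coercions applied during import:
#   convert_value("$1,250.00", "amount")   -> 1250.0
#   convert_value("Y", "billed")           -> True
#   convert_value("12/31/1999", "opened")  -> date(1999, 12, 31)
#   convert_value("", "memo")              -> None
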
def validate_foreign_keys(model_data: dict, model_class, db: Session) -> List[str]:
    """Validate foreign key relationships before inserting data."""
    errors = []

    # Check the Phone -> Rolodex relationship
    if model_class == Phone and "rolodex_id" in model_data:
        rolodex_id = model_data["rolodex_id"]
        if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
            errors.append(f"Rolodex ID '{rolodex_id}' not found")

    # Check the File -> Rolodex relationship
    if model_class == File and "id" in model_data:
        rolodex_id = model_data["id"]
        if rolodex_id and not db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
            errors.append(f"Owner Rolodex ID '{rolodex_id}' not found")

    # Add more foreign key validations as needed
    return errors

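# Illustrative sketch (assumes an open Session `db`): a Phone row that points at a
# missing Rolodex id is reported instead of failing deep inside the insert.
#   validate_foreign_keys({"rolodex_id": "X999", "phone": "555-0100"}, Phone, db)
#   -> ["Rolodex ID 'X999' not found"]
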
@router.get("/available-files")
|
|
async def get_available_csv_files(current_user: User = Depends(get_current_user)):
|
|
"""Get list of available CSV files for import"""
|
|
return {
|
|
"available_files": list(CSV_MODEL_MAPPING.keys()),
|
|
"descriptions": {
|
|
"ROLODEX.csv": "Customer/contact information",
|
|
"PHONE.csv": "Phone numbers linked to customers",
|
|
"FILES.csv": "Client files and cases",
|
|
"LEDGER.csv": "Financial transactions per file",
|
|
"QDROS.csv": "Legal documents and court orders",
|
|
"PENSIONS.csv": "Pension calculation data",
|
|
"EMPLOYEE.csv": "Staff and employee information",
|
|
"STATES.csv": "US States lookup table",
|
|
"FILETYPE.csv": "File type categories",
|
|
"FILESTAT.csv": "File status codes",
|
|
"FOOTERS.csv": "Document footers and signatures",
|
|
"DEPOSITS.csv": "Daily bank deposit summaries",
|
|
"FILENOTS.csv": "File notes and case memos",
|
|
"FVARLKUP.csv": "Form template variables",
|
|
"RVARLKUP.csv": "Report template variables",
|
|
"PAYMENTS.csv": "Individual payments within deposits",
|
|
"TRNSACTN.csv": "Transaction details (maps to Ledger)"
|
|
},
|
|
"auto_discovery": True
|
|
}
|
|
|
|
|
|
@router.post("/upload/{file_type}")
|
|
async def import_csv_data(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import data from CSV file"""
|
|
|
|
# Validate file type
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail=f"Unsupported file type: {file_type}. Available types: {list(CSV_MODEL_MAPPING.keys())}"
|
|
)
|
|
|
|
# Validate file extension
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
# Legacy mapping hints used internally by auto-discovery; not used strictly
|
|
legacy_hint_map = FIELD_MAPPINGS.get(file_type, {})
|
|
|
|
try:
|
|
# Read CSV content
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
# Preprocess CSV content to fix common legacy issues
|
|
def preprocess_csv(content):
|
|
lines = content.split('\n')
|
|
cleaned_lines = []
|
|
i = 0
|
|
|
|
while i < len(lines):
|
|
line = lines[i]
|
|
# If line doesn't have the expected number of commas, it might be a broken multi-line field
|
|
if i == 0: # Header line
|
|
cleaned_lines.append(line)
|
|
expected_comma_count = line.count(',')
|
|
i += 1
|
|
continue
|
|
|
|
# Check if this line has the expected number of commas
|
|
if line.count(',') < expected_comma_count:
|
|
# This might be a continuation of the previous line
|
|
# Try to merge with previous line
|
|
if cleaned_lines:
|
|
cleaned_lines[-1] += " " + line.replace('\n', ' ').replace('\r', ' ')
|
|
else:
|
|
cleaned_lines.append(line)
|
|
else:
|
|
cleaned_lines.append(line)
|
|
i += 1
|
|
|
|
return '\n'.join(cleaned_lines)
|
|
|
|
# Custom robust parser for problematic legacy CSV files
|
|
class MockCSVReader:
|
|
def __init__(self, data, fieldnames):
|
|
self.data = data
|
|
self.fieldnames = fieldnames
|
|
self.index = 0
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def __next__(self):
|
|
if self.index >= len(self.data):
|
|
raise StopIteration
|
|
row = self.data[self.index]
|
|
self.index += 1
|
|
return row
|
|
|
|
try:
|
|
lines = csv_content.strip().split('\n')
|
|
if not lines:
|
|
raise ValueError("Empty CSV file")
|
|
|
|
# Parse header using proper CSV parsing
|
|
header_reader = csv.reader(io.StringIO(lines[0]))
|
|
headers = next(header_reader)
|
|
headers = [h.strip() for h in headers]
|
|
print(f"DEBUG: Found {len(headers)} headers: {headers}")
|
|
# Build dynamic header mapping for this file/model
|
|
mapping_info = _build_dynamic_mapping(headers, model_class, file_type)
|
|
|
|
# Parse data rows with proper CSV parsing
|
|
rows_data = []
|
|
skipped_rows = 0
|
|
|
|
for line_num, line in enumerate(lines[1:], start=2):
|
|
# Skip empty lines
|
|
if not line.strip():
|
|
continue
|
|
|
|
try:
|
|
# Use proper CSV parsing to handle commas within quoted fields
|
|
line_reader = csv.reader(io.StringIO(line))
|
|
fields = next(line_reader)
|
|
fields = [f.strip() for f in fields]
|
|
|
|
# Skip rows that are clearly malformed (too few fields)
|
|
if len(fields) < len(headers) // 2: # Less than half the expected fields
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
# Pad or truncate to match header length
|
|
while len(fields) < len(headers):
|
|
fields.append('')
|
|
fields = fields[:len(headers)]
|
|
|
|
row_dict = dict(zip(headers, fields))
|
|
rows_data.append(row_dict)
|
|
|
|
except Exception as row_error:
|
|
print(f"Skipping malformed row {line_num}: {row_error}")
|
|
skipped_rows += 1
|
|
continue
|
|
|
|
csv_reader = MockCSVReader(rows_data, headers)
|
|
print(f"SUCCESS: Parsed {len(rows_data)} rows (skipped {skipped_rows} malformed rows)")
|
|
|
|
except Exception as e:
|
|
print(f"Custom parsing failed: {e}")
|
|
raise HTTPException(status_code=400, detail=f"Could not parse CSV file. The file appears to have serious formatting issues. Error: {str(e)}")
|
|
|
|
imported_count = 0
|
|
errors = []
|
|
flexible_saved = 0
|
|
mapped_headers = mapping_info.get("mapped_headers", {})
|
|
unmapped_headers = mapping_info.get("unmapped_headers", [])
|
|
|
|
# If replace_existing is True, delete all existing records and related flexible extras
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(csv_reader, start=2): # Start at 2 for header row
|
|
try:
|
|
# Convert CSV row to model data
|
|
model_data: Dict[str, Any] = {}
|
|
# Apply discovered mapping
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
|
|
# Skip empty rows
|
|
if not any(model_data.values()):
|
|
continue
|
|
|
|
# Fallback: if required non-nullable fields are missing, store row as flexible only
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
# Do not attempt to insert into strict model; continue to next row
|
|
continue
|
|
|
|
# Special validation for models with required fields
|
|
if model_class == Phone:
|
|
if 'phone' not in model_data or not model_data['phone']:
|
|
continue # Skip phone records without a phone number
|
|
|
|
if model_class == Rolodex:
|
|
if 'last' not in model_data or not model_data['last']:
|
|
continue # Skip rolodex records without a last name/company name
|
|
|
|
if model_class == Ledger:
|
|
# Skip ledger records without required fields
|
|
if 'empl_num' not in model_data or not model_data['empl_num']:
|
|
continue # Skip ledger records without employee number
|
|
if 'file_no' not in model_data or not model_data['file_no']:
|
|
continue # Skip ledger records without file number
|
|
|
|
# Create model instance
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush() # Ensure PK is available
|
|
|
|
# Capture PK details for flexible storage linkage (single-column PKs only)
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
|
|
# Save unmapped fields into flexible storage (privacy-first, per-row JSON)
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=extra_data,
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
imported_count += 1
|
|
|
|
# Commit every 100 records to avoid memory issues
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
|
|
except Exception as e:
|
|
# Rollback the transaction for this record
|
|
db.rollback()
|
|
# As a robustness measure, persist row in flexible storage instead of counting as error
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data={
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
},
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
except Exception as flex_e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
|
|
"data": row,
|
|
})
|
|
continue
|
|
|
|
# Final commit
|
|
db.commit()
|
|
|
|
result = {
|
|
"file_type": file_type,
|
|
"imported_count": imported_count,
|
|
"errors": errors[:10], # Limit errors to first 10
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"mapped_headers": mapped_headers,
|
|
"unmapped_headers": unmapped_headers,
|
|
"flexible_saved_rows": flexible_saved,
|
|
},
|
|
}
|
|
|
|
if errors:
|
|
result["warning"] = f"Import completed with {len(errors)} errors"
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
print(f"IMPORT ERROR DEBUG: {type(e).__name__}: {str(e)}")
|
|
import traceback
|
|
print(f"TRACEBACK: {traceback.format_exc()}")
|
|
db.rollback()
|
|
raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")


@router.get("/status")
async def get_import_status(db: Session = Depends(get_db), current_user: User = Depends(get_current_user)):
    """Get current import status and record counts"""

    status = {}

    for file_type, model_class in CSV_MODEL_MAPPING.items():
        try:
            count = db.query(model_class).count()
            status[file_type] = {
                "table_name": model_class.__tablename__,
                "record_count": count
            }
        except Exception as e:
            status[file_type] = {
                "table_name": model_class.__tablename__,
                "record_count": 0,
                "error": str(e)
            }

    return status

@router.delete("/clear/{file_type}")
async def clear_table_data(
    file_type: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Clear all data from a specific table"""

    if file_type not in CSV_MODEL_MAPPING:
        raise HTTPException(status_code=400, detail=f"Unknown file type: {file_type}")

    model_class = CSV_MODEL_MAPPING[file_type]

    try:
        deleted_count = db.query(model_class).count()
        db.query(model_class).delete()
        # Also clear any flexible rows linked to this target table and file type
        db.query(FlexibleImport).filter(
            FlexibleImport.file_type == file_type,
            FlexibleImport.target_table == model_class.__tablename__,
        ).delete()
        db.commit()

        return {
            "file_type": file_type,
            "table_name": model_class.__tablename__,
            "deleted_count": deleted_count
        }

    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Clear operation failed: {str(e)}")

@router.post("/validate/{file_type}")
|
|
async def validate_csv_file(
|
|
file_type: str,
|
|
file: UploadFile = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate CSV file structure without importing"""
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}")
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
raise HTTPException(status_code=400, detail="File must be a CSV file")
|
|
|
|
# Use auto-discovery mapping for validation
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
raise HTTPException(status_code=400, detail="Could not decode CSV file. Please ensure it's saved in UTF-8, Windows-1252, or ISO-8859-1 encoding.")
|
|
|
|
# Parse CSV with fallback to robust line-by-line parsing
|
|
def parse_csv_with_fallback(text: str) -> Tuple[List[Dict[str, str]], List[str]]:
|
|
try:
|
|
reader = csv.DictReader(io.StringIO(text), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
|
headers_local = reader.fieldnames or []
|
|
rows_local = []
|
|
for r in reader:
|
|
rows_local.append(r)
|
|
return rows_local, headers_local
|
|
except Exception:
|
|
return parse_csv_robust(text)
|
|
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues on mapped fields
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
return {
|
|
"file_type": file_type,
|
|
# Consider valid if we can map at least one column; we don't require exact header match
|
|
"valid": len(mapped_headers) > 0 and len(errors) == 0,
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers,
|
|
},
|
|
"sample_data": sample_rows,
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
}
|
|
|
|
except Exception as e:
|
|
print(f"VALIDATION ERROR DEBUG: {type(e).__name__}: {str(e)}")
|
|
import traceback
|
|
print(f"VALIDATION TRACEBACK: {traceback.format_exc()}")
|
|
raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")


@router.get("/progress/{import_id}")
async def get_import_progress(
    import_id: str,
    current_user: User = Depends(get_current_user)
):
    """Get import progress status (placeholder for future implementation)"""
    # This would be used for long-running imports with background tasks
    return {
        "import_id": import_id,
        "status": "not_implemented",
        "message": "Real-time progress tracking not yet implemented"
    }

@router.post("/batch-validate")
|
|
async def batch_validate_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Validate multiple CSV files without importing"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
validation_results = []
|
|
|
|
for file in files:
|
|
file_type = file.filename
|
|
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Unsupported file type: {file_type}"
|
|
})
|
|
continue
|
|
|
|
if not file.filename.endswith('.csv'):
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "File must be a CSV file"
|
|
})
|
|
continue
|
|
|
|
model_class = CSV_MODEL_MAPPING.get(file_type)
|
|
|
|
try:
|
|
content = await file.read()
|
|
|
|
# Try multiple encodings for legacy CSV files (include BOM-friendly utf-8-sig)
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
|
|
# Handle CSV parsing issues with legacy files
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
|
|
# Check headers and build dynamic mapping
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
# Sample data validation
|
|
sample_rows = []
|
|
errors = []
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
if row_num > 12: # Only check first 10 data rows
|
|
break
|
|
|
|
sample_rows.append(row)
|
|
|
|
# Check for data type issues
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and row[csv_field]:
|
|
try:
|
|
convert_value(row[csv_field], db_field)
|
|
except Exception as e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"field": csv_field,
|
|
"value": row[csv_field],
|
|
"error": str(e)
|
|
})
|
|
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": len(mapped_headers) > 0 and len(errors) == 0,
|
|
"headers": {
|
|
"found": csv_headers,
|
|
"mapped": mapped_headers,
|
|
"unmapped": unmapped_headers
|
|
},
|
|
"sample_data": sample_rows[:5], # Limit sample data for batch operation
|
|
"validation_errors": errors[:5], # First 5 errors only
|
|
"total_errors": len(errors),
|
|
"auto_mapping": {
|
|
"suggestions": mapping_info["suggestions"],
|
|
},
|
|
})
|
|
|
|
# Reset file pointer for potential future use
|
|
await file.seek(0)
|
|
|
|
except Exception as e:
|
|
validation_results.append({
|
|
"file_type": file_type,
|
|
"valid": False,
|
|
"error": f"Validation failed: {str(e)}"
|
|
})
|
|
|
|
# Summary statistics
|
|
total_files = len(validation_results)
|
|
valid_files = len([r for r in validation_results if r["valid"]])
|
|
invalid_files = total_files - valid_files
|
|
|
|
return {
|
|
"batch_validation_results": validation_results,
|
|
"summary": {
|
|
"total_files": total_files,
|
|
"valid_files": valid_files,
|
|
"invalid_files": invalid_files,
|
|
"all_valid": invalid_files == 0
|
|
}
|
|
}
|
|
|
|
|
|
@router.post("/batch-upload")
|
|
async def batch_import_csv_files(
|
|
files: List[UploadFile] = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Import multiple CSV files in optimal order"""
|
|
|
|
if len(files) > 25:
|
|
raise HTTPException(status_code=400, detail="Maximum 25 files allowed per batch")
|
|
|
|
# Define optimal import order based on dependencies
|
|
import_order = [
|
|
"STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
|
|
"TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
|
|
"ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
|
|
"QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
|
|
"FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
|
|
]
|
|
|
|
# Sort uploaded files by optimal import order
|
|
file_map = {f.filename: f for f in files}
|
|
ordered_files = []
|
|
|
|
for file_type in import_order:
|
|
if file_type in file_map:
|
|
ordered_files.append((file_type, file_map[file_type]))
|
|
del file_map[file_type]
|
|
|
|
# Add any remaining files not in the predefined order
|
|
for filename, file in file_map.items():
|
|
ordered_files.append((filename, file))
|
|
|
|
results = []
|
|
total_imported = 0
|
|
total_errors = 0
|
|
|
|
# Create import audit row (running)
|
|
audit_row = ImportAudit(
|
|
status="running",
|
|
total_files=len(files),
|
|
successful_files=0,
|
|
failed_files=0,
|
|
total_imported=0,
|
|
total_errors=0,
|
|
initiated_by_user_id=getattr(current_user, "id", None),
|
|
initiated_by_username=getattr(current_user, "username", None),
|
|
message="Batch import started",
|
|
)
|
|
db.add(audit_row)
|
|
db.commit()
|
|
db.refresh(audit_row)
|
|
|
|
# Directory to persist uploaded files for this audit (for reruns)
|
|
audit_dir = Path(settings.upload_dir).joinpath("import_audits", str(audit_row.id))
|
|
try:
|
|
audit_dir.mkdir(parents=True, exist_ok=True)
|
|
except Exception:
|
|
pass
|
|
|
|
for file_type, file in ordered_files:
|
|
if file_type not in CSV_MODEL_MAPPING:
|
|
# Fallback flexible-only import for unknown file structures
|
|
try:
|
|
await file.seek(0)
|
|
content = await file.read()
|
|
# Save original upload to disk for potential reruns
|
|
saved_path = None
|
|
try:
|
|
file_path = audit_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as fh:
|
|
fh.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
if csv_content is None:
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": "Could not decode CSV file encoding"
|
|
})
|
|
continue
|
|
rows_list, headers = parse_csv_with_fallback(csv_content)
|
|
flexible_count = 0
|
|
for row in rows_list:
|
|
# Save entire row as flexible JSON
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=None,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
|
|
)
|
|
)
|
|
flexible_count += 1
|
|
if flexible_count % 200 == 0:
|
|
db.commit()
|
|
db.commit()
|
|
total_imported += flexible_count
|
|
# Persist per-file result row
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success",
|
|
"imported_count": flexible_count,
|
|
"errors": 0,
|
|
"message": f"Stored {flexible_count} rows as flexible data (no known model)",
|
|
"auto_mapping": {
|
|
"mapped_headers": {},
|
|
"unmapped_headers": list(headers),
|
|
"flexible_saved_rows": flexible_count,
|
|
},
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="success",
|
|
imported_count=flexible_count,
|
|
errors=0,
|
|
message=f"Stored {flexible_count} rows as flexible data",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": f"Flexible import failed: {str(e)}"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Flexible import failed: {str(e)}",
|
|
details={}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
try:
|
|
# Reset file pointer
|
|
await file.seek(0)
|
|
|
|
# Import this file using auto-discovery mapping
|
|
model_class = CSV_MODEL_MAPPING[file_type]
|
|
|
|
content = await file.read()
|
|
# Save original upload to disk for potential reruns
|
|
saved_path = None
|
|
try:
|
|
file_path = audit_dir.joinpath(file_type)
|
|
with open(file_path, "wb") as fh:
|
|
fh.write(content)
|
|
saved_path = str(file_path)
|
|
except Exception:
|
|
saved_path = None
|
|
|
|
# Try multiple encodings for legacy CSV files
|
|
encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
|
|
csv_content = None
|
|
for encoding in encodings:
|
|
try:
|
|
csv_content = content.decode(encoding)
|
|
break
|
|
except UnicodeDecodeError:
|
|
continue
|
|
|
|
if csv_content is None:
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": "Could not decode CSV file encoding"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message="Could not decode CSV file encoding",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
continue
|
|
|
|
# Handle CSV parsing issues with legacy files
|
|
rows_list, csv_headers = parse_csv_with_fallback(csv_content)
|
|
mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
|
|
mapped_headers = mapping_info["mapped_headers"]
|
|
unmapped_headers = mapping_info["unmapped_headers"]
|
|
|
|
imported_count = 0
|
|
errors = []
|
|
flexible_saved = 0
|
|
|
|
# If replace_existing is True and this is the first file of this type
|
|
if replace_existing:
|
|
db.query(model_class).delete()
|
|
db.query(FlexibleImport).filter(
|
|
FlexibleImport.file_type == file_type,
|
|
FlexibleImport.target_table == model_class.__tablename__,
|
|
).delete()
|
|
db.commit()
|
|
|
|
for row_num, row in enumerate(rows_list, start=2):
|
|
try:
|
|
model_data = {}
|
|
for csv_field, db_field in mapped_headers.items():
|
|
if csv_field in row and db_field is not None:
|
|
converted_value = convert_value(row[csv_field], db_field)
|
|
if converted_value is not None:
|
|
model_data[db_field] = converted_value
|
|
|
|
if not any(model_data.values()):
|
|
continue
|
|
|
|
# Fallback: if required non-nullable fields are missing, store row as flexible only
|
|
required_fields = _get_required_fields(model_class)
|
|
missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
|
|
if missing_required:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"missing_required": missing_required,
|
|
}),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
continue
|
|
|
|
# Special validation for models with required fields
|
|
if model_class == Phone:
|
|
if 'phone' not in model_data or not model_data['phone']:
|
|
continue # Skip phone records without a phone number
|
|
|
|
if model_class == Rolodex:
|
|
if 'last' not in model_data or not model_data['last']:
|
|
continue # Skip rolodex records without a last name/company name
|
|
|
|
if model_class == Ledger:
|
|
# Skip ledger records without required fields
|
|
if 'empl_num' not in model_data or not model_data['empl_num']:
|
|
continue # Skip ledger records without employee number
|
|
if 'file_no' not in model_data or not model_data['file_no']:
|
|
continue # Skip ledger records without file number
|
|
|
|
instance = model_class(**model_data)
|
|
db.add(instance)
|
|
db.flush()
|
|
|
|
# Link flexible extras
|
|
_, pk_names = _get_model_columns(model_class)
|
|
pk_field_name = pk_names[0] if len(pk_names) == 1 else None
|
|
pk_value = None
|
|
if pk_field_name:
|
|
try:
|
|
pk_value = getattr(instance, pk_field_name)
|
|
except Exception:
|
|
pk_value = None
|
|
extra_data = {}
|
|
for csv_field in unmapped_headers:
|
|
if csv_field in row and row[csv_field] not in (None, ""):
|
|
extra_data[csv_field] = row[csv_field]
|
|
if extra_data:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=pk_field_name,
|
|
primary_key_value=str(pk_value) if pk_value is not None else None,
|
|
extra_data=make_json_safe(extra_data),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
imported_count += 1
|
|
|
|
if imported_count % 100 == 0:
|
|
db.commit()
|
|
|
|
except Exception as e:
|
|
# Rollback the transaction for this record
|
|
db.rollback()
|
|
# Persist row in flexible storage instead of counting as error only
|
|
try:
|
|
db.add(
|
|
FlexibleImport(
|
|
file_type=file_type,
|
|
target_table=model_class.__tablename__,
|
|
primary_key_field=None,
|
|
primary_key_value=None,
|
|
extra_data=make_json_safe({
|
|
"mapped": model_data,
|
|
"unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
|
|
"error": str(e),
|
|
}),
|
|
)
|
|
)
|
|
flexible_saved += 1
|
|
except Exception as flex_e:
|
|
errors.append({
|
|
"row": row_num,
|
|
"error": f"{str(e)} | Flexible save failed: {str(flex_e)}",
|
|
})
|
|
continue
|
|
|
|
db.commit()
|
|
|
|
total_imported += imported_count
|
|
total_errors += len(errors)
|
|
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "success" if len(errors) == 0 else "completed_with_errors",
|
|
"imported_count": imported_count,
|
|
"errors": len(errors),
|
|
"message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
"auto_mapping": {
|
|
"mapped_headers": mapped_headers,
|
|
"unmapped_headers": unmapped_headers,
|
|
"flexible_saved_rows": flexible_saved,
|
|
},
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="success" if len(errors) == 0 else "completed_with_errors",
|
|
imported_count=imported_count,
|
|
errors=len(errors),
|
|
message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
|
|
details={
|
|
"mapped_headers": list(mapped_headers.keys()),
|
|
"unmapped_count": len(unmapped_headers),
|
|
"flexible_saved_rows": flexible_saved,
|
|
**({"saved_path": saved_path} if saved_path else {}),
|
|
}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
results.append({
|
|
"file_type": file_type,
|
|
"status": "failed",
|
|
"message": f"Import failed: {str(e)}"
|
|
})
|
|
try:
|
|
db.add(ImportAuditFile(
|
|
audit_id=audit_row.id,
|
|
file_type=file_type,
|
|
status="failed",
|
|
imported_count=0,
|
|
errors=1,
|
|
message=f"Import failed: {str(e)}",
|
|
details={"saved_path": saved_path} if saved_path else {}
|
|
))
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
summary = {
|
|
"total_files": len(files),
|
|
"successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
|
|
"failed_files": len([r for r in results if r["status"] == "failed"]),
|
|
"total_imported": total_imported,
|
|
"total_errors": total_errors
|
|
}
|
|
|
|
# Finalize audit row
|
|
try:
|
|
audit_row.successful_files = summary["successful_files"]
|
|
audit_row.failed_files = summary["failed_files"]
|
|
audit_row.total_imported = summary["total_imported"]
|
|
audit_row.total_errors = summary["total_errors"]
|
|
audit_row.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
|
|
"completed_with_errors" if summary["successful_files"] > 0 else "failed"
|
|
)
|
|
audit_row.message = f"Batch import completed: {audit_row.successful_files}/{audit_row.total_files} files"
|
|
audit_row.finished_at = datetime.utcnow()
|
|
audit_row.details = {
|
|
"files": [
|
|
{"file_type": r.get("file_type"), "status": r.get("status"), "imported_count": r.get("imported_count", 0), "errors": r.get("errors", 0)}
|
|
for r in results
|
|
]
|
|
}
|
|
db.add(audit_row)
|
|
db.commit()
|
|
except Exception:
|
|
db.rollback()
|
|
|
|
return {
|
|
"batch_results": results,
|
|
"summary": summary
|
|
}
|
|
|
|
|
|
@router.get("/recent-batches")
|
|
async def recent_batch_imports(
|
|
limit: int = Query(5, ge=1, le=50),
|
|
offset: int = Query(0, ge=0),
|
|
status: Optional[str] = Query(None, description="Filter by status: running|success|completed_with_errors|failed"),
|
|
start: Optional[str] = Query(None, description="ISO datetime start for started_at filter"),
|
|
end: Optional[str] = Query(None, description="ISO datetime end for started_at filter"),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return recent batch import audit rows (most recent first) with optional filters and pagination."""
|
|
q = db.query(ImportAudit)
|
|
if status and status.lower() != "all":
|
|
q = q.filter(ImportAudit.status == status)
|
|
# Date range filters on started_at
|
|
try:
|
|
if start:
|
|
start_dt = datetime.fromisoformat(start)
|
|
q = q.filter(ImportAudit.started_at >= start_dt)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if end:
|
|
end_dt = datetime.fromisoformat(end)
|
|
q = q.filter(ImportAudit.started_at <= end_dt)
|
|
except Exception:
|
|
pass
|
|
total = q.count()
|
|
rows = (
|
|
q.order_by(ImportAudit.started_at.desc())
|
|
.offset(offset)
|
|
.limit(limit)
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
}
|
|
return {"recent": [_row(r) for r in rows], "total": total, "limit": limit, "offset": offset}
|
|
|
|
|
|
@router.get("/recent-batches/{audit_id}")
|
|
async def get_batch_details(
|
|
audit_id: int,
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Return a specific audit entry with per-file details."""
|
|
audit = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
|
|
if not audit:
|
|
raise HTTPException(status_code=404, detail="Audit entry not found")
|
|
files = (
|
|
db.query(ImportAuditFile)
|
|
.filter(ImportAuditFile.audit_id == audit.id)
|
|
.order_by(ImportAuditFile.id.asc())
|
|
.all()
|
|
)
|
|
def _row(r: ImportAudit):
|
|
return {
|
|
"id": r.id,
|
|
"started_at": r.started_at.isoformat() if r.started_at else None,
|
|
"finished_at": r.finished_at.isoformat() if r.finished_at else None,
|
|
"status": r.status,
|
|
"total_files": r.total_files,
|
|
"successful_files": r.successful_files,
|
|
"failed_files": r.failed_files,
|
|
"total_imported": r.total_imported,
|
|
"total_errors": r.total_errors,
|
|
"initiated_by": r.initiated_by_username,
|
|
"message": r.message,
|
|
"details": r.details or {},
|
|
}
|
|
def _file(f: ImportAuditFile):
|
|
return {
|
|
"id": f.id,
|
|
"file_type": f.file_type,
|
|
"status": f.status,
|
|
"imported_count": f.imported_count,
|
|
"errors": f.errors,
|
|
"message": f.message,
|
|
"details": f.details or {},
|
|
"created_at": f.created_at.isoformat() if f.created_at else None,
|
|
}
|
|
return {"audit": _row(audit), "files": [_file(f) for f in files]}
|
|
|
|
|
|
@router.post("/recent-batches/{audit_id}/rerun-failed")
|
|
async def rerun_failed_files(
|
|
audit_id: int,
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user)
|
|
):
|
|
"""Re-run only failed files for a given audit. Creates a new audit entry for the rerun."""
    prior = db.query(ImportAudit).filter(ImportAudit.id == audit_id).first()
    if not prior:
        raise HTTPException(status_code=404, detail="Audit entry not found")
    failed_files: List[ImportAuditFile] = (
        db.query(ImportAuditFile)
        .filter(ImportAuditFile.audit_id == audit_id, ImportAuditFile.status == "failed")
        .all()
    )
    if not failed_files:
        raise HTTPException(status_code=400, detail="No failed files to rerun for this audit")

    # Build list of (file_type, path) that exist
    items: List[Tuple[str, str]] = []
    for f in failed_files:
        saved_path = None
        try:
            saved_path = (f.details or {}).get("saved_path")
        except Exception:
            saved_path = None
        if saved_path and os.path.exists(saved_path):
            items.append((f.file_type, saved_path))
    if not items:
        raise HTTPException(status_code=400, detail="No saved files available to rerun. Upload again.")

    # Import order for sorting
    import_order = [
        "STATES.csv", "GRUPLKUP.csv", "EMPLOYEE.csv", "FILETYPE.csv", "FILESTAT.csv",
        "TRNSTYPE.csv", "TRNSLKUP.csv", "FOOTERS.csv", "SETUP.csv", "PRINTERS.csv",
        "ROLODEX.csv", "PHONE.csv", "FILES.csv", "LEDGER.csv", "TRNSACTN.csv",
        "QDROS.csv", "PENSIONS.csv", "PLANINFO.csv", "PAYMENTS.csv", "DEPOSITS.csv",
        "FILENOTS.csv", "FORM_INX.csv", "FORM_LST.csv", "FVARLKUP.csv", "RVARLKUP.csv"
    ]
    order_index = {name: i for i, name in enumerate(import_order)}
    items.sort(key=lambda x: order_index.get(x[0], len(import_order) + 1))
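    # Sorting sketch: lookup/reference files come first and unknown names sort last. With a
    # hypothetical input of [("QDROS.csv", p1), ("STATES.csv", p2), ("MYSTERY.csv", p3)],
    # the order becomes STATES.csv (index 0), QDROS.csv (index 15), MYSTERY.csv (fallback key 26).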

    # Create new audit row for rerun
    rerun_audit = ImportAudit(
        status="running",
        total_files=len(items),
        successful_files=0,
        failed_files=0,
        total_imported=0,
        total_errors=0,
        initiated_by_user_id=getattr(current_user, "id", None),
        initiated_by_username=getattr(current_user, "username", None),
        message=f"Rerun failed files for audit #{audit_id}",
        details={"rerun_of": audit_id},
    )
    db.add(rerun_audit)
    db.commit()
    db.refresh(rerun_audit)

    # Directory to persist rerun files
    rerun_dir = Path(settings.upload_dir).joinpath("import_audits", str(rerun_audit.id))
    try:
        rerun_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    results: List[Dict[str, Any]] = []
    total_imported = 0
    total_errors = 0

    for file_type, path in items:
        try:
            with open(path, "rb") as fh:
                content = fh.read()
            # Save a copy under the rerun audit
            saved_path = None
            try:
                file_path = rerun_dir.joinpath(file_type)
                with open(file_path, "wb") as out:
                    out.write(content)
                saved_path = str(file_path)
            except Exception:
                saved_path = None

            if file_type not in CSV_MODEL_MAPPING:
                # Flexible-only path
                encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
                csv_content = None
                for enc in encodings:
                    try:
                        csv_content = content.decode(enc)
                        break
                    except UnicodeDecodeError:
                        continue
                if csv_content is None:
                    results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
                    try:
                        db.add(ImportAuditFile(
                            audit_id=rerun_audit.id,
                            file_type=file_type,
                            status="failed",
                            imported_count=0,
                            errors=1,
                            message="Could not decode CSV file encoding",
                            details={"saved_path": saved_path} if saved_path else {}
                        ))
                        db.commit()
                    except Exception:
                        db.rollback()
                    continue

                rows_list, _headers = parse_csv_with_fallback(csv_content)
                flexible_count = 0
                for row in rows_list:
                    db.add(
                        FlexibleImport(
                            file_type=file_type,
                            target_table=None,
                            primary_key_field=None,
                            primary_key_value=None,
                            extra_data=make_json_safe({k: v for k, v in (row or {}).items() if v not in (None, "")}),
                        )
                    )
                    flexible_count += 1
                    if flexible_count % 200 == 0:
                        db.commit()
                db.commit()
                total_imported += flexible_count
                results.append({
                    "file_type": file_type,
                    "status": "success",
                    "imported_count": flexible_count,
                    "errors": 0,
                    "message": f"Stored {flexible_count} rows as flexible data (no known model)",
                })
                try:
                    db.add(ImportAuditFile(
                        audit_id=rerun_audit.id,
                        file_type=file_type,
                        status="success",
                        imported_count=flexible_count,
                        errors=0,
                        message=f"Stored {flexible_count} rows as flexible data",
                        details={"saved_path": saved_path} if saved_path else {}
                    ))
                    db.commit()
                except Exception:
                    db.rollback()
                continue

            # Known model path
            model_class = CSV_MODEL_MAPPING[file_type]
            # Try common encodings; legacy exports are frequently not UTF-8.
            encodings = ['utf-8-sig', 'utf-8', 'windows-1252', 'iso-8859-1', 'cp1252']
            csv_content = None
            for enc in encodings:
                try:
                    csv_content = content.decode(enc)
                    break
                except UnicodeDecodeError:
                    continue
            if csv_content is None:
                results.append({"file_type": file_type, "status": "failed", "message": "Could not decode CSV file encoding"})
                try:
                    db.add(ImportAuditFile(
                        audit_id=rerun_audit.id,
                        file_type=file_type,
                        status="failed",
                        imported_count=0,
                        errors=1,
                        message="Could not decode CSV file encoding",
                        details={"saved_path": saved_path} if saved_path else {}
                    ))
                    db.commit()
                except Exception:
                    db.rollback()
                continue

            csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_headers = csv_reader.fieldnames or []
            mapping_info = _build_dynamic_mapping(csv_headers, model_class, file_type)
            mapped_headers = mapping_info["mapped_headers"]
            unmapped_headers = mapping_info["unmapped_headers"]
            imported_count = 0
            errors: List[Dict[str, Any]] = []

            # Optionally wipe existing rows for this model (and its flexible leftovers) before re-importing.
            if replace_existing:
                db.query(model_class).delete()
                db.query(FlexibleImport).filter(
                    FlexibleImport.file_type == file_type,
                    FlexibleImport.target_table == model_class.__tablename__,
                ).delete()
                db.commit()

            # Row numbers start at 2 to account for the CSV header line.
            for row_num, row in enumerate(csv_reader, start=2):
                try:
                    model_data: Dict[str, Any] = {}
                    for csv_field, db_field in mapped_headers.items():
                        if csv_field in row and db_field is not None:
                            converted_value = convert_value(row[csv_field], db_field)
                            if converted_value is not None:
                                model_data[db_field] = converted_value
                    if not any(model_data.values()):
                        continue
                    required_fields = _get_required_fields(model_class)
                    missing_required = [f for f in required_fields if model_data.get(f) in (None, "")]
                    if missing_required:
                        # Rows missing required fields are preserved as flexible data instead of being dropped.
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data={
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "missing_required": missing_required,
                                },
                            )
                        )
                        continue

                    # Minimal per-model guards: skip rows missing obviously essential fields.
                    if model_class == Phone and (not model_data.get('phone')):
                        continue
                    if model_class == Rolodex and (not model_data.get('last')):
                        continue
                    if model_class == Ledger and (not model_data.get('empl_num') or not model_data.get('file_no')):
                        continue

                    instance = model_class(**model_data)
                    db.add(instance)
                    db.flush()

                    _, pk_names = _get_model_columns(model_class)
                    pk_field_name = pk_names[0] if len(pk_names) == 1 else None
                    pk_value = None
                    if pk_field_name:
                        try:
                            pk_value = getattr(instance, pk_field_name)
                        except Exception:
                            pk_value = None
                    # Any CSV columns the auto-mapper could not place are kept alongside the record.
                    extra_data = {}
                    for csv_field in unmapped_headers:
                        if csv_field in row and row[csv_field] not in (None, ""):
                            extra_data[csv_field] = row[csv_field]
                    if extra_data:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=pk_field_name,
                                primary_key_value=str(pk_value) if pk_value is not None else None,
                                extra_data=extra_data,
                            )
                        )
                    imported_count += 1
                    # Commit in batches of 100; a rollback on a failed row only discards rows added since the last commit.
                    if imported_count % 100 == 0:
                        db.commit()
                except Exception as e:
                    db.rollback()
                    # Salvage the failed row into FlexibleImport; it only counts as an error if even that fails.
                    try:
                        db.add(
                            FlexibleImport(
                                file_type=file_type,
                                target_table=model_class.__tablename__,
                                primary_key_field=None,
                                primary_key_value=None,
                                extra_data={
                                    "mapped": model_data,
                                    "unmapped": {h: row.get(h) for h in unmapped_headers if row.get(h) not in (None, "")},
                                    "error": str(e),
                                },
                            )
                        )
                    except Exception:
                        errors.append({"row": row_num, "error": str(e)})
                    continue

            db.commit()
            total_imported += imported_count
            total_errors += len(errors)
            results.append({
                "file_type": file_type,
                "status": "success" if len(errors) == 0 else "completed_with_errors",
                "imported_count": imported_count,
                "errors": len(errors),
                "message": f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
            })
            try:
                db.add(ImportAuditFile(
                    audit_id=rerun_audit.id,
                    file_type=file_type,
                    status="success" if len(errors) == 0 else "completed_with_errors",
                    imported_count=imported_count,
                    errors=len(errors),
                    message=f"Imported {imported_count} records" + (f" with {len(errors)} errors" if errors else ""),
                    details={"saved_path": saved_path} if saved_path else {}
                ))
                db.commit()
            except Exception:
                db.rollback()
        except Exception as e:
            db.rollback()
            results.append({"file_type": file_type, "status": "failed", "message": f"Import failed: {str(e)}"})
            try:
                db.add(ImportAuditFile(
                    audit_id=rerun_audit.id,
                    file_type=file_type,
                    status="failed",
                    imported_count=0,
                    errors=1,
                    message=f"Import failed: {str(e)}",
                    details={}
                ))
                db.commit()
            except Exception:
                db.rollback()

    # Finalize rerun audit
    summary = {
        "total_files": len(items),
        "successful_files": len([r for r in results if r["status"] in ["success", "completed_with_errors"]]),
        "failed_files": len([r for r in results if r["status"] == "failed"]),
        "total_imported": total_imported,
        "total_errors": total_errors,
    }
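    # Status roll-up sketch (hypothetical numbers): 3 files rerun, 1 failed, 140 rows imported,
    # 0 row-level errors -> failed_files=1 and successful_files=2, so the audit status below
    # becomes "completed_with_errors"; "success" requires no failed files and no row errors.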
    try:
        rerun_audit.successful_files = summary["successful_files"]
        rerun_audit.failed_files = summary["failed_files"]
        rerun_audit.total_imported = summary["total_imported"]
        rerun_audit.total_errors = summary["total_errors"]
        rerun_audit.status = "success" if summary["failed_files"] == 0 and summary["total_errors"] == 0 else (
            "completed_with_errors" if summary["successful_files"] > 0 else "failed"
        )
        rerun_audit.message = f"Rerun completed: {rerun_audit.successful_files}/{rerun_audit.total_files} files"
        rerun_audit.finished_at = datetime.utcnow()
        rerun_audit.details = {"rerun_of": audit_id}
        db.add(rerun_audit)
        db.commit()
    except Exception:
        db.rollback()

    return {"batch_results": results, "summary": summary, "rerun_audit_id": rerun_audit.id}


@router.post("/upload-flexible")
|
|
async def upload_flexible_only(
|
|
file: UploadFile = UploadFileForm(...),
|
|
replace_existing: bool = Form(False),
|
|
db: Session = Depends(get_db),
|
|
current_user: User = Depends(get_current_user),
|
|
):
|
|
"""Flexible-only single-file upload.
|
|
|
|
Accepts any CSV and stores each row as a `FlexibleImport` record with `target_table=None`.
|
|
"""
    # Ensure CSV
    if not file.filename or not file.filename.lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="File must be a CSV file")

    file_type = file.filename

    try:
        # Optionally clear prior flexible rows for this file_type
        if replace_existing:
            db.query(FlexibleImport).filter(
                FlexibleImport.file_type == file_type,
                FlexibleImport.target_table == None,  # noqa: E711
            ).delete()
            db.commit()

        content = await file.read()
        encodings = ["utf-8-sig", "utf-8", "windows-1252", "iso-8859-1", "cp1252"]
        csv_content = None
        for encoding in encodings:
            try:
                csv_content = content.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if csv_content is None:
            raise HTTPException(status_code=400, detail="Could not decode CSV file encoding")

        reader = csv.DictReader(io.StringIO(csv_content), delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        headers = reader.fieldnames or []

        imported_count = 0
        for row in reader:
            payload = {k: v for k, v in (row or {}).items() if v not in (None, "")}
            db.add(
                FlexibleImport(
                    file_type=file_type,
                    target_table=None,
                    primary_key_field=None,
                    primary_key_value=None,
                    extra_data=payload,
                )
            )
            imported_count += 1
            if imported_count % 200 == 0:
                db.commit()

        db.commit()

        return {
            "file_type": file_type,
            "imported_count": imported_count,
            "errors": [],
            "total_errors": 0,
            "auto_mapping": {
                "mapped_headers": {},
                "unmapped_headers": list(headers),
                "flexible_saved_rows": imported_count,
            },
            "message": f"Stored {imported_count} rows as flexible data (no known model)",
        }
    except HTTPException:
        raise
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Flexible upload failed: {str(e)}")