delphi-database/app/api/import_csv.py

"""
CSV Import API Endpoints
"""
from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks, Body
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any
import logging
import uuid
from datetime import datetime

from app.database.base import get_db
from app.auth.security import get_admin_user
from app.models.user import User
from app.import_export.import_service import ImportService, TableType
from app.core.logging import get_logger

logger = get_logger("import_api")
router = APIRouter()

# In-memory storage for import progress (could be moved to Redis in production)
import_progress = {}


class ImportStatus:
    """Track import operation status"""
    def __init__(self, import_id: str, table_name: str):
        self.import_id = import_id
        self.table_name = table_name
        self.status = "PROCESSING"
        self.started_at = datetime.utcnow()
        self.completed_at = None
        self.result = None
        self.error = None


@router.get("/tables")
async def get_supported_tables(
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Get list of supported tables for import"""
    try:
        service = ImportService(db)
        tables = service.get_supported_tables()

        return {
            "success": True,
            "tables": tables,
            "total": len(tables)
        }
    except Exception as e:
        logger.error(f"Error getting supported tables: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to get supported tables")


@router.get("/discover-files")
async def discover_csv_files(
    current_user: User = Depends(get_admin_user)
):
    """Discover available CSV files in the old database directory"""
    try:
        import os
        import glob
        from pathlib import Path

        # Look for CSV files in the old database directory
        base_dir = Path("old database/Office")
        csv_files = []

        if base_dir.exists():
            # Find all CSV files
            for csv_file in glob.glob(str(base_dir / "**/*.csv"), recursive=True):
                file_path = Path(csv_file)
                relative_path = file_path.relative_to(base_dir)

                # Try to map to known table types
                filename = file_path.stem.upper()
                table_mapping = {
                    "ROLODEX": "rolodex",
                    "ROLEX_V": "rolodex",  # ROLEX_V variant
                    "PHONE": "phone",
                    "FILES": "files",
                    "FILES_R": "files",
                    "FILES_V": "files",
                    "LEDGER": "ledger",
                    "QDROS": "qdros",
                    "PAYMENTS": "ledger",
                    "DEPOSITS": "ledger",
                    "EMPLOYEE": "employee",
                    "SETUP": "setup",
                    "FILETYPE": "filetype",
                    "TRNSTYPE": "trnstype",
                    "TRNSACTN": "trnsactn",
                    "TRNSLKUP": "trnslkup",
                    "PENSIONS": "pensions"
                }

                suggested_table = table_mapping.get(filename, "unknown")

                csv_files.append({
                    "filename": file_path.name,
                    "path": str(relative_path),
                    "full_path": str(file_path),
                    "suggested_table": suggested_table,
                    "size": file_path.stat().st_size if file_path.exists() else 0
                })

        # Sort by filename
        csv_files.sort(key=lambda x: x["filename"])

        return {
            "success": True,
            "files": csv_files,
            "total": len(csv_files),
            "base_directory": str(base_dir)
        }

    except Exception as e:
        logger.error(f"Error discovering CSV files: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to discover CSV files")


@router.get("/tables/{table_name}/schema")
async def get_table_schema(
    table_name: str,
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Get schema information for a specific table"""
    try:
        service = ImportService(db)
        schema = service.get_table_schema(table_name)

        if not schema:
            raise HTTPException(status_code=404, detail=f"Table '{table_name}' not found")

        return {
            "success": True,
            "schema": schema
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting table schema for {table_name}: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to get table schema")


@router.post("/validate")
async def validate_csv_headers(
    table_name: str = Form(...),
    file: UploadFile = File(...),
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Validate CSV headers without importing data"""
    try:
        # Read file content with encoding detection
        content = await file.read()

        # Try multiple encodings
        encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
        csv_content = None
        used_encoding = None

        for encoding in encodings:
            try:
                csv_content = content.decode(encoding)
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue

        if csv_content is None:
            raise HTTPException(
                status_code=400,
                detail="Could not decode file. Please ensure it's a valid text file."
            )

        service = ImportService(db)
        result = service.validate_csv_headers(table_name, csv_content)

        return {
            "success": result.success,
            "table_name": table_name,
            "filename": file.filename,
            "validation_result": result.to_dict()
        }

    except UnicodeDecodeError:
        raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
    except Exception as e:
        logger.error(f"Error validating CSV headers: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to validate CSV headers")


async def process_import_background(
    import_id: str,
    table_name: str,
    csv_content: str,
    db: Session
):
    """Background task to process CSV import"""
    try:
        logger.info(f"Starting background import {import_id} for table {table_name}")
        print(f"[IMPORT] Starting background import {import_id} for table {table_name}")

        service = ImportService(db)
        result = service.import_csv(table_name, csv_content, import_id=import_id)

        # Update progress
        if import_id in import_progress:
            progress = import_progress[import_id]
            progress.status = "COMPLETED" if result.success else "FAILED"
            progress.completed_at = datetime.utcnow()
            progress.result = result

        logger.info(f"Import {import_id} completed with {result.imported_rows} rows imported")
        print(f"[IMPORT] Import {import_id} completed: success={result.success}, rows={result.imported_rows}")

    except Exception as e:
        logger.error(f"Background import {import_id} failed: {str(e)}")
        print(f"[IMPORT] Background import {import_id} failed: {str(e)}")
        if import_id in import_progress:
            progress = import_progress[import_id]
            progress.status = "FAILED"
            progress.completed_at = datetime.utcnow()
            progress.error = str(e)


@router.post("/csv")
async def import_csv_file(
    background_tasks: BackgroundTasks,
    table_name: str = Form(...),
    file: UploadFile = File(...),
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Import CSV file to specified table"""
    try:
        logger.info(f"Received CSV import request: table={table_name}, file={file.filename}, user={current_user.username}")
        print(f"[IMPORT API] CSV import request: table={table_name}, file={file.filename}")

        # Validate table name
        if table_name.lower() not in [t.value for t in TableType]:
            print(f"[IMPORT API] Invalid table name: {table_name}")
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported table: {table_name}"
            )

        # Validate file type
        if not file.filename.lower().endswith('.csv'):
            raise HTTPException(
                status_code=400,
                detail="File must be a CSV file"
            )

        # Read file content with encoding detection
        content = await file.read()

        # Try multiple encodings
        encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
        csv_content = None
        used_encoding = None

        for encoding in encodings:
            try:
                csv_content = content.decode(encoding)
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue

        if csv_content is None:
            raise HTTPException(
                status_code=400,
                detail="Could not decode file. Please ensure it's a valid text file."
            )

        if not csv_content.strip():
            raise HTTPException(status_code=400, detail="File is empty")

        # Generate import ID
        import_id = str(uuid.uuid4())
        print(f"[IMPORT API] Generated import ID: {import_id}")

        # Create progress tracker
        progress = ImportStatus(import_id, table_name)
        import_progress[import_id] = progress

        # Start background import
        background_tasks.add_task(
            process_import_background,
            import_id,
            table_name,
            csv_content,
            db
        )

        logger.info(f"Started CSV import {import_id} for table {table_name}")
        print(f"[IMPORT API] Background task queued for import {import_id}")

        return {
            "success": True,
            "import_id": import_id,
            "table_name": table_name,
            "filename": file.filename,
            "status": "PROCESSING",
            "message": "Import started successfully"
        }

    except UnicodeDecodeError:
        raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error starting CSV import: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to start import")


@router.get("/status/{import_id}")
async def get_import_status(
    import_id: str,
    current_user: User = Depends(get_admin_user)
):
    """Get status of an import operation"""
    try:
        if import_id not in import_progress:
            raise HTTPException(status_code=404, detail="Import not found")

        progress = import_progress[import_id]

        response = {
            "import_id": import_id,
            "table_name": progress.table_name,
            "status": progress.status,
            "started_at": progress.started_at.isoformat(),
            "completed_at": progress.completed_at.isoformat() if progress.completed_at else None
        }

        if progress.result:
            response["result"] = progress.result.to_dict()
            # Also include error details if the import failed
            if not progress.result.success and progress.result.errors:
                response["error"] = "; ".join(progress.result.errors[:3])
        elif progress.error:
            response["error"] = progress.error

        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting import status: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to get import status")


@router.post("/batch")
async def batch_import_csv(
    background_tasks: BackgroundTasks,
    files: List[UploadFile] = File(...),
    table_names: List[str] = Form(...),
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Import multiple CSV files in batch"""
    try:
        if len(files) != len(table_names):
            raise HTTPException(
                status_code=400,
                detail="Number of files must match number of table names"
            )

        imports = []
        import_ids = []

        for i, (file, table_name) in enumerate(zip(files, table_names)):
            # Validate table name
            if table_name.lower() not in [t.value for t in TableType]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported table: {table_name}"
                )

            # Validate file type
            if not file.filename.lower().endswith('.csv'):
                raise HTTPException(
                    status_code=400,
                    detail=f"File {file.filename} must be a CSV file"
                )

            # Read file content with encoding detection
            content = await file.read()

            # Try multiple encodings
            encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
            csv_content = None
            used_encoding = None

            for encoding in encodings:
                try:
                    csv_content = content.decode(encoding)
                    used_encoding = encoding
                    break
                except UnicodeDecodeError:
                    continue

            if csv_content is None:
                raise HTTPException(
                    status_code=400,
                    detail="Could not decode file. Please ensure it's a valid text file."
                )

            if not csv_content.strip():
                raise HTTPException(
                    status_code=400,
                    detail=f"File {file.filename} is empty"
                )

            imports.append({
                "table_name": table_name,
                "csv_content": csv_content,
                "filename": file.filename
            })

            # Generate import ID for tracking
            import_id = str(uuid.uuid4())
            import_ids.append(import_id)

            # Create progress tracker
            progress = ImportStatus(import_id, table_name)
            import_progress[import_id] = progress

        # Process batch import in background
        async def process_batch_background():
            try:
                service = ImportService(db)
                results = service.batch_import(imports)

                # Update progress for each import
                for i, import_id in enumerate(import_ids):
                    if import_id in import_progress:
                        progress = import_progress[import_id]
                        table_name = progress.table_name

                        # Find result for this table
                        result = None
                        for key, res in results.items():
                            if key.startswith(table_name):
                                result = res
                                break

                        if result:
                            progress.status = "COMPLETED" if result.success else "FAILED"
                            progress.result = result
                            # If import failed, capture the error details
                            if not result.success and result.errors:
                                progress.error = "; ".join(result.errors[:3])  # Show first 3 errors
                        else:
                            progress.status = "FAILED"
                            progress.error = "No result found"

                        progress.completed_at = datetime.utcnow()

            except Exception as e:
                logger.error(f"Batch import failed: {str(e)}")
                for import_id in import_ids:
                    if import_id in import_progress:
                        progress = import_progress[import_id]
                        progress.status = "FAILED"
                        progress.error = str(e)
                        progress.completed_at = datetime.utcnow()

        background_tasks.add_task(process_batch_background)

        logger.info(f"Started batch import with {len(files)} files")

        return {
            "success": True,
            "import_ids": import_ids,
            "total_files": len(files),
            "status": "PROCESSING",
            "message": "Batch import started successfully"
        }

    except UnicodeDecodeError:
        raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error starting batch import: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to start batch import")


@router.post("/batch-from-files")
async def batch_import_from_files(
    background_tasks: BackgroundTasks,
    file_mappings: List[Dict[str, str]] = Body(...),
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Import multiple CSV files from discovered file paths"""
    try:
        from pathlib import Path

        if not file_mappings:
            raise HTTPException(
                status_code=400,
                detail="No file mappings provided"
            )

        imports = []
        import_ids = []

        for mapping in file_mappings:
            file_path = mapping.get("file_path")
            table_name = mapping.get("table_name")

            if not file_path or not table_name:
                raise HTTPException(
                    status_code=400,
                    detail="Each mapping must have file_path and table_name"
                )

            # Validate table name
            if table_name.lower() not in [t.value for t in TableType]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported table: {table_name}"
                )

            # Read file content
            full_path = Path(file_path)
            if not full_path.exists():
                raise HTTPException(
                    status_code=400,
                    detail=f"File not found: {file_path}"
                )

            # Read file content with encoding detection
            encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
            csv_content = None
            used_encoding = None

            for encoding in encodings:
                try:
                    with open(full_path, 'r', encoding=encoding) as f:
                        csv_content = f.read()
                    used_encoding = encoding
                    break
                except UnicodeDecodeError:
                    continue

            if csv_content is None:
                raise HTTPException(
                    status_code=400,
                    detail=f"Could not decode file {file_path}. Please ensure it's a valid text file."
                )

            if not csv_content.strip():
                raise HTTPException(
                    status_code=400,
                    detail=f"File {file_path} is empty"
                )

            imports.append({
                "table_name": table_name,
                "csv_content": csv_content,
                "filename": full_path.name
            })

            # Generate import ID for tracking
            import_id = str(uuid.uuid4())
            import_ids.append(import_id)

            # Create progress tracker
            progress = ImportStatus(import_id, table_name)
            import_progress[import_id] = progress

        # Process batch import in background
        async def process_batch_background():
            try:
                service = ImportService(db)
                results = service.batch_import(imports)

                # Update progress for each import
                for i, import_id in enumerate(import_ids):
                    if import_id in import_progress:
                        progress = import_progress[import_id]
                        table_name = progress.table_name

                        # Find result for this table
                        result = None
                        for key, res in results.items():
                            if key.startswith(table_name):
                                result = res
                                break

                        if result:
                            progress.status = "COMPLETED" if result.success else "FAILED"
                            progress.result = result
                            # If import failed, capture the error details
                            if not result.success and result.errors:
                                progress.error = "; ".join(result.errors[:3])  # Show first 3 errors
                        else:
                            progress.status = "FAILED"
                            progress.error = "No result found"

                        progress.completed_at = datetime.utcnow()

            except Exception as e:
                logger.error(f"Batch import failed: {str(e)}")
                for import_id in import_ids:
                    if import_id in import_progress:
                        progress = import_progress[import_id]
                        progress.status = "FAILED"
                        progress.error = str(e)
                        progress.completed_at = datetime.utcnow()

        background_tasks.add_task(process_batch_background)

        logger.info(f"Started batch import from files with {len(imports)} files")

        return {
            "success": True,
            "import_ids": import_ids,
            "total_files": len(imports),
            "status": "PROCESSING",
            "message": "Batch import from files started successfully"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error starting batch import from files: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to start batch import")


@router.get("/status")
async def get_import_status_overview(
    current_user: User = Depends(get_admin_user),
    db: Session = Depends(get_db)
):
    """Get overview of which tables have been successfully imported"""
    try:
        from sqlalchemy import inspect, text

        # Get list of all tables in the database
        inspector = inspect(db.bind)
        existing_tables = inspector.get_table_names()

        # Define all possible table types and their display names
        table_info = {
            "rolodex": {"name": "ROLODEX (Contacts)", "category": "Core", "expected_files": ["ROLODEX.csv", "ROLEX_V.csv"]},
            "phone": {"name": "PHONE (Phone Numbers)", "category": "Core", "expected_files": ["PHONE.csv"]},
            "files": {"name": "FILES (Case Files)", "category": "Core", "expected_files": ["FILES.csv", "FILES_R.csv", "FILES_V.csv"]},
            "ledger": {"name": "LEDGER (Financial)", "category": "Core", "expected_files": ["LEDGER.csv"]},
            "qdros": {"name": "QDROS (Documents)", "category": "Core", "expected_files": ["QDROS.csv"]},
            "gruplkup": {"name": "GRUPLKUP", "category": "Lookup", "expected_files": ["GRUPLKUP.csv"]},
            "employee": {"name": "EMPLOYEE", "category": "Lookup", "expected_files": ["EMPLOYEE.csv"]},
            "filetype": {"name": "FILETYPE", "category": "Lookup", "expected_files": ["FILETYPE.csv"]},
            "trnstype": {"name": "TRNSTYPE", "category": "Lookup", "expected_files": ["TRNSTYPE.csv"]},
            "trnslkup": {"name": "TRNSLKUP", "category": "Lookup", "expected_files": ["TRNSLKUP.csv"]},
            "rvarlkup": {"name": "RVARLKUP", "category": "Lookup", "expected_files": ["RVARLKUP.csv"]},
            "fvarlkup": {"name": "FVARLKUP", "category": "Lookup", "expected_files": ["FVARLKUP.csv"]},
            "filenots": {"name": "FILENOTS", "category": "Lookup", "expected_files": ["FILENOTS.csv"]},
            "planinfo": {"name": "PLANINFO", "category": "Lookup", "expected_files": ["PLANINFO.csv"]},
            "setup": {"name": "SETUP", "category": "Configuration", "expected_files": ["SETUP.csv"]},
            "deposits": {"name": "DEPOSITS", "category": "Financial", "expected_files": ["DEPOSITS.csv"]},
            "payments": {"name": "PAYMENTS", "category": "Financial", "expected_files": ["PAYMENTS.csv"]},
            "trnsactn": {"name": "TRNSACTN", "category": "Financial", "expected_files": ["TRNSACTN.csv"]},
            "pensions": {"name": "PENSIONS", "category": "Pension", "expected_files": ["PENSIONS.csv"]},
            "marriage": {"name": "MARRIAGE", "category": "Pension", "expected_files": ["MARRIAGE.csv"]},
            "death": {"name": "DEATH", "category": "Pension", "expected_files": ["DEATH.csv"]},
            "separate": {"name": "SEPARATE", "category": "Pension", "expected_files": ["SEPARATE.csv"]},
            "schedule": {"name": "SCHEDULE", "category": "Pension", "expected_files": ["SCHEDULE.csv"]},
            "numberal": {"name": "NUMBERAL", "category": "Forms", "expected_files": ["NUMBERAL.csv"]},
            "inx_lkup": {"name": "INX_LKUP", "category": "Forms", "expected_files": ["INX_LKUP.csv"]},
            "form_lst": {"name": "FORM_LST", "category": "Forms", "expected_files": ["FORM_LST.csv"]},
            "form_inx": {"name": "FORM_INX", "category": "Forms", "expected_files": ["FORM_INX.csv"]},
            "lifetabl": {"name": "LIFETABL", "category": "Forms", "expected_files": ["LIFETABL.csv"]}
        }

        # Check status of each table
        table_status = []
        for table_name, info in table_info.items():
            status = {
                "table_name": table_name,
                "display_name": info["name"],
                "category": info["category"],
                "expected_files": info["expected_files"],
                "exists": table_name in existing_tables,
                "row_count": 0,
                "imported": False
            }

            if status["exists"]:
                try:
                    # Get row count
                    result = db.execute(text(f"SELECT COUNT(*) FROM {table_name}"))
                    status["row_count"] = result.scalar()
                    status["imported"] = status["row_count"] > 0
                except Exception as e:
                    logger.warning(f"Could not get row count for {table_name}: {e}")
                    status["row_count"] = -1  # Error getting count

            table_status.append(status)

        # Group by category
        categories = {}
        for status in table_status:
            category = status["category"]
            if category not in categories:
                categories[category] = []
            categories[category].append(status)

        # Calculate summary stats
        total_tables = len(table_status)
        imported_tables = len([s for s in table_status if s["imported"]])
        total_rows = sum(s["row_count"] for s in table_status if s["row_count"] > 0)

        return {
            "success": True,
            "summary": {
                "total_tables": total_tables,
                "imported_tables": imported_tables,
                "empty_tables": len([s for s in table_status if s["exists"] and s["row_count"] == 0]),
                "missing_tables": len([s for s in table_status if not s["exists"]]),
                "total_rows": total_rows,
                "completion_percentage": round((imported_tables / total_tables) * 100, 1) if total_tables > 0 else 0
            },
            "categories": categories,
            "tables": table_status
        }

    except Exception as e:
        logger.error(f"Error getting import status overview: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to get import status")


@router.delete("/progress")
async def cleanup_import_progress(
    current_user: User = Depends(get_admin_user)
):
    """Clean up completed import progress records"""
    try:
        completed_count = 0
        to_remove = []

        for import_id, progress in import_progress.items():
            if progress.status in ["COMPLETED", "FAILED"]:
                # Remove progress older than 1 hour
                if progress.completed_at:
                    age = datetime.utcnow() - progress.completed_at
                    if age.total_seconds() > 3600:  # 1 hour
                        to_remove.append(import_id)
                        completed_count += 1

        for import_id in to_remove:
            del import_progress[import_id]

        return {
            "success": True,
            "cleaned_up": completed_count,
            "remaining": len(import_progress)
        }

    except Exception as e:
        logger.error(f"Error cleaning up import progress: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to cleanup progress")