work on import

HotSwapp
2025-09-22 22:07:50 -04:00
parent 830ddcc4d1
commit 1116658d40
7 changed files with 1426 additions and 132 deletions


@@ -1,7 +1,7 @@
"""
CSV Import API Endpoints
"""
-from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks
+from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks, Body
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any
@@ -54,6 +54,73 @@ async def get_supported_tables(
raise HTTPException(status_code=500, detail="Failed to get supported tables")
@router.get("/discover-files")
async def discover_csv_files(
current_user: User = Depends(get_admin_user)
):
"""Discover available CSV files in the old database directory"""
try:
import os
import glob
from pathlib import Path
# Look for CSV files in the old database directory
base_dir = Path("old database/Office")
csv_files = []
if base_dir.exists():
# Find all CSV files
for csv_file in glob.glob(str(base_dir / "**/*.csv"), recursive=True):
file_path = Path(csv_file)
relative_path = file_path.relative_to(base_dir)
# Try to map to known table types
filename = file_path.stem.upper()
table_mapping = {
"ROLODEX": "rolodex",
"ROLEX_V": "rolodex", # ROLEX_V variant
"PHONE": "phone",
"FILES": "files",
"FILES_R": "files",
"FILES_V": "files",
"LEDGER": "ledger",
"QDROS": "qdros",
"PAYMENTS": "ledger",
"DEPOSITS": "ledger",
"EMPLOYEE": "employee",
"SETUP": "setup",
"FILETYPE": "filetype",
"TRNSTYPE": "trnstype",
"TRNSACTN": "trnsactn",
"TRNSLKUP": "trnslkup",
"PENSIONS": "pensions"
}
suggested_table = table_mapping.get(filename, "unknown")
csv_files.append({
"filename": file_path.name,
"path": str(relative_path),
"full_path": str(file_path),
"suggested_table": suggested_table,
"size": file_path.stat().st_size if file_path.exists() else 0
})
# Sort by filename
csv_files.sort(key=lambda x: x["filename"])
return {
"success": True,
"files": csv_files,
"total": len(csv_files),
"base_directory": str(base_dir)
}
except Exception as e:
logger.error(f"Error discovering CSV files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to discover CSV files")
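For reference, a successful response from this endpoint has roughly the following shape (illustrative values; the actual entries depend on what is present under old database/Office):

{
    "success": True,
    "files": [
        {
            "filename": "ROLODEX.csv",
            "path": "ROLODEX.csv",
            "full_path": "old database/Office/ROLODEX.csv",
            "suggested_table": "rolodex",
            "size": 48210
        }
    ],
    "total": 1,
    "base_directory": "old database/Office"
}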
@router.get("/tables/{table_name}/schema")
async def get_table_schema(
table_name: str,
@@ -88,9 +155,27 @@ async def validate_csv_headers(
):
"""Validate CSV headers without importing data"""
try:
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
service = ImportService(db)
result = service.validate_csv_headers(table_name, csv_content)
@@ -103,7 +188,7 @@ async def validate_csv_headers(
}
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except Exception as e:
logger.error(f"Error validating CSV headers: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to validate CSV headers")
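The same encoding-fallback loop now appears in validate_csv_headers, import_csv_file, batch_import_csv, and batch_import_from_files. A possible follow-up refactor is sketched below (decode_csv_bytes is a hypothetical helper name, not part of this commit). Two details about the encoding list are worth knowing: in Python, 'cp1252' is an alias of 'windows-1252' and 'latin-1' is an alias of 'iso-8859-1', and since latin-1 maps all 256 byte values its decode never raises, so the "could not decode" branch is effectively unreachable once latin-1 is in the list.

from fastapi import HTTPException

def decode_csv_bytes(content: bytes) -> tuple[str, str]:
    """Decode uploaded CSV bytes, returning (text, encoding_used)."""
    # 'latin-1' accepts any byte sequence, so it acts as the final fallback;
    # the duplicate aliases ('cp1252', 'iso-8859-1') from the inline version are dropped.
    for encoding in ('utf-8', 'windows-1252', 'latin-1'):
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    # Unreachable while 'latin-1' is in the list; kept as a guard.
    raise HTTPException(
        status_code=400,
        detail="Could not decode file. Please ensure it's a valid text file."
    )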
@@ -171,9 +256,27 @@ async def import_csv_file(
detail="File must be a CSV file"
)
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(status_code=400, detail="File is empty")
@@ -208,7 +311,7 @@ async def import_csv_file(
}
except UnicodeDecodeError:
-raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
+raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except HTTPException:
raise
except Exception as e:
@@ -238,6 +341,9 @@ async def get_import_status(
if progress.result:
response["result"] = progress.result.to_dict()
# Also include error details if the import failed
if not progress.result.success and progress.result.errors:
response["error"] = "; ".join(progress.result.errors[:3])
elif progress.error:
response["error"] = progress.error
@@ -284,9 +390,27 @@ async def batch_import_csv(
detail=f"File {file.filename} must be a CSV file"
)
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
@@ -330,6 +454,9 @@ async def batch_import_csv(
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
@@ -366,6 +493,252 @@ async def batch_import_csv(
raise HTTPException(status_code=500, detail="Failed to start batch import")
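With the error-propagation changes above, a failed import's status response now carries the joined error string alongside the result. A rough shape (hedged; only the result and error keys are visible in this diff):

{
    "result": {"success": False, "errors": ["..."]},
    "error": "first error; second error; third error"
}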
@router.post("/batch-from-files")
async def batch_import_from_files(
background_tasks: BackgroundTasks,
file_mappings: List[Dict[str, str]] = Body(...),
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Import multiple CSV files from discovered file paths"""
try:
from pathlib import Path
if not file_mappings:
raise HTTPException(
status_code=400,
detail="No file mappings provided"
)
imports = []
import_ids = []
for mapping in file_mappings:
file_path = mapping.get("file_path")
table_name = mapping.get("table_name")
if not file_path or not table_name:
raise HTTPException(
status_code=400,
detail="Each mapping must have file_path and table_name"
)
# Validate table name
if table_name.lower() not in [t.value for t in TableType]:
raise HTTPException(
status_code=400,
detail=f"Unsupported table: {table_name}"
)
# Read file content
full_path = Path(file_path)
if not full_path.exists():
raise HTTPException(
status_code=400,
detail=f"File not found: {file_path}"
)
# Read file content with encoding detection
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
with open(full_path, 'r', encoding=encoding) as f:
csv_content = f.read()
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail=f"Could not decode file {file_path}. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
status_code=400,
detail=f"File {file_path} is empty"
)
imports.append({
"table_name": table_name,
"csv_content": csv_content,
"filename": full_path.name
})
# Generate import ID for tracking
import_id = str(uuid.uuid4())
import_ids.append(import_id)
# Create progress tracker
progress = ImportStatus(import_id, table_name)
import_progress[import_id] = progress
# Process batch import in background
async def process_batch_background():
try:
service = ImportService(db)
results = service.batch_import(imports)
# Update progress for each import
for i, import_id in enumerate(import_ids):
if import_id in import_progress:
progress = import_progress[import_id]
table_name = progress.table_name
# Find result for this table
result = None
for key, res in results.items():
if key.startswith(table_name):
result = res
break
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
progress.completed_at = datetime.utcnow()
except Exception as e:
logger.error(f"Batch import failed: {str(e)}")
for import_id in import_ids:
if import_id in import_progress:
progress = import_progress[import_id]
progress.status = "FAILED"
progress.error = str(e)
progress.completed_at = datetime.utcnow()
background_tasks.add_task(process_batch_background)
logger.info(f"Started batch import from files with {len(imports)} files")
return {
"success": True,
"import_ids": import_ids,
"total_files": len(imports),
"status": "PROCESSING",
"message": "Batch import from files started successfully"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error starting batch import from files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to start batch import")
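A minimal client-side sketch for driving this endpoint (the /api/import prefix, port, and token are assumptions; the router's actual mount point is not visible in this diff):

import httpx

mappings = [
    {"file_path": "old database/Office/ROLODEX.csv", "table_name": "rolodex"},
    {"file_path": "old database/Office/LEDGER.csv", "table_name": "ledger"},
]
resp = httpx.post(
    "http://localhost:8000/api/import/batch-from-files",
    json=mappings,  # a single Body(...) list parameter means the body is the raw JSON array
    headers={"Authorization": "Bearer <admin-token>"},
    timeout=60.0,
)
resp.raise_for_status()
# Poll the import-status endpoint with these ids to track progress.
import_ids = resp.json()["import_ids"]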
@router.get("/status")
async def get_import_status_overview(
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Get overview of which tables have been successfully imported"""
try:
from sqlalchemy import inspect, text
# Get list of all tables in the database
inspector = inspect(db.bind)
existing_tables = inspector.get_table_names()
# Define all possible table types and their display names
table_info = {
"rolodex": {"name": "ROLODEX (Contacts)", "category": "Core", "expected_files": ["ROLODEX.csv", "ROLEX_V.csv"]},
"phone": {"name": "PHONE (Phone Numbers)", "category": "Core", "expected_files": ["PHONE.csv"]},
"files": {"name": "FILES (Case Files)", "category": "Core", "expected_files": ["FILES.csv", "FILES_R.csv", "FILES_V.csv"]},
"ledger": {"name": "LEDGER (Financial)", "category": "Core", "expected_files": ["LEDGER.csv"]},
"qdros": {"name": "QDROS (Documents)", "category": "Core", "expected_files": ["QDROS.csv"]},
"gruplkup": {"name": "GRUPLKUP", "category": "Lookup", "expected_files": ["GRUPLKUP.csv"]},
"employee": {"name": "EMPLOYEE", "category": "Lookup", "expected_files": ["EMPLOYEE.csv"]},
"filetype": {"name": "FILETYPE", "category": "Lookup", "expected_files": ["FILETYPE.csv"]},
"trnstype": {"name": "TRNSTYPE", "category": "Lookup", "expected_files": ["TRNSTYPE.csv"]},
"trnslkup": {"name": "TRNSLKUP", "category": "Lookup", "expected_files": ["TRNSLKUP.csv"]},
"rvarlkup": {"name": "RVARLKUP", "category": "Lookup", "expected_files": ["RVARLKUP.csv"]},
"fvarlkup": {"name": "FVARLKUP", "category": "Lookup", "expected_files": ["FVARLKUP.csv"]},
"filenots": {"name": "FILENOTS", "category": "Lookup", "expected_files": ["FILENOTS.csv"]},
"planinfo": {"name": "PLANINFO", "category": "Lookup", "expected_files": ["PLANINFO.csv"]},
"setup": {"name": "SETUP", "category": "Configuration", "expected_files": ["SETUP.csv"]},
"deposits": {"name": "DEPOSITS", "category": "Financial", "expected_files": ["DEPOSITS.csv"]},
"payments": {"name": "PAYMENTS", "category": "Financial", "expected_files": ["PAYMENTS.csv"]},
"trnsactn": {"name": "TRNSACTN", "category": "Financial", "expected_files": ["TRNSACTN.csv"]},
"pensions": {"name": "PENSIONS", "category": "Pension", "expected_files": ["PENSIONS.csv"]},
"marriage": {"name": "MARRIAGE", "category": "Pension", "expected_files": ["MARRIAGE.csv"]},
"death": {"name": "DEATH", "category": "Pension", "expected_files": ["DEATH.csv"]},
"separate": {"name": "SEPARATE", "category": "Pension", "expected_files": ["SEPARATE.csv"]},
"schedule": {"name": "SCHEDULE", "category": "Pension", "expected_files": ["SCHEDULE.csv"]},
"numberal": {"name": "NUMBERAL", "category": "Forms", "expected_files": ["NUMBERAL.csv"]},
"inx_lkup": {"name": "INX_LKUP", "category": "Forms", "expected_files": ["INX_LKUP.csv"]},
"form_lst": {"name": "FORM_LST", "category": "Forms", "expected_files": ["FORM_LST.csv"]},
"form_inx": {"name": "FORM_INX", "category": "Forms", "expected_files": ["FORM_INX.csv"]},
"lifetabl": {"name": "LIFETABL", "category": "Forms", "expected_files": ["LIFETABL.csv"]}
}
# Check status of each table
table_status = []
for table_name, info in table_info.items():
status = {
"table_name": table_name,
"display_name": info["name"],
"category": info["category"],
"expected_files": info["expected_files"],
"exists": table_name in existing_tables,
"row_count": 0,
"imported": False
}
if status["exists"]:
try:
# Get row count
result = db.execute(text(f"SELECT COUNT(*) FROM {table_name}"))
status["row_count"] = result.scalar()
status["imported"] = status["row_count"] > 0
except Exception as e:
logger.warning(f"Could not get row count for {table_name}: {e}")
status["row_count"] = -1 # Error getting count
table_status.append(status)
# Group by category
categories = {}
for status in table_status:
category = status["category"]
if category not in categories:
categories[category] = []
categories[category].append(status)
# Calculate summary stats
total_tables = len(table_status)
imported_tables = len([s for s in table_status if s["imported"]])
total_rows = sum(s["row_count"] for s in table_status if s["row_count"] > 0)
return {
"success": True,
"summary": {
"total_tables": total_tables,
"imported_tables": imported_tables,
"empty_tables": len([s for s in table_status if s["exists"] and s["row_count"] == 0]),
"missing_tables": len([s for s in table_status if not s["exists"]]),
"total_rows": total_rows,
"completion_percentage": round((imported_tables / total_tables) * 100, 1) if total_tables > 0 else 0
},
"categories": categories,
"tables": table_status
}
except Exception as e:
logger.error(f"Error getting import status overview: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to get import status")
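With the 28 tables defined above, the summary block for a partially migrated database might look like this (illustrative counts; they sum to total_tables, and 12/28 rounds to 42.9):

"summary": {
    "total_tables": 28,
    "imported_tables": 12,
    "empty_tables": 9,
    "missing_tables": 7,
    "total_rows": 15834,
    "completion_percentage": 42.9
}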
@router.delete("/progress")
async def cleanup_import_progress(
current_user: User = Depends(get_admin_user)