work on import

HotSwapp
2025-09-22 22:07:50 -04:00
parent 830ddcc4d1
commit 1116658d40
7 changed files with 1426 additions and 132 deletions


@@ -1,7 +1,7 @@
"""
CSV Import API Endpoints
"""
-from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks
+from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks, Body
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any
@@ -54,6 +54,73 @@ async def get_supported_tables(
raise HTTPException(status_code=500, detail="Failed to get supported tables")
@router.get("/discover-files")
async def discover_csv_files(
current_user: User = Depends(get_admin_user)
):
"""Discover available CSV files in the old database directory"""
try:
import os
import glob
from pathlib import Path
# Look for CSV files in the old database directory
base_dir = Path("old database/Office")
csv_files = []
if base_dir.exists():
# Find all CSV files
for csv_file in glob.glob(str(base_dir / "**/*.csv"), recursive=True):
file_path = Path(csv_file)
relative_path = file_path.relative_to(base_dir)
# Try to map to known table types
filename = file_path.stem.upper()
table_mapping = {
"ROLODEX": "rolodex",
"ROLEX_V": "rolodex", # ROLEX_V variant
"PHONE": "phone",
"FILES": "files",
"FILES_R": "files",
"FILES_V": "files",
"LEDGER": "ledger",
"QDROS": "qdros",
"PAYMENTS": "ledger",
"DEPOSITS": "ledger",
"EMPLOYEE": "employee",
"SETUP": "setup",
"FILETYPE": "filetype",
"TRNSTYPE": "trnstype",
"TRNSACTN": "trnsactn",
"TRNSLKUP": "trnslkup",
"PENSIONS": "pensions"
}
suggested_table = table_mapping.get(filename, "unknown")
csv_files.append({
"filename": file_path.name,
"path": str(relative_path),
"full_path": str(file_path),
"suggested_table": suggested_table,
"size": file_path.stat().st_size if file_path.exists() else 0
})
# Sort by filename
csv_files.sort(key=lambda x: x["filename"])
return {
"success": True,
"files": csv_files,
"total": len(csv_files),
"base_directory": str(base_dir)
}
except Exception as e:
logger.error(f"Error discovering CSV files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to discover CSV files")
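For reference, a successful response from this endpoint has roughly the following shape (illustrative values; the actual entries depend on what is present under old database/Office):

{
    "success": True,
    "files": [
        {
            "filename": "ROLODEX.csv",
            "path": "ROLODEX.csv",
            "full_path": "old database/Office/ROLODEX.csv",
            "suggested_table": "rolodex",
            "size": 48210
        }
    ],
    "total": 1,
    "base_directory": "old database/Office"
}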
@router.get("/tables/{table_name}/schema")
async def get_table_schema(
table_name: str,
@@ -88,9 +155,27 @@ async def validate_csv_headers(
):
"""Validate CSV headers without importing data"""
try:
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
service = ImportService(db)
result = service.validate_csv_headers(table_name, csv_content)
@@ -103,7 +188,7 @@ async def validate_csv_headers(
}
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except Exception as e:
logger.error(f"Error validating CSV headers: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to validate CSV headers")
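The same encoding-fallback loop now appears in validate_csv_headers, import_csv_file, batch_import_csv, and batch_import_from_files. A possible follow-up refactor is sketched below (decode_csv_bytes is a hypothetical helper name, not part of this commit). Two details about the encoding list are worth knowing: in Python, 'cp1252' is an alias of 'windows-1252' and 'latin-1' is an alias of 'iso-8859-1', and since latin-1 maps all 256 byte values its decode never raises, so the "could not decode" branch is effectively unreachable once latin-1 is in the list.

from fastapi import HTTPException

def decode_csv_bytes(content: bytes) -> tuple[str, str]:
    """Decode uploaded CSV bytes, returning (text, encoding_used)."""
    # 'latin-1' accepts any byte sequence, so it acts as the final fallback;
    # the duplicate aliases ('cp1252', 'iso-8859-1') from the inline version are dropped.
    for encoding in ('utf-8', 'windows-1252', 'latin-1'):
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    # Unreachable while 'latin-1' is in the list; kept as a guard.
    raise HTTPException(
        status_code=400,
        detail="Could not decode file. Please ensure it's a valid text file."
    )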
@@ -171,9 +256,27 @@ async def import_csv_file(
detail="File must be a CSV file"
)
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(status_code=400, detail="File is empty")
@@ -208,7 +311,7 @@ async def import_csv_file(
}
except UnicodeDecodeError:
-raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
+raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except HTTPException:
raise
except Exception as e:
@@ -238,6 +341,9 @@ async def get_import_status(
if progress.result:
response["result"] = progress.result.to_dict()
# Also include error details if the import failed
if not progress.result.success and progress.result.errors:
response["error"] = "; ".join(progress.result.errors[:3])
elif progress.error:
response["error"] = progress.error
@@ -284,9 +390,27 @@ async def batch_import_csv(
detail=f"File {file.filename} must be a CSV file"
)
-# Read file content
+# Read file content with encoding detection
content = await file.read()
-csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
@@ -330,6 +454,9 @@ async def batch_import_csv(
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
@@ -366,6 +493,252 @@ async def batch_import_csv(
raise HTTPException(status_code=500, detail="Failed to start batch import")
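With the error-propagation changes above, a failed import's status response now carries the joined error string alongside the result. A rough shape (hedged; only the result and error keys are visible in this diff):

{
    "result": {"success": False, "errors": ["..."]},
    "error": "first error; second error; third error"
}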
@router.post("/batch-from-files")
async def batch_import_from_files(
background_tasks: BackgroundTasks,
file_mappings: List[Dict[str, str]] = Body(...),
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Import multiple CSV files from discovered file paths"""
try:
from pathlib import Path
if not file_mappings:
raise HTTPException(
status_code=400,
detail="No file mappings provided"
)
imports = []
import_ids = []
for mapping in file_mappings:
file_path = mapping.get("file_path")
table_name = mapping.get("table_name")
if not file_path or not table_name:
raise HTTPException(
status_code=400,
detail="Each mapping must have file_path and table_name"
)
# Validate table name
if table_name.lower() not in [t.value for t in TableType]:
raise HTTPException(
status_code=400,
detail=f"Unsupported table: {table_name}"
)
# Read file content
full_path = Path(file_path)
if not full_path.exists():
raise HTTPException(
status_code=400,
detail=f"File not found: {file_path}"
)
# Read file content with encoding detection
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
with open(full_path, 'r', encoding=encoding) as f:
csv_content = f.read()
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail=f"Could not decode file {file_path}. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
status_code=400,
detail=f"File {file_path} is empty"
)
imports.append({
"table_name": table_name,
"csv_content": csv_content,
"filename": full_path.name
})
# Generate import ID for tracking
import_id = str(uuid.uuid4())
import_ids.append(import_id)
# Create progress tracker
progress = ImportStatus(import_id, table_name)
import_progress[import_id] = progress
# Process batch import in background
async def process_batch_background():
try:
service = ImportService(db)
results = service.batch_import(imports)
# Update progress for each import
for i, import_id in enumerate(import_ids):
if import_id in import_progress:
progress = import_progress[import_id]
table_name = progress.table_name
# Find result for this table
result = None
for key, res in results.items():
if key.startswith(table_name):
result = res
break
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
progress.completed_at = datetime.utcnow()
except Exception as e:
logger.error(f"Batch import failed: {str(e)}")
for import_id in import_ids:
if import_id in import_progress:
progress = import_progress[import_id]
progress.status = "FAILED"
progress.error = str(e)
progress.completed_at = datetime.utcnow()
background_tasks.add_task(process_batch_background)
logger.info(f"Started batch import from files with {len(imports)} files")
return {
"success": True,
"import_ids": import_ids,
"total_files": len(imports),
"status": "PROCESSING",
"message": "Batch import from files started successfully"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error starting batch import from files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to start batch import")
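A minimal client-side sketch for driving this endpoint (the /api/import prefix, port, and token are assumptions; the router's actual mount point is not visible in this diff):

import httpx

mappings = [
    {"file_path": "old database/Office/ROLODEX.csv", "table_name": "rolodex"},
    {"file_path": "old database/Office/LEDGER.csv", "table_name": "ledger"},
]
resp = httpx.post(
    "http://localhost:8000/api/import/batch-from-files",
    json=mappings,  # a single Body(...) list parameter means the body is the raw JSON array
    headers={"Authorization": "Bearer <admin-token>"},
    timeout=60.0,
)
resp.raise_for_status()
# Poll the import-status endpoint with these ids to track progress.
import_ids = resp.json()["import_ids"]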
@router.get("/status")
async def get_import_status_overview(
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Get overview of which tables have been successfully imported"""
try:
from sqlalchemy import inspect, text
# Get list of all tables in the database
inspector = inspect(db.bind)
existing_tables = inspector.get_table_names()
# Define all possible table types and their display names
table_info = {
"rolodex": {"name": "ROLODEX (Contacts)", "category": "Core", "expected_files": ["ROLODEX.csv", "ROLEX_V.csv"]},
"phone": {"name": "PHONE (Phone Numbers)", "category": "Core", "expected_files": ["PHONE.csv"]},
"files": {"name": "FILES (Case Files)", "category": "Core", "expected_files": ["FILES.csv", "FILES_R.csv", "FILES_V.csv"]},
"ledger": {"name": "LEDGER (Financial)", "category": "Core", "expected_files": ["LEDGER.csv"]},
"qdros": {"name": "QDROS (Documents)", "category": "Core", "expected_files": ["QDROS.csv"]},
"gruplkup": {"name": "GRUPLKUP", "category": "Lookup", "expected_files": ["GRUPLKUP.csv"]},
"employee": {"name": "EMPLOYEE", "category": "Lookup", "expected_files": ["EMPLOYEE.csv"]},
"filetype": {"name": "FILETYPE", "category": "Lookup", "expected_files": ["FILETYPE.csv"]},
"trnstype": {"name": "TRNSTYPE", "category": "Lookup", "expected_files": ["TRNSTYPE.csv"]},
"trnslkup": {"name": "TRNSLKUP", "category": "Lookup", "expected_files": ["TRNSLKUP.csv"]},
"rvarlkup": {"name": "RVARLKUP", "category": "Lookup", "expected_files": ["RVARLKUP.csv"]},
"fvarlkup": {"name": "FVARLKUP", "category": "Lookup", "expected_files": ["FVARLKUP.csv"]},
"filenots": {"name": "FILENOTS", "category": "Lookup", "expected_files": ["FILENOTS.csv"]},
"planinfo": {"name": "PLANINFO", "category": "Lookup", "expected_files": ["PLANINFO.csv"]},
"setup": {"name": "SETUP", "category": "Configuration", "expected_files": ["SETUP.csv"]},
"deposits": {"name": "DEPOSITS", "category": "Financial", "expected_files": ["DEPOSITS.csv"]},
"payments": {"name": "PAYMENTS", "category": "Financial", "expected_files": ["PAYMENTS.csv"]},
"trnsactn": {"name": "TRNSACTN", "category": "Financial", "expected_files": ["TRNSACTN.csv"]},
"pensions": {"name": "PENSIONS", "category": "Pension", "expected_files": ["PENSIONS.csv"]},
"marriage": {"name": "MARRIAGE", "category": "Pension", "expected_files": ["MARRIAGE.csv"]},
"death": {"name": "DEATH", "category": "Pension", "expected_files": ["DEATH.csv"]},
"separate": {"name": "SEPARATE", "category": "Pension", "expected_files": ["SEPARATE.csv"]},
"schedule": {"name": "SCHEDULE", "category": "Pension", "expected_files": ["SCHEDULE.csv"]},
"numberal": {"name": "NUMBERAL", "category": "Forms", "expected_files": ["NUMBERAL.csv"]},
"inx_lkup": {"name": "INX_LKUP", "category": "Forms", "expected_files": ["INX_LKUP.csv"]},
"form_lst": {"name": "FORM_LST", "category": "Forms", "expected_files": ["FORM_LST.csv"]},
"form_inx": {"name": "FORM_INX", "category": "Forms", "expected_files": ["FORM_INX.csv"]},
"lifetabl": {"name": "LIFETABL", "category": "Forms", "expected_files": ["LIFETABL.csv"]}
}
# Check status of each table
table_status = []
for table_name, info in table_info.items():
status = {
"table_name": table_name,
"display_name": info["name"],
"category": info["category"],
"expected_files": info["expected_files"],
"exists": table_name in existing_tables,
"row_count": 0,
"imported": False
}
if status["exists"]:
try:
# Get row count
result = db.execute(text(f"SELECT COUNT(*) FROM {table_name}"))
status["row_count"] = result.scalar()
status["imported"] = status["row_count"] > 0
except Exception as e:
logger.warning(f"Could not get row count for {table_name}: {e}")
status["row_count"] = -1 # Error getting count
table_status.append(status)
# Group by category
categories = {}
for status in table_status:
category = status["category"]
if category not in categories:
categories[category] = []
categories[category].append(status)
# Calculate summary stats
total_tables = len(table_status)
imported_tables = len([s for s in table_status if s["imported"]])
total_rows = sum(s["row_count"] for s in table_status if s["row_count"] > 0)
return {
"success": True,
"summary": {
"total_tables": total_tables,
"imported_tables": imported_tables,
"empty_tables": len([s for s in table_status if s["exists"] and s["row_count"] == 0]),
"missing_tables": len([s for s in table_status if not s["exists"]]),
"total_rows": total_rows,
"completion_percentage": round((imported_tables / total_tables) * 100, 1) if total_tables > 0 else 0
},
"categories": categories,
"tables": table_status
}
except Exception as e:
logger.error(f"Error getting import status overview: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to get import status")
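With the 28 tables defined above, the summary block for a partially migrated database might look like this (illustrative counts; they sum to total_tables, and 12/28 rounds to 42.9):

"summary": {
    "total_tables": 28,
    "imported_tables": 12,
    "empty_tables": 9,
    "missing_tables": 7,
    "total_rows": 15834,
    "completion_percentage": 42.9
}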
@router.delete("/progress")
async def cleanup_import_progress(
current_user: User = Depends(get_admin_user)