work on import

HotSwapp
2025-09-22 22:07:50 -04:00
parent 830ddcc4d1
commit 1116658d40
7 changed files with 1426 additions and 132 deletions

View File

@@ -1,7 +1,7 @@
"""
CSV Import API Endpoints
"""
from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks
from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException, BackgroundTasks, Body
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any
@@ -54,6 +54,73 @@ async def get_supported_tables(
raise HTTPException(status_code=500, detail="Failed to get supported tables")
@router.get("/discover-files")
async def discover_csv_files(
current_user: User = Depends(get_admin_user)
):
"""Discover available CSV files in the old database directory"""
try:
import os
import glob
from pathlib import Path
# Look for CSV files in the old database directory
base_dir = Path("old database/Office")
csv_files = []
if base_dir.exists():
# Find all CSV files
for csv_file in glob.glob(str(base_dir / "**/*.csv"), recursive=True):
file_path = Path(csv_file)
relative_path = file_path.relative_to(base_dir)
# Try to map to known table types
filename = file_path.stem.upper()
table_mapping = {
"ROLODEX": "rolodex",
"ROLEX_V": "rolodex", # ROLEX_V variant
"PHONE": "phone",
"FILES": "files",
"FILES_R": "files",
"FILES_V": "files",
"LEDGER": "ledger",
"QDROS": "qdros",
"PAYMENTS": "ledger",
"DEPOSITS": "ledger",
"EMPLOYEE": "employee",
"SETUP": "setup",
"FILETYPE": "filetype",
"TRNSTYPE": "trnstype",
"TRNSACTN": "trnsactn",
"TRNSLKUP": "trnslkup",
"PENSIONS": "pensions"
}
suggested_table = table_mapping.get(filename, "unknown")
csv_files.append({
"filename": file_path.name,
"path": str(relative_path),
"full_path": str(file_path),
"suggested_table": suggested_table,
"size": file_path.stat().st_size if file_path.exists() else 0
})
# Sort by filename
csv_files.sort(key=lambda x: x["filename"])
return {
"success": True,
"files": csv_files,
"total": len(csv_files),
"base_directory": str(base_dir)
}
except Exception as e:
logger.error(f"Error discovering CSV files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to discover CSV files")
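For reference, a minimal client-side sketch of calling this discovery endpoint. The /api/import prefix and bearer-token auth are assumptions; the router's mount point is not shown in this diff.
import httpx

def discover_csv_files_client(base_url: str, token: str) -> list:
    # GET the discovery endpoint; URL prefix and auth scheme are assumed
    resp = httpx.get(
        f"{base_url}/api/import/discover-files",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()
    payload = resp.json()
    # Each entry carries filename, path, full_path, suggested_table, and size
    return payload["files"]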
@router.get("/tables/{table_name}/schema")
async def get_table_schema(
table_name: str,
@@ -88,9 +155,27 @@ async def validate_csv_headers(
):
"""Validate CSV headers without importing data"""
try:
# Read file content
# Read file content with encoding detection
content = await file.read()
csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
service = ImportService(db)
result = service.validate_csv_headers(table_name, csv_content)
@@ -103,7 +188,7 @@ async def validate_csv_headers(
}
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except Exception as e:
logger.error(f"Error validating CSV headers: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to validate CSV headers")
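The decode-with-fallback loop above is repeated verbatim in the import and batch endpoints below; as a sketch only (the endpoints inline this logic rather than calling a helper), it amounts to:
from typing import Optional, Tuple

def decode_with_fallback(content: bytes) -> Tuple[Optional[str], Optional[str]]:
    """Return (text, encoding) for the first encoding that decodes cleanly, else (None, None)."""
    for encoding in ('utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1'):
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None
Note that iso-8859-1 and latin-1 accept any byte sequence, so in practice the final fallback always succeeds and the None branch is effectively unreachable.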
@@ -171,9 +256,27 @@ async def import_csv_file(
detail="File must be a CSV file"
)
# Read file content
# Read file content with encoding detection
content = await file.read()
csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(status_code=400, detail="File is empty")
@@ -208,7 +311,7 @@ async def import_csv_file(
}
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid file encoding. Please use UTF-8.")
raise HTTPException(status_code=400, detail="Could not decode file. Please ensure it's a valid text file.")
except HTTPException:
raise
except Exception as e:
@@ -238,6 +341,9 @@ async def get_import_status(
if progress.result:
response["result"] = progress.result.to_dict()
# Also include error details if the import failed
if not progress.result.success and progress.result.errors:
response["error"] = "; ".join(progress.result.errors[:3])
elif progress.error:
response["error"] = progress.error
@@ -284,9 +390,27 @@ async def batch_import_csv(
detail=f"File {file.filename} must be a CSV file"
)
# Read file content
# Read file content with encoding detection
content = await file.read()
csv_content = content.decode('utf-8')
# Try multiple encodings
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
csv_content = content.decode(encoding)
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail="Could not decode file. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
@@ -330,6 +454,9 @@ async def batch_import_csv(
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
@@ -366,6 +493,252 @@ async def batch_import_csv(
raise HTTPException(status_code=500, detail="Failed to start batch import")
@router.post("/batch-from-files")
async def batch_import_from_files(
background_tasks: BackgroundTasks,
file_mappings: List[Dict[str, str]] = Body(...),
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Import multiple CSV files from discovered file paths"""
try:
from pathlib import Path
if not file_mappings:
raise HTTPException(
status_code=400,
detail="No file mappings provided"
)
imports = []
import_ids = []
for mapping in file_mappings:
file_path = mapping.get("file_path")
table_name = mapping.get("table_name")
if not file_path or not table_name:
raise HTTPException(
status_code=400,
detail="Each mapping must have file_path and table_name"
)
# Validate table name
if table_name.lower() not in [t.value for t in TableType]:
raise HTTPException(
status_code=400,
detail=f"Unsupported table: {table_name}"
)
# Read file content
full_path = Path(file_path)
if not full_path.exists():
raise HTTPException(
status_code=400,
detail=f"File not found: {file_path}"
)
# Read file content with encoding detection
encodings = ['utf-8', 'windows-1252', 'iso-8859-1', 'cp1252', 'latin-1']
csv_content = None
used_encoding = None
for encoding in encodings:
try:
with open(full_path, 'r', encoding=encoding) as f:
csv_content = f.read()
used_encoding = encoding
break
except UnicodeDecodeError:
continue
if csv_content is None:
raise HTTPException(
status_code=400,
detail=f"Could not decode file {file_path}. Please ensure it's a valid text file."
)
if not csv_content.strip():
raise HTTPException(
status_code=400,
detail=f"File {file_path} is empty"
)
imports.append({
"table_name": table_name,
"csv_content": csv_content,
"filename": full_path.name
})
# Generate import ID for tracking
import_id = str(uuid.uuid4())
import_ids.append(import_id)
# Create progress tracker
progress = ImportStatus(import_id, table_name)
import_progress[import_id] = progress
# Process batch import in background
async def process_batch_background():
try:
service = ImportService(db)
results = service.batch_import(imports)
# Update progress for each import
for i, import_id in enumerate(import_ids):
if import_id in import_progress:
progress = import_progress[import_id]
table_name = progress.table_name
# Find result for this table
result = None
for key, res in results.items():
if key.startswith(table_name):
result = res
break
if result:
progress.status = "COMPLETED" if result.success else "FAILED"
progress.result = result
# If import failed, capture the error details
if not result.success and result.errors:
progress.error = "; ".join(result.errors[:3]) # Show first 3 errors
else:
progress.status = "FAILED"
progress.error = "No result found"
progress.completed_at = datetime.utcnow()
except Exception as e:
logger.error(f"Batch import failed: {str(e)}")
for import_id in import_ids:
if import_id in import_progress:
progress = import_progress[import_id]
progress.status = "FAILED"
progress.error = str(e)
progress.completed_at = datetime.utcnow()
background_tasks.add_task(process_batch_background)
logger.info(f"Started batch import from files with {len(imports)} files")
return {
"success": True,
"import_ids": import_ids,
"total_files": len(imports),
"status": "PROCESSING",
"message": "Batch import from files started successfully"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error starting batch import from files: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to start batch import")
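A hedged sketch of the request body this endpoint expects: a JSON array of file_path/table_name mappings. The base URL, port, and token below are placeholders, not values from this commit.
import httpx

file_mappings = [
    {"file_path": "old database/Office/ROLODEX.csv", "table_name": "rolodex"},
    {"file_path": "old database/Office/PHONE.csv", "table_name": "phone"},
]
resp = httpx.post(
    "http://localhost:8000/api/import/batch-from-files",  # base URL and prefix assumed
    json=file_mappings,
    headers={"Authorization": "Bearer <admin-token>"},  # endpoint requires an admin user
)
print(resp.json()["import_ids"])  # one tracking id per mapped file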
@router.get("/status")
async def get_import_status_overview(
current_user: User = Depends(get_admin_user),
db: Session = Depends(get_db)
):
"""Get overview of which tables have been successfully imported"""
try:
from sqlalchemy import inspect, text
# Get list of all tables in the database
inspector = inspect(db.bind)
existing_tables = inspector.get_table_names()
# Define all possible table types and their display names
table_info = {
"rolodex": {"name": "ROLODEX (Contacts)", "category": "Core", "expected_files": ["ROLODEX.csv", "ROLEX_V.csv"]},
"phone": {"name": "PHONE (Phone Numbers)", "category": "Core", "expected_files": ["PHONE.csv"]},
"files": {"name": "FILES (Case Files)", "category": "Core", "expected_files": ["FILES.csv", "FILES_R.csv", "FILES_V.csv"]},
"ledger": {"name": "LEDGER (Financial)", "category": "Core", "expected_files": ["LEDGER.csv"]},
"qdros": {"name": "QDROS (Documents)", "category": "Core", "expected_files": ["QDROS.csv"]},
"gruplkup": {"name": "GRUPLKUP", "category": "Lookup", "expected_files": ["GRUPLKUP.csv"]},
"employee": {"name": "EMPLOYEE", "category": "Lookup", "expected_files": ["EMPLOYEE.csv"]},
"filetype": {"name": "FILETYPE", "category": "Lookup", "expected_files": ["FILETYPE.csv"]},
"trnstype": {"name": "TRNSTYPE", "category": "Lookup", "expected_files": ["TRNSTYPE.csv"]},
"trnslkup": {"name": "TRNSLKUP", "category": "Lookup", "expected_files": ["TRNSLKUP.csv"]},
"rvarlkup": {"name": "RVARLKUP", "category": "Lookup", "expected_files": ["RVARLKUP.csv"]},
"fvarlkup": {"name": "FVARLKUP", "category": "Lookup", "expected_files": ["FVARLKUP.csv"]},
"filenots": {"name": "FILENOTS", "category": "Lookup", "expected_files": ["FILENOTS.csv"]},
"planinfo": {"name": "PLANINFO", "category": "Lookup", "expected_files": ["PLANINFO.csv"]},
"setup": {"name": "SETUP", "category": "Configuration", "expected_files": ["SETUP.csv"]},
"deposits": {"name": "DEPOSITS", "category": "Financial", "expected_files": ["DEPOSITS.csv"]},
"payments": {"name": "PAYMENTS", "category": "Financial", "expected_files": ["PAYMENTS.csv"]},
"trnsactn": {"name": "TRNSACTN", "category": "Financial", "expected_files": ["TRNSACTN.csv"]},
"pensions": {"name": "PENSIONS", "category": "Pension", "expected_files": ["PENSIONS.csv"]},
"marriage": {"name": "MARRIAGE", "category": "Pension", "expected_files": ["MARRIAGE.csv"]},
"death": {"name": "DEATH", "category": "Pension", "expected_files": ["DEATH.csv"]},
"separate": {"name": "SEPARATE", "category": "Pension", "expected_files": ["SEPARATE.csv"]},
"schedule": {"name": "SCHEDULE", "category": "Pension", "expected_files": ["SCHEDULE.csv"]},
"numberal": {"name": "NUMBERAL", "category": "Forms", "expected_files": ["NUMBERAL.csv"]},
"inx_lkup": {"name": "INX_LKUP", "category": "Forms", "expected_files": ["INX_LKUP.csv"]},
"form_lst": {"name": "FORM_LST", "category": "Forms", "expected_files": ["FORM_LST.csv"]},
"form_inx": {"name": "FORM_INX", "category": "Forms", "expected_files": ["FORM_INX.csv"]},
"lifetabl": {"name": "LIFETABL", "category": "Forms", "expected_files": ["LIFETABL.csv"]}
}
# Check status of each table
table_status = []
for table_name, info in table_info.items():
status = {
"table_name": table_name,
"display_name": info["name"],
"category": info["category"],
"expected_files": info["expected_files"],
"exists": table_name in existing_tables,
"row_count": 0,
"imported": False
}
if status["exists"]:
try:
# Get row count
result = db.execute(text(f"SELECT COUNT(*) FROM {table_name}"))
status["row_count"] = result.scalar()
status["imported"] = status["row_count"] > 0
except Exception as e:
logger.warning(f"Could not get row count for {table_name}: {e}")
status["row_count"] = -1 # Error getting count
table_status.append(status)
# Group by category
categories = {}
for status in table_status:
category = status["category"]
if category not in categories:
categories[category] = []
categories[category].append(status)
# Calculate summary stats
total_tables = len(table_status)
imported_tables = len([s for s in table_status if s["imported"]])
total_rows = sum(s["row_count"] for s in table_status if s["row_count"] > 0)
return {
"success": True,
"summary": {
"total_tables": total_tables,
"imported_tables": imported_tables,
"empty_tables": len([s for s in table_status if s["exists"] and s["row_count"] == 0]),
"missing_tables": len([s for s in table_status if not s["exists"]]),
"total_rows": total_rows,
"completion_percentage": round((imported_tables / total_tables) * 100, 1) if total_tables > 0 else 0
},
"categories": categories,
"tables": table_status
}
except Exception as e:
logger.error(f"Error getting import status overview: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to get import status")
@router.delete("/progress")
async def cleanup_import_progress(
current_user: User = Depends(get_admin_user)

View File

@@ -0,0 +1,377 @@
"""
Generic CSV Importer - handles any CSV structure dynamically
"""
import csv
import io
import logging
import re
from typing import Dict, Any, List, Optional
from datetime import datetime
from sqlalchemy import text, Column, String, Integer, Text, MetaData, Table, create_engine, Date
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from .base import BaseCSVImporter, ImportResult
logger = logging.getLogger(__name__)
class GenericCSVImporter(BaseCSVImporter):
"""Generic importer that can handle any CSV structure by creating tables dynamically"""
def __init__(self, db_session: Session, table_name: str, import_id: str = None):
# Set table name first, before calling super().__init__()
# because BaseCSVImporter.__init__ calls self.table_name
self._table_name = table_name.lower()
self.dynamic_table = None
self.csv_headers = []
super().__init__(db_session, import_id)
@property
def table_name(self) -> str:
return self._table_name
@property
def required_fields(self) -> List[str]:
"""No required fields for generic import"""
return []
@property
def field_mapping(self) -> Dict[str, str]:
"""Dynamic mapping based on CSV headers"""
if self.csv_headers:
mapping = {}
for header in self.csv_headers:
safe_name = self._make_safe_name(header)
# Handle 'id' column renaming for conflict avoidance
if safe_name.lower() == 'id':
safe_name = 'csv_id'
mapping[header] = safe_name
return mapping
return {}
def create_model_instance(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
"""For generic import, just return the processed row data"""
return row_data
def create_dynamic_table(self, headers: List[str]) -> Table:
"""Create a table dynamically based on CSV headers"""
try:
# Create metadata
metadata = MetaData()
# Clean table name
safe_table_name = self._make_safe_name(self.table_name)
# Check if table already exists BEFORE creating the Table object
from sqlalchemy import inspect
inspector = inspect(self.db_session.bind)
existing_tables = inspector.get_table_names()
if safe_table_name in existing_tables:
logger.info(f"Table '{safe_table_name}' already exists, using unique table name")
# Instead of trying to drop, create a new table with timestamp suffix
import time
timestamp = str(int(time.time()))
safe_table_name = f"{safe_table_name}_{timestamp}"
logger.info(f"Creating new table with unique name: '{safe_table_name}'")
else:
logger.info(f"Creating new table: '{safe_table_name}'")
# Create columns dynamically
columns = [Column('id', Integer, primary_key=True, autoincrement=True)]
for header in headers:
if header and header.strip():
safe_column_name = self._make_safe_name(header.strip())
# Skip if this would create a duplicate 'id' column
if safe_column_name.lower() == 'id':
# Rename the CSV column to avoid conflict with auto-generated id
safe_column_name = 'csv_id'
columns.append(Column(safe_column_name, Text))
# Create table with the final table name
table = Table(safe_table_name, metadata, *columns)
# Store the actual table name for use in data insertion
self.actual_table_name = safe_table_name
self._table_name = safe_table_name # Update the stored table name to use the timestamped version
logger.info(f"Using table name for data insertion: '{safe_table_name}'")
# Create the table in the database with retry logic for locks
max_retries = 3
retry_delay = 1.0
for attempt in range(max_retries):
try:
# Use explicit transaction to avoid deadlocks
self.db_session.begin()
metadata.create_all(self.db_session.bind)
self.db_session.commit()
logger.info(f"Created dynamic table '{safe_table_name}' with {len(columns)} columns")
return table
except Exception as create_error:
self.db_session.rollback()
if "database is locked" in str(create_error).lower() and attempt < max_retries - 1:
import time
logger.warning(f"Database locked, retrying in {retry_delay}s (attempt {attempt + 1}/{max_retries})")
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
continue
elif "already present" in str(create_error).lower():
# Table was created by another process, reflect it
logger.info(f"Table '{safe_table_name}' created by another process, reflecting existing table")
try:
metadata.reflect(bind=self.db_session.bind, only=[safe_table_name])
return metadata.tables[safe_table_name]
except Exception:
# If reflection fails, re-raise original error
raise create_error
else:
# Re-raise if not a recoverable error
raise create_error
except Exception as e:
logger.error(f"Error creating dynamic table: {e}")
raise
def _make_safe_name(self, name: str) -> str:
"""Make a database-safe name from any string"""
import re
# Remove special characters and replace with underscore
safe_name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
# Remove multiple underscores
safe_name = re.sub(r'_+', '_', safe_name)
# Remove trailing underscore
safe_name = safe_name.strip('_')
# Ensure it's not empty
if not safe_name:
safe_name = 'unnamed_column'
# Special handling for purely numeric names or names starting with numbers
if safe_name.isdigit() or (safe_name and safe_name[0].isdigit()):
safe_name = f'col_{safe_name}'
# Ensure it starts with a letter or underscore (final check)
elif safe_name and not (safe_name[0].isalpha() or safe_name[0] == '_'):
safe_name = 'col_' + safe_name
return safe_name.lower()
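For reference, the sanitizing rules above restated as a standalone function with a few worked examples; the sample headers are hypothetical.
import re

def make_safe_name(name: str) -> str:
    # Standalone restatement of _make_safe_name, for illustration only
    safe = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    safe = re.sub(r'_+', '_', safe).strip('_')
    if not safe:
        safe = 'unnamed_column'
    if safe.isdigit() or safe[0].isdigit():
        safe = f'col_{safe}'
    elif not (safe[0].isalpha() or safe[0] == '_'):
        safe = 'col_' + safe
    return safe.lower()

assert make_safe_name("First Name") == "first_name"
assert make_safe_name("Amount ($)") == "amount"
assert make_safe_name("2nd Address") == "col_2nd_address"
assert make_safe_name("") == "unnamed_column"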
def _parse_date_value(self, value: str) -> Optional[str]:
"""Try to parse a date value and return it in ISO format"""
if not value or value.strip() == '':
return None
value = value.strip()
# Common date formats to try
date_formats = [
'%m/%d/%Y', # MM/DD/YYYY
'%m/%d/%y', # MM/DD/YY
'%Y-%m-%d', # YYYY-MM-DD
'%d/%m/%Y', # DD/MM/YYYY
'%d-%m-%Y', # DD-MM-YYYY
'%Y/%m/%d', # YYYY/MM/DD
]
for fmt in date_formats:
try:
parsed_date = datetime.strptime(value, fmt)
return parsed_date.strftime('%Y-%m-%d') # Return in ISO format
except ValueError:
continue
# If no format matches, return the original value
return value
def process_csv_content(self, csv_content: str, encoding: str = "utf-8") -> ImportResult:
"""Override the main processing method to handle dynamic table creation"""
try:
# Preprocess CSV content to handle common issues
# Remove trailing empty lines and normalize line endings
lines = csv_content.strip().splitlines()
# Remove empty lines that might cause parsing issues
non_empty_lines = [line for line in lines if line.strip()]
if not non_empty_lines:
result = ImportResult()
result.add_error("CSV file is empty or contains only empty lines")
return result
# Reconstruct CSV content with clean line endings
cleaned_csv_content = '\n'.join(non_empty_lines)
# Parse CSV and get headers with flexible parsing
# Handle various CSV format issues including embedded newlines
csv_file = io.StringIO(cleaned_csv_content)
# Try with different CSV dialect configurations
headers = None
parsing_strategies = [
# Strategy 1: Standard CSV parsing
lambda f: csv.DictReader(f),
# Strategy 2: Skip leading whitespace, minimal quoting, non-strict parsing
lambda f: csv.DictReader(f, skipinitialspace=True, quoting=csv.QUOTE_MINIMAL, strict=False),
# Strategy 3: More flexible quoting
lambda f: csv.DictReader(f, quoting=csv.QUOTE_ALL, strict=False),
# Strategy 4: Excel dialect
lambda f: csv.DictReader(f, dialect='excel'),
# Strategy 5: Unix dialect
lambda f: csv.DictReader(f, dialect='unix'),
# Strategy 6: Very permissive - ignore malformed lines
lambda f: csv.DictReader(f, quoting=csv.QUOTE_NONE, escapechar='\\', strict=False)
]
for i, strategy in enumerate(parsing_strategies):
try:
csv_file.seek(0)
csv_reader = strategy(csv_file)
headers = csv_reader.fieldnames
if headers:
logger.debug(f"CSV parsing successful with strategy {i+1}")
break
except (csv.Error, UnicodeDecodeError) as e:
logger.debug(f"CSV parsing strategy {i+1} failed: {e}")
continue
if not headers:
result = ImportResult()
result.add_error("No headers found in CSV file")
return result
# Store headers and create dynamic table
self.csv_headers = [h.strip() for h in headers if h and h.strip()]
if not self.csv_headers:
result = ImportResult()
result.add_error("No valid headers found in CSV file")
return result
self.dynamic_table = self.create_dynamic_table(self.csv_headers)
# Reset reader and process rows with the same successful parsing strategy
csv_file = io.StringIO(cleaned_csv_content)
csv_reader = None
# Use the same parsing strategies to ensure consistency
for i, strategy in enumerate(parsing_strategies):
try:
csv_file.seek(0)
csv_reader = strategy(csv_file)
# Test that it works by trying to read headers
test_headers = csv_reader.fieldnames
if test_headers:
logger.debug(f"Data parsing using strategy {i+1}")
break
except (csv.Error, UnicodeDecodeError) as e:
logger.debug(f"Data parsing strategy {i+1} failed: {e}")
continue
if not csv_reader:
result = ImportResult()
result.add_error("Unable to parse CSV file with any available strategy")
return result
imported_count = 0
error_count = 0
total_count = 0
# Check if file has any data rows
rows = list(csv_reader)
if not rows:
logger.info(f"CSV file for table '{self.table_name}' contains headers only, no data rows to import")
self.result.success = True
self.result.total_rows = 0
self.result.imported_rows = 0
self.result.error_rows = 0
self.result.add_warning("File contains headers only, no data rows found")
return self.result
# Process all rows in a single transaction
try:
self.db_session.begin()
for row_num, row in enumerate(rows, start=2):
total_count += 1
try:
# Prepare row data
row_data = {}
for header in self.csv_headers:
safe_column_name = self._make_safe_name(header)
# Handle 'id' column renaming for conflict avoidance
if safe_column_name.lower() == 'id':
safe_column_name = 'csv_id'
value = row.get(header, '').strip() if row.get(header) else None
# Convert empty strings to None for better database handling
if value == '':
value = None
elif value and ('date' in header.lower() or 'time' in header.lower()):
# Try to parse date values for better format consistency
value = self._parse_date_value(value)
row_data[safe_column_name] = value
# Insert into database with conflict resolution
# Use INSERT OR IGNORE to handle potential duplicates gracefully
# Use the actual table name (which may have timestamp suffix) instead of dynamic_table.name
table_name = getattr(self, 'actual_table_name', self.dynamic_table.name)
logger.debug(f"Inserting into table: '{table_name}' (original: '{self._table_name}', dynamic: '{self.dynamic_table.name}')")
columns = list(row_data.keys())
values = list(row_data.values())
placeholders = ', '.join([':param' + str(i) for i in range(len(values))])
column_names = ', '.join(columns)
# Create parameter dictionary for SQLAlchemy
params = {f'param{i}': value for i, value in enumerate(values)}
ignore_sql = f"INSERT OR IGNORE INTO {table_name} ({column_names}) VALUES ({placeholders})"
result = self.db_session.execute(text(ignore_sql), params)
# Check if the row was actually inserted (rowcount > 0) or ignored (rowcount = 0)
if result.rowcount == 0:
logger.debug(f"Row {row_num}: Skipped duplicate record")
else:
logger.debug(f"Row {row_num}: Inserted successfully")
imported_count += 1
except Exception as e:
error_count += 1
error_msg = str(e)
# Provide more specific error messages for common database issues
if "NOT NULL constraint failed" in error_msg:
self.result.add_error(f"Row {row_num}: Missing required value in column")
elif "UNIQUE constraint failed" in error_msg:
self.result.add_error(f"Row {row_num}: Duplicate value detected")
elif "no such column" in error_msg:
self.result.add_error(f"Row {row_num}: Column structure mismatch")
else:
self.result.add_error(f"Row {row_num}: {error_msg}")
logger.warning(f"Error importing row {row_num}: {e}")
continue
# Commit all changes
self.db_session.commit()
except Exception as transaction_error:
self.db_session.rollback()
logger.error(f"Transaction failed, rolled back: {transaction_error}")
self.result.add_error(f"Transaction failed: {str(transaction_error)}")
# Update result
self.result.success = imported_count > 0
self.result.total_rows = total_count
self.result.imported_rows = imported_count
self.result.error_rows = error_count
if imported_count > 0:
logger.info(f"Successfully imported {imported_count} rows into {self.table_name}")
return self.result
except Exception as e:
logger.error(f"Error during CSV import: {e}")
self.result.add_error(f"Import failed: {str(e)}")
return self.result
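A minimal usage sketch for the importer defined above, relying on the imports already at the top of this module; session wiring is assumed, and the result fields follow the ImportResult attributes set in this file.
def import_legacy_csv(db: Session, table: str, csv_text: str):
    # GenericCSVImporter creates (or timestamp-suffixes) the target table, then inserts row by row
    importer = GenericCSVImporter(db, table)
    result = importer.process_csv_content(csv_text)
    return result.success, result.imported_rows, result.error_rows, result.errors

# e.g. import_legacy_csv(db, "employee", "EMP_ID,NAME,HIRE DATE\n1,Jane Doe,01/15/1998\n")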

View File

@@ -12,6 +12,7 @@ from .phone_importer import PhoneCSVImporter
from .files_importer import FilesCSVImporter
from .ledger_importer import LedgerCSVImporter
from .qdros_importer import QdrosCSVImporter
from .generic_importer import GenericCSVImporter
logger = logging.getLogger(__name__)
@@ -23,6 +24,32 @@ class TableType(Enum):
FILES = "files"
LEDGER = "ledger"
QDROS = "qdros"
# Generic table types for all other CSV files
GRUPLKUP = "gruplkup"
EMPLOYEE = "employee"
SETUP = "setup"
FILETYPE = "filetype"
TRNSTYPE = "trnstype"
TRNSACTN = "trnsactn"
TRNSLKUP = "trnslkup"
RVARLKUP = "rvarlkup"
FVARLKUP = "fvarlkup"
FILENOTS = "filenots"
DEPOSITS = "deposits"
PAYMENTS = "payments"
PENSIONS = "pensions"
PLANINFO = "planinfo"
# Form tables
NUMBERAL = "numberal"
INX_LKUP = "inx_lkup"
FORM_LST = "form_lst"
FORM_INX = "form_inx"
LIFETABL = "lifetabl"
# Pension tables
MARRIAGE = "marriage"
DEATH = "death"
SEPARATE = "separate"
SCHEDULE = "schedule"
class ImportService:
@@ -31,11 +58,36 @@ class ImportService:
def __init__(self, db_session: Session):
self.db_session = db_session
self._importers = {
TableType.ROLODEX: RolodexCSVImporter,
TableType.PHONE: PhoneCSVImporter,
TableType.FILES: FilesCSVImporter,
TableType.LEDGER: LedgerCSVImporter,
TableType.QDROS: QdrosCSVImporter
# Use generic importers for all tables to handle legacy CSV structure variations
TableType.ROLODEX: GenericCSVImporter, # Use generic importer for rolodex (more flexible)
TableType.PHONE: GenericCSVImporter, # Use generic importer for phone
TableType.FILES: GenericCSVImporter, # Use generic importer for files
TableType.LEDGER: GenericCSVImporter, # Use generic importer for ledger (to avoid FK issues)
TableType.QDROS: GenericCSVImporter, # Use generic importer for qdros (to avoid FK issues)
# Generic importer for all other tables
TableType.GRUPLKUP: GenericCSVImporter,
TableType.EMPLOYEE: GenericCSVImporter,
TableType.SETUP: GenericCSVImporter,
TableType.FILETYPE: GenericCSVImporter,
TableType.TRNSTYPE: GenericCSVImporter,
TableType.TRNSACTN: GenericCSVImporter,
TableType.TRNSLKUP: GenericCSVImporter,
TableType.RVARLKUP: GenericCSVImporter,
TableType.FVARLKUP: GenericCSVImporter,
TableType.FILENOTS: GenericCSVImporter,
TableType.DEPOSITS: GenericCSVImporter,
TableType.PAYMENTS: GenericCSVImporter,
TableType.PENSIONS: GenericCSVImporter,
TableType.PLANINFO: GenericCSVImporter,
TableType.NUMBERAL: GenericCSVImporter,
TableType.INX_LKUP: GenericCSVImporter,
TableType.FORM_LST: GenericCSVImporter,
TableType.FORM_INX: GenericCSVImporter,
TableType.LIFETABL: GenericCSVImporter,
TableType.MARRIAGE: GenericCSVImporter,
TableType.DEATH: GenericCSVImporter,
TableType.SEPARATE: GenericCSVImporter,
TableType.SCHEDULE: GenericCSVImporter,
}
def get_supported_tables(self) -> List[str]:
@@ -47,7 +99,12 @@ class ImportService:
try:
table_type = TableType(table_name.lower())
importer_class = self._importers[table_type]
temp_importer = importer_class(self.db_session, "temp_schema_check")
# Handle generic importer differently
if importer_class == GenericCSVImporter:
temp_importer = importer_class(self.db_session, table_name, "temp_schema_check")
else:
temp_importer = importer_class(self.db_session, "temp_schema_check")
return {
"table_name": temp_importer.table_name,
@@ -77,7 +134,12 @@ class ImportService:
# Get appropriate importer
importer_class = self._importers[table_type]
importer = importer_class(self.db_session, import_id)
# Handle generic importer differently
if importer_class == GenericCSVImporter:
importer = importer_class(self.db_session, table_name, import_id)
else:
importer = importer_class(self.db_session, import_id)
logger.info(f"Starting CSV import for table: {table_name} (import_id: {importer.import_id})")
@@ -119,11 +181,39 @@ class ImportService:
# Recommended import order (dependencies first)
import_order = [
# Core tables with dependencies
TableType.ROLODEX, # No dependencies
TableType.PHONE, # Depends on ROLODEX
TableType.FILES, # Depends on ROLODEX
TableType.LEDGER, # Depends on FILES
TableType.QDROS # Depends on FILES
TableType.QDROS, # Depends on FILES
# Lookup and reference tables (no dependencies)
TableType.GRUPLKUP,
TableType.EMPLOYEE,
TableType.SETUP,
TableType.FILETYPE,
TableType.TRNSTYPE,
TableType.TRNSACTN,
TableType.TRNSLKUP,
TableType.RVARLKUP,
TableType.FVARLKUP,
TableType.FILENOTS,
TableType.PLANINFO,
# Financial tables
TableType.DEPOSITS,
TableType.PAYMENTS,
TableType.PENSIONS,
# Form tables
TableType.NUMBERAL,
TableType.INX_LKUP,
TableType.FORM_LST,
TableType.FORM_INX,
TableType.LIFETABL,
# Pension tables
TableType.MARRIAGE,
TableType.DEATH,
TableType.SEPARATE,
TableType.SCHEDULE
]
# Group imports by table type
@@ -134,11 +224,15 @@ class ImportService:
imports_by_table[table_name] = []
imports_by_table[table_name].append(import_data)
# Track processed tables
processed_tables = set()
# Process in dependency order
for table_type in import_order:
table_name = table_type.value
if table_name in imports_by_table:
table_imports = imports_by_table[table_name]
processed_tables.add(table_name)
for import_data in table_imports:
result = self.import_csv(
@@ -160,6 +254,35 @@ class ImportService:
if not result.success and table_type in [TableType.ROLODEX, TableType.FILES]:
logger.error(f"Critical import failed for {table_name}, stopping batch")
break
# Small delay to reduce database lock contention
import time
time.sleep(0.1)
# Process any remaining tables not in the explicit order
for table_name, table_imports in imports_by_table.items():
if table_name not in processed_tables:
logger.info(f"Processing table {table_name} (not in explicit order)")
for import_data in table_imports:
result = self.import_csv(
table_name,
import_data["csv_content"],
import_data.get("encoding", "utf-8")
)
# Use a unique key if multiple imports for same table
key = table_name
counter = 1
while key in results:
counter += 1
key = f"{table_name}_{counter}"
results[key] = result
# Small delay to reduce database lock contention
import time
time.sleep(0.1)
return results
@@ -174,7 +297,12 @@ class ImportService:
# Get appropriate importer
importer_class = self._importers[table_type]
importer = importer_class(self.db_session, "validation_check")
# Handle generic importer differently
if importer_class == GenericCSVImporter:
importer = importer_class(self.db_session, table_name, "validation_check")
else:
importer = importer_class(self.db_session, "validation_check")
# Parse headers only
import csv

View File

@@ -10,12 +10,13 @@ from fastapi.middleware.cors import CORSMiddleware
from app.config import settings
from app.database.base import engine
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker
from app.database.fts import ensure_rolodex_fts, ensure_files_fts, ensure_ledger_fts, ensure_qdros_fts
from app.database.indexes import ensure_secondary_indexes
from app.database.schema_updates import ensure_schema_updates
from app.models import BaseModel
from app.models.user import User
from app.auth.security import get_admin_user
from app.auth.security import get_admin_user, get_password_hash, verify_password
from app.core.logging import setup_logging, get_logger
from app.middleware.logging import LoggingMiddleware
from app.middleware.errors import register_exception_handlers
@@ -54,6 +55,48 @@ ensure_secondary_indexes(engine)
logger.info("Ensuring schema updates (new columns)")
ensure_schema_updates(engine)
def ensure_admin_user():
"""Ensure admin user exists and password matches environment variable"""
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
db = SessionLocal()
try:
# Check if admin user exists
admin_user = db.query(User).filter(User.username == settings.admin_username).first()
if not admin_user:
# Create admin user if it doesn't exist
logger.info(f"Creating admin user '{settings.admin_username}'")
admin_user = User(
username=settings.admin_username,
email=f"{settings.admin_username}@delphicg.local",
full_name="System Administrator",
hashed_password=get_password_hash(settings.admin_password),
is_active=True,
is_admin=True
)
db.add(admin_user)
db.commit()
logger.info(f"Admin user '{settings.admin_username}' created successfully")
else:
# Check if password needs to be updated
if not verify_password(settings.admin_password, admin_user.hashed_password):
logger.info(f"Updating admin password for user '{settings.admin_username}'")
admin_user.hashed_password = get_password_hash(settings.admin_password)
db.commit()
logger.info("Admin password updated successfully")
else:
logger.debug(f"Admin user '{settings.admin_username}' password is current")
except Exception as e:
logger.error(f"Error ensuring admin user: {e}")
db.rollback()
raise
finally:
db.close()
# Initialize FastAPI app
logger.info("Initializing FastAPI application", version=settings.app_version, debug=settings.debug)
app = FastAPI(
@@ -67,6 +110,11 @@ app = FastAPI(
async def startup_event():
"""Initialize WebSocket pool and other startup tasks"""
from app.services.websocket_pool import initialize_websocket_pool
# Ensure admin user exists and password is synced with environment
logger.info("Ensuring admin user exists and password is current")
ensure_admin_user()
logger.info("Initializing WebSocket connection pool")
await initialize_websocket_pool(
cleanup_interval=60,