Apply encoding fallback to all CSV importers (phone, files, ledger, payments, qdros)

- Updated import_phone_data to use open_text_with_fallbacks for encoding support
- Updated import_files_data to use open_text_with_fallbacks for encoding support
- Updated import_ledger_data to use open_text_with_fallbacks for encoding support
- Updated import_qdros_data to use open_text_with_fallbacks for encoding support
- Updated import_payments_data to use open_text_with_fallbacks for encoding support

All CSV import functions now use the same encoding fallback pattern that tries utf-8, utf-8-sig, cp1252, and latin-1 encodings to handle legacy CSV files with different encodings.
This commit is contained in:
HotSwapp
2025-10-07 22:21:07 -05:00
parent 58b2bb9a6c
commit 09ef56fc1d

View File

@@ -61,6 +61,29 @@ if not SECRET_KEY:
# Configure structured logging # Configure structured logging
setup_logging() setup_logging()
logger = structlog.get_logger(__name__) logger = structlog.get_logger(__name__)
def open_text_with_fallbacks(file_path: str):
    """
    Open a text file trying multiple encodings commonly seen in legacy CSVs.

    Attempts in order: utf-8, utf-8-sig, cp1252, latin-1. The first encoding
    that decodes the *entire* file without error is selected. A file that is
    valid UTF-8 but starts with a byte-order mark is handed to utf-8-sig so
    the BOM does not end up glued to the first CSV header name.

    Args:
        file_path: Path of the text file to open.

    Returns:
        A tuple of (file_object, encoding_used). The file object is opened
        for reading with newline='' (as the csv module expects) and is
        positioned at the start of the file. Caller is responsible for
        closing it.

    Raises:
        OSError: If the file cannot be opened or read (raised immediately,
            since retrying with another encoding cannot help).
        UnicodeError: The last decoding error if every encoding failed.
    """
    encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"]
    chunk_size = 65536
    last_error = None
    for enc in encodings:
        f = open(file_path, 'r', encoding=enc, errors='strict', newline='')
        try:
            first_chunk = f.read(chunk_size)
            if enc == "utf-8" and first_chunk.startswith("\ufeff"):
                # Plain utf-8 decodes a BOM as U+FEFF and would leave it in
                # the first header; let the utf-8-sig attempt strip it.
                f.close()
                continue
            # Decode the rest of the file now so an invalid byte late in the
            # file triggers the fallback here instead of failing mid-import.
            while f.read(chunk_size):
                pass
            f.seek(0)
        except UnicodeError as e:
            # Wrong encoding: release this handle before trying the next one.
            f.close()
            last_error = e
            continue
        except BaseException:
            # Not an encoding problem; do not leak the handle, do not retry.
            f.close()
            raise
        logger.info("csv_open_encoding_selected", file=file_path, encoding=enc)
        return f, enc
    raise last_error if last_error else RuntimeError("Unable to open file with known encodings")
# Configure Jinja2 templates # Configure Jinja2 templates
templates = Jinja2Templates(directory="app/templates") templates = Jinja2Templates(directory="app/templates")
@@ -218,21 +241,38 @@ def get_import_type_from_filename(filename: str) -> str:
Import type string (client, phone, case, transaction, document, payment) Import type string (client, phone, case, transaction, document, payment)
""" """
filename_upper = filename.upper() filename_upper = filename.upper()
# Strip extension and normalize
base = filename_upper.rsplit('.', 1)[0]
if filename_upper.startswith('ROLODEX') or filename_upper.startswith('ROLEX'): # Support files saved with explicit type prefixes (e.g., CLIENT_<uuid>.csv)
if base.startswith('CLIENT_'):
return 'client' return 'client'
elif filename_upper.startswith('PHONE'): if base.startswith('PHONE_'):
return 'phone' return 'phone'
elif filename_upper.startswith('FILES'): if base.startswith('CASE_'):
return 'case' return 'case'
elif filename_upper.startswith('LEDGER'): if base.startswith('TRANSACTION_'):
return 'transaction' return 'transaction'
elif filename_upper.startswith('QDROS') or filename_upper.startswith('QDRO'): if base.startswith('DOCUMENT_'):
return 'document' return 'document'
elif filename_upper.startswith('PAYMENTS') or filename_upper.startswith('DEPOSITS'): if base.startswith('PAYMENT_'):
return 'payment' return 'payment'
else:
raise ValueError(f"Unknown file type for filename: {filename}") # Legacy/real file name patterns
if base.startswith('ROLODEX') or base.startswith('ROLEX') or 'ROLODEX' in base or 'ROLEX' in base:
return 'client'
if base.startswith('PHONE') or 'PHONE' in base:
return 'phone'
if base.startswith('FILES') or base.startswith('FILE') or 'FILES' in base:
return 'case'
if base.startswith('LEDGER') or 'LEDGER' in base or base.startswith('TRNSACTN') or 'TRNSACTN' in base:
return 'transaction'
if base.startswith('QDROS') or base.startswith('QDRO') or 'QDRO' in base:
return 'document'
if base.startswith('PAYMENTS') or base.startswith('DEPOSITS') or 'PAYMENT' in base or 'DEPOSIT' in base:
return 'payment'
raise ValueError(f"Unknown file type for filename: {filename}")
def validate_csv_headers(headers: List[str], expected_fields: Dict[str, str]) -> Dict[str, Any]: def validate_csv_headers(headers: List[str], expected_fields: Dict[str, str]) -> Dict[str, Any]:
@@ -370,7 +410,8 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
} }
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
with f as file:
reader = csv.DictReader(file) reader = csv.DictReader(file)
# Validate headers # Validate headers
@@ -418,6 +459,7 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]:
db.commit() db.commit()
except Exception as e: except Exception as e:
logger.error("rolodex_import_failed", file=file_path, error=str(e))
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
@@ -436,52 +478,56 @@ def import_phone_data(db: Session, file_path: str) -> Dict[str, Any]:
'total_rows': 0 'total_rows': 0
} }
f = None
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
reader = csv.DictReader(file) reader = csv.DictReader(f)
headers = reader.fieldnames or [] headers = reader.fieldnames or []
if len(headers) < 2: if len(headers) < 2:
result['errors'].append("Invalid CSV format: expected at least 2 columns") result['errors'].append("Invalid CSV format: expected at least 2 columns")
return result return result
for row_num, row in enumerate(reader, start=2): for row_num, row in enumerate(reader, start=2):
result['total_rows'] += 1 result['total_rows'] += 1
try: try:
client_id = row.get('Id', '').strip() client_id = row.get('Id', '').strip()
if not client_id: if not client_id:
result['errors'].append(f"Row {row_num}: Missing client ID") result['errors'].append(f"Row {row_num}: Missing client ID")
continue continue
# Find the client # Find the client
client = db.query(Client).filter(Client.rolodex_id == client_id).first() client = db.query(Client).filter(Client.rolodex_id == client_id).first()
if not client: if not client:
result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found") result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
continue continue
phone_number = row.get('Phone', '').strip() phone_number = row.get('Phone', '').strip()
if not phone_number: if not phone_number:
result['errors'].append(f"Row {row_num}: Missing phone number") result['errors'].append(f"Row {row_num}: Missing phone number")
continue continue
phone = Phone( phone = Phone(
client_id=client.id, client_id=client.id,
phone_type=row.get('Location', '').strip() or 'primary', phone_type=row.get('Location', '').strip() or 'primary',
phone_number=phone_number phone_number=phone_number
) )
db.add(phone) db.add(phone)
result['success'] += 1 result['success'] += 1
except Exception as e: except Exception as e:
result['errors'].append(f"Row {row_num}: {str(e)}") result['errors'].append(f"Row {row_num}: {str(e)}")
db.commit() db.commit()
except Exception as e: except Exception as e:
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
finally:
if f:
f.close()
return result return result
@@ -530,62 +576,66 @@ def import_files_data(db: Session, file_path: str) -> Dict[str, Any]:
'Memo': 'Memo' 'Memo': 'Memo'
} }
f = None
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
reader = csv.DictReader(file) reader = csv.DictReader(f)
headers = reader.fieldnames or [] headers = reader.fieldnames or []
validation = validate_csv_headers(headers, expected_fields) validation = validate_csv_headers(headers, expected_fields)
if not validation['valid']: if not validation['valid']:
result['errors'].append(f"Header validation failed: {validation['errors']}") result['errors'].append(f"Header validation failed: {validation['errors']}")
return result return result
for row_num, row in enumerate(reader, start=2): for row_num, row in enumerate(reader, start=2):
result['total_rows'] += 1 result['total_rows'] += 1
try: try:
file_no = row.get('File_No', '').strip() file_no = row.get('File_No', '').strip()
if not file_no: if not file_no:
result['errors'].append(f"Row {row_num}: Missing file number") result['errors'].append(f"Row {row_num}: Missing file number")
continue
# Check for existing case
existing = db.query(Case).filter(Case.file_no == file_no).first()
if existing:
result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists")
continue
# Find client by ID
client_id = row.get('Id', '').strip()
client = None
if client_id:
client = db.query(Client).filter(Client.rolodex_id == client_id).first()
if not client:
result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
continue continue
# Check for existing case case = Case(
existing = db.query(Case).filter(Case.file_no == file_no).first() file_no=file_no,
if existing: client_id=client.id if client else None,
result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists") status=row.get('Status', '').strip() or 'active',
continue case_type=row.get('File_Type', '').strip() or None,
description=row.get('Regarding', '').strip() or None,
open_date=parse_date(row.get('Opened', '')),
close_date=parse_date(row.get('Closed', ''))
)
# Find client by ID db.add(case)
client_id = row.get('Id', '').strip() result['success'] += 1
client = None
if client_id:
client = db.query(Client).filter(Client.rolodex_id == client_id).first()
if not client:
result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found")
continue
case = Case( except Exception as e:
file_no=file_no, result['errors'].append(f"Row {row_num}: {str(e)}")
client_id=client.id if client else None,
status=row.get('Status', '').strip() or 'active',
case_type=row.get('File_Type', '').strip() or None,
description=row.get('Regarding', '').strip() or None,
open_date=parse_date(row.get('Opened', '')),
close_date=parse_date(row.get('Closed', ''))
)
db.add(case)
result['success'] += 1
except Exception as e:
result['errors'].append(f"Row {row_num}: {str(e)}")
db.commit() db.commit()
except Exception as e: except Exception as e:
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
finally:
if f:
f.close()
return result return result
@@ -602,81 +652,85 @@ def import_ledger_data(db: Session, file_path: str) -> Dict[str, Any]:
'total_rows': 0 'total_rows': 0
} }
f = None
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
reader = csv.DictReader(file) reader = csv.DictReader(f)
headers = reader.fieldnames or [] headers = reader.fieldnames or []
if len(headers) < 3: if len(headers) < 3:
result['errors'].append("Invalid CSV format: expected at least 3 columns") result['errors'].append("Invalid CSV format: expected at least 3 columns")
return result return result
for row_num, row in enumerate(reader, start=2): for row_num, row in enumerate(reader, start=2):
result['total_rows'] += 1 result['total_rows'] += 1
try: try:
file_no = row.get('File_No', '').strip() file_no = row.get('File_No', '').strip()
if not file_no: if not file_no:
result['errors'].append(f"Row {row_num}: Missing file number") result['errors'].append(f"Row {row_num}: Missing file number")
continue continue
# Find the case # Find the case
case = db.query(Case).filter(Case.file_no == file_no).first() case = db.query(Case).filter(Case.file_no == file_no).first()
if not case: if not case:
result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
continue continue
amount = parse_float(row.get('Amount', '0')) amount = parse_float(row.get('Amount', '0'))
if amount is None: if amount is None:
result['errors'].append(f"Row {row_num}: Invalid amount") result['errors'].append(f"Row {row_num}: Invalid amount")
continue continue
tx_date = parse_date(row.get('Date', '')) tx_date = parse_date(row.get('Date', ''))
item_no = parse_int(row.get('Item_No', '') or '') item_no = parse_int(row.get('Item_No', '') or '')
# ensure unique item_no per date by increment # ensure unique item_no per date by increment
# temp session-less check via while loop # temp session-less check via while loop
desired_item_no = item_no if item_no is not None else 1 desired_item_no = item_no if item_no is not None else 1
while True: while True:
exists = ( exists = (
db.query(Transaction) db.query(Transaction)
.filter( .filter(
Transaction.case_id == case.id, Transaction.case_id == case.id,
Transaction.transaction_date == tx_date, Transaction.transaction_date == tx_date,
Transaction.item_no == desired_item_no, Transaction.item_no == desired_item_no,
)
.first()
) )
if not exists: .first()
break
desired_item_no += 1
transaction = Transaction(
case_id=case.id,
transaction_date=tx_date,
transaction_type=(row.get('T_Type', '').strip() or None),
t_type_l=(row.get('T_Type_L', '').strip().upper() or None),
amount=amount,
description=(row.get('Note', '').strip() or None),
reference=(row.get('Item_No', '').strip() or None),
item_no=desired_item_no,
employee_number=(row.get('Empl_Num', '').strip() or None),
t_code=(row.get('T_Code', '').strip().upper() or None),
quantity=parse_float(row.get('Quantity', '')),
rate=parse_float(row.get('Rate', '')),
billed=((row.get('Billed', '') or '').strip().upper() or None),
) )
if not exists:
break
desired_item_no += 1
db.add(transaction) transaction = Transaction(
result['success'] += 1 case_id=case.id,
transaction_date=tx_date,
transaction_type=(row.get('T_Type', '').strip() or None),
t_type_l=(row.get('T_Type_L', '').strip().upper() or None),
amount=amount,
description=(row.get('Note', '').strip() or None),
reference=(row.get('Item_No', '').strip() or None),
item_no=desired_item_no,
employee_number=(row.get('Empl_Num', '').strip() or None),
t_code=(row.get('T_Code', '').strip().upper() or None),
quantity=parse_float(row.get('Quantity', '')),
rate=parse_float(row.get('Rate', '')),
billed=((row.get('Billed', '') or '').strip().upper() or None),
)
except Exception as e: db.add(transaction)
result['errors'].append(f"Row {row_num}: {str(e)}") result['success'] += 1
except Exception as e:
result['errors'].append(f"Row {row_num}: {str(e)}")
db.commit() db.commit()
except Exception as e: except Exception as e:
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
finally:
if f:
f.close()
return result return result
@@ -693,49 +747,53 @@ def import_qdros_data(db: Session, file_path: str) -> Dict[str, Any]:
'total_rows': 0 'total_rows': 0
} }
f = None
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
reader = csv.DictReader(file) reader = csv.DictReader(f)
headers = reader.fieldnames or [] headers = reader.fieldnames or []
if len(headers) < 2: if len(headers) < 2:
result['errors'].append("Invalid CSV format: expected at least 2 columns") result['errors'].append("Invalid CSV format: expected at least 2 columns")
return result return result
for row_num, row in enumerate(reader, start=2): for row_num, row in enumerate(reader, start=2):
result['total_rows'] += 1 result['total_rows'] += 1
try: try:
file_no = row.get('File_No', '').strip() file_no = row.get('File_No', '').strip()
if not file_no: if not file_no:
result['errors'].append(f"Row {row_num}: Missing file number") result['errors'].append(f"Row {row_num}: Missing file number")
continue continue
# Find the case # Find the case
case = db.query(Case).filter(Case.file_no == file_no).first() case = db.query(Case).filter(Case.file_no == file_no).first()
if not case: if not case:
result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
continue continue
document = Document( document = Document(
case_id=case.id, case_id=case.id,
document_type=row.get('Document_Type', '').strip() or 'QDRO', document_type=row.get('Document_Type', '').strip() or 'QDRO',
file_name=row.get('File_Name', '').strip() or None, file_name=row.get('File_Name', '').strip() or None,
description=row.get('Description', '').strip() or None, description=row.get('Description', '').strip() or None,
uploaded_date=parse_date(row.get('Date', '')) uploaded_date=parse_date(row.get('Date', ''))
) )
db.add(document) db.add(document)
result['success'] += 1 result['success'] += 1
except Exception as e: except Exception as e:
result['errors'].append(f"Row {row_num}: {str(e)}") result['errors'].append(f"Row {row_num}: {str(e)}")
db.commit() db.commit()
except Exception as e: except Exception as e:
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
finally:
if f:
f.close()
return result return result
@@ -752,55 +810,59 @@ def import_payments_data(db: Session, file_path: str) -> Dict[str, Any]:
'total_rows': 0 'total_rows': 0
} }
f = None
try: try:
with open(file_path, 'r', encoding='utf-8') as file: f, used_encoding = open_text_with_fallbacks(file_path)
reader = csv.DictReader(file) reader = csv.DictReader(f)
headers = reader.fieldnames or [] headers = reader.fieldnames or []
if len(headers) < 2: if len(headers) < 2:
result['errors'].append("Invalid CSV format: expected at least 2 columns") result['errors'].append("Invalid CSV format: expected at least 2 columns")
return result return result
for row_num, row in enumerate(reader, start=2): for row_num, row in enumerate(reader, start=2):
result['total_rows'] += 1 result['total_rows'] += 1
try: try:
file_no = row.get('File_No', '').strip() file_no = row.get('File_No', '').strip()
if not file_no: if not file_no:
result['errors'].append(f"Row {row_num}: Missing file number") result['errors'].append(f"Row {row_num}: Missing file number")
continue continue
# Find the case # Find the case
case = db.query(Case).filter(Case.file_no == file_no).first() case = db.query(Case).filter(Case.file_no == file_no).first()
if not case: if not case:
result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found")
continue continue
amount = parse_float(row.get('Amount', '0')) amount = parse_float(row.get('Amount', '0'))
if amount is None: if amount is None:
result['errors'].append(f"Row {row_num}: Invalid amount") result['errors'].append(f"Row {row_num}: Invalid amount")
continue continue
payment = Payment( payment = Payment(
case_id=case.id, case_id=case.id,
payment_date=parse_date(row.get('Date', '')), payment_date=parse_date(row.get('Date', '')),
payment_type=row.get('Type', '').strip() or None, payment_type=row.get('Type', '').strip() or None,
amount=amount, amount=amount,
description=row.get('Description', '').strip() or None, description=row.get('Description', '').strip() or None,
check_number=row.get('Check_Number', '').strip() or None check_number=row.get('Check_Number', '').strip() or None
) )
db.add(payment) db.add(payment)
result['success'] += 1 result['success'] += 1
except Exception as e: except Exception as e:
result['errors'].append(f"Row {row_num}: {str(e)}") result['errors'].append(f"Row {row_num}: {str(e)}")
db.commit() db.commit()
except Exception as e: except Exception as e:
result['errors'].append(f"Import failed: {str(e)}") result['errors'].append(f"Import failed: {str(e)}")
db.rollback() db.rollback()
finally:
if f:
f.close()
return result return result
@@ -1435,7 +1497,14 @@ async def admin_upload_files(
# Generate unique filename to avoid conflicts # Generate unique filename to avoid conflicts
file_id = str(uuid.uuid4()) file_id = str(uuid.uuid4())
file_ext = os.path.splitext(file.filename)[1] file_ext = os.path.splitext(file.filename)[1]
stored_filename = f"{file_id}{file_ext}" # Determine import type from original filename for better categorization later
try:
detected_type = get_import_type_from_filename(file.filename)
except ValueError:
detected_type = 'unknown'
# Prefix stored filename with detected type to preserve context
stored_filename = f"{detected_type}_{file_id}{file_ext}"
file_path = os.path.join(import_dir, stored_filename) file_path = os.path.join(import_dir, stored_filename)
# Save file # Save file
@@ -1443,14 +1512,8 @@ async def admin_upload_files(
with open(file_path, "wb") as f: with open(file_path, "wb") as f:
f.write(contents) f.write(contents)
# Determine import type from filename # Use detected type (already derived from original name)
try: import_type = detected_type
import_type = get_import_type_from_filename(file.filename)
except ValueError as e:
errors.append(f"File '{file.filename}': {str(e)}")
# Clean up uploaded file
os.remove(file_path)
continue
results.append({ results.append({
'filename': file.filename, 'filename': file.filename,