From 09ef56fc1ddd61e15406bcc2ec0364c6a94f1190 Mon Sep 17 00:00:00 2001 From: HotSwapp <47397945+HotSwapp@users.noreply.github.com> Date: Tue, 7 Oct 2025 22:21:07 -0500 Subject: [PATCH] Apply encoding fallback to all CSV importers (phone, files, ledger, payments, qdros) - Updated import_phone_data to use open_text_with_fallbacks for encoding support - Updated import_files_data to use open_text_with_fallbacks for encoding support - Updated import_ledger_data to use open_text_with_fallbacks for encoding support - Updated import_qdros_data to use open_text_with_fallbacks for encoding support - Updated import_payments_data to use open_text_with_fallbacks for encoding support All CSV import functions now use the same encoding fallback pattern that tries utf-8, utf-8-sig, cp1252, and latin-1 encodings to handle legacy CSV files with different encodings. --- app/main.py | 485 +++++++++++++++++++++++++++++----------------------- 1 file changed, 274 insertions(+), 211 deletions(-) diff --git a/app/main.py b/app/main.py index af6b59a..9866ac0 100644 --- a/app/main.py +++ b/app/main.py @@ -61,6 +61,29 @@ if not SECRET_KEY: # Configure structured logging setup_logging() logger = structlog.get_logger(__name__) +def open_text_with_fallbacks(file_path: str): + """ + Open a text file trying multiple encodings commonly seen in legacy CSVs. + + Attempts in order: utf-8, utf-8-sig, cp1252, latin-1. + + Returns a tuple of (file_object, encoding_used). Caller is responsible to close file. + """ + encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"] + last_error = None + for enc in encodings: + try: + f = open(file_path, 'r', encoding=enc, errors='strict', newline='') + # Try reading a tiny chunk to force decoding errors early + _ = f.read(1024) + f.seek(0) + logger.info("csv_open_encoding_selected", file=file_path, encoding=enc) + return f, enc + except Exception as e: + last_error = e + continue + raise last_error if last_error else RuntimeError("Unable to open file with known encodings") + # Configure Jinja2 templates templates = Jinja2Templates(directory="app/templates") @@ -218,21 +241,38 @@ def get_import_type_from_filename(filename: str) -> str: Import type string (client, phone, case, transaction, document, payment) """ filename_upper = filename.upper() + # Strip extension and normalize + base = filename_upper.rsplit('.', 1)[0] - if filename_upper.startswith('ROLODEX') or filename_upper.startswith('ROLEX'): + # Support files saved with explicit type prefixes (e.g., CLIENT_.csv) + if base.startswith('CLIENT_'): return 'client' - elif filename_upper.startswith('PHONE'): + if base.startswith('PHONE_'): return 'phone' - elif filename_upper.startswith('FILES'): + if base.startswith('CASE_'): return 'case' - elif filename_upper.startswith('LEDGER'): + if base.startswith('TRANSACTION_'): return 'transaction' - elif filename_upper.startswith('QDROS') or filename_upper.startswith('QDRO'): + if base.startswith('DOCUMENT_'): return 'document' - elif filename_upper.startswith('PAYMENTS') or filename_upper.startswith('DEPOSITS'): + if base.startswith('PAYMENT_'): return 'payment' - else: - raise ValueError(f"Unknown file type for filename: {filename}") + + # Legacy/real file name patterns + if base.startswith('ROLODEX') or base.startswith('ROLEX') or 'ROLODEX' in base or 'ROLEX' in base: + return 'client' + if base.startswith('PHONE') or 'PHONE' in base: + return 'phone' + if base.startswith('FILES') or base.startswith('FILE') or 'FILES' in base: + return 'case' + if base.startswith('LEDGER') or 'LEDGER' in base or base.startswith('TRNSACTN') or 'TRNSACTN' in base: + return 'transaction' + if base.startswith('QDROS') or base.startswith('QDRO') or 'QDRO' in base: + return 'document' + if base.startswith('PAYMENTS') or base.startswith('DEPOSITS') or 'PAYMENT' in base or 'DEPOSIT' in base: + return 'payment' + + raise ValueError(f"Unknown file type for filename: {filename}") def validate_csv_headers(headers: List[str], expected_fields: Dict[str, str]) -> Dict[str, Any]: @@ -370,7 +410,8 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]: } try: - with open(file_path, 'r', encoding='utf-8') as file: + f, used_encoding = open_text_with_fallbacks(file_path) + with f as file: reader = csv.DictReader(file) # Validate headers @@ -418,6 +459,7 @@ def import_rolodex_data(db: Session, file_path: str) -> Dict[str, Any]: db.commit() except Exception as e: + logger.error("rolodex_import_failed", file=file_path, error=str(e)) result['errors'].append(f"Import failed: {str(e)}") db.rollback() @@ -436,52 +478,56 @@ def import_phone_data(db: Session, file_path: str) -> Dict[str, Any]: 'total_rows': 0 } + f = None try: - with open(file_path, 'r', encoding='utf-8') as file: - reader = csv.DictReader(file) + f, used_encoding = open_text_with_fallbacks(file_path) + reader = csv.DictReader(f) - headers = reader.fieldnames or [] - if len(headers) < 2: - result['errors'].append("Invalid CSV format: expected at least 2 columns") - return result + headers = reader.fieldnames or [] + if len(headers) < 2: + result['errors'].append("Invalid CSV format: expected at least 2 columns") + return result - for row_num, row in enumerate(reader, start=2): - result['total_rows'] += 1 + for row_num, row in enumerate(reader, start=2): + result['total_rows'] += 1 - try: - client_id = row.get('Id', '').strip() - if not client_id: - result['errors'].append(f"Row {row_num}: Missing client ID") - continue + try: + client_id = row.get('Id', '').strip() + if not client_id: + result['errors'].append(f"Row {row_num}: Missing client ID") + continue - # Find the client - client = db.query(Client).filter(Client.rolodex_id == client_id).first() - if not client: - result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found") - continue + # Find the client + client = db.query(Client).filter(Client.rolodex_id == client_id).first() + if not client: + result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found") + continue - phone_number = row.get('Phone', '').strip() - if not phone_number: - result['errors'].append(f"Row {row_num}: Missing phone number") - continue + phone_number = row.get('Phone', '').strip() + if not phone_number: + result['errors'].append(f"Row {row_num}: Missing phone number") + continue - phone = Phone( - client_id=client.id, - phone_type=row.get('Location', '').strip() or 'primary', - phone_number=phone_number - ) + phone = Phone( + client_id=client.id, + phone_type=row.get('Location', '').strip() or 'primary', + phone_number=phone_number + ) - db.add(phone) - result['success'] += 1 + db.add(phone) + result['success'] += 1 - except Exception as e: - result['errors'].append(f"Row {row_num}: {str(e)}") + except Exception as e: + result['errors'].append(f"Row {row_num}: {str(e)}") db.commit() except Exception as e: result['errors'].append(f"Import failed: {str(e)}") db.rollback() + finally: + if f: + f.close() return result @@ -530,62 +576,66 @@ def import_files_data(db: Session, file_path: str) -> Dict[str, Any]: 'Memo': 'Memo' } + f = None try: - with open(file_path, 'r', encoding='utf-8') as file: - reader = csv.DictReader(file) + f, used_encoding = open_text_with_fallbacks(file_path) + reader = csv.DictReader(f) - headers = reader.fieldnames or [] - validation = validate_csv_headers(headers, expected_fields) + headers = reader.fieldnames or [] + validation = validate_csv_headers(headers, expected_fields) - if not validation['valid']: - result['errors'].append(f"Header validation failed: {validation['errors']}") - return result + if not validation['valid']: + result['errors'].append(f"Header validation failed: {validation['errors']}") + return result - for row_num, row in enumerate(reader, start=2): - result['total_rows'] += 1 + for row_num, row in enumerate(reader, start=2): + result['total_rows'] += 1 - try: - file_no = row.get('File_No', '').strip() - if not file_no: - result['errors'].append(f"Row {row_num}: Missing file number") + try: + file_no = row.get('File_No', '').strip() + if not file_no: + result['errors'].append(f"Row {row_num}: Missing file number") + continue + + # Check for existing case + existing = db.query(Case).filter(Case.file_no == file_no).first() + if existing: + result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists") + continue + + # Find client by ID + client_id = row.get('Id', '').strip() + client = None + if client_id: + client = db.query(Client).filter(Client.rolodex_id == client_id).first() + if not client: + result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found") continue - # Check for existing case - existing = db.query(Case).filter(Case.file_no == file_no).first() - if existing: - result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' already exists") - continue + case = Case( + file_no=file_no, + client_id=client.id if client else None, + status=row.get('Status', '').strip() or 'active', + case_type=row.get('File_Type', '').strip() or None, + description=row.get('Regarding', '').strip() or None, + open_date=parse_date(row.get('Opened', '')), + close_date=parse_date(row.get('Closed', '')) + ) - # Find client by ID - client_id = row.get('Id', '').strip() - client = None - if client_id: - client = db.query(Client).filter(Client.rolodex_id == client_id).first() - if not client: - result['errors'].append(f"Row {row_num}: Client with ID '{client_id}' not found") - continue + db.add(case) + result['success'] += 1 - case = Case( - file_no=file_no, - client_id=client.id if client else None, - status=row.get('Status', '').strip() or 'active', - case_type=row.get('File_Type', '').strip() or None, - description=row.get('Regarding', '').strip() or None, - open_date=parse_date(row.get('Opened', '')), - close_date=parse_date(row.get('Closed', '')) - ) - - db.add(case) - result['success'] += 1 - - except Exception as e: - result['errors'].append(f"Row {row_num}: {str(e)}") + except Exception as e: + result['errors'].append(f"Row {row_num}: {str(e)}") db.commit() except Exception as e: result['errors'].append(f"Import failed: {str(e)}") db.rollback() + finally: + if f: + f.close() return result @@ -602,81 +652,85 @@ def import_ledger_data(db: Session, file_path: str) -> Dict[str, Any]: 'total_rows': 0 } + f = None try: - with open(file_path, 'r', encoding='utf-8') as file: - reader = csv.DictReader(file) + f, used_encoding = open_text_with_fallbacks(file_path) + reader = csv.DictReader(f) - headers = reader.fieldnames or [] - if len(headers) < 3: - result['errors'].append("Invalid CSV format: expected at least 3 columns") - return result + headers = reader.fieldnames or [] + if len(headers) < 3: + result['errors'].append("Invalid CSV format: expected at least 3 columns") + return result - for row_num, row in enumerate(reader, start=2): - result['total_rows'] += 1 + for row_num, row in enumerate(reader, start=2): + result['total_rows'] += 1 - try: - file_no = row.get('File_No', '').strip() - if not file_no: - result['errors'].append(f"Row {row_num}: Missing file number") - continue + try: + file_no = row.get('File_No', '').strip() + if not file_no: + result['errors'].append(f"Row {row_num}: Missing file number") + continue - # Find the case - case = db.query(Case).filter(Case.file_no == file_no).first() - if not case: - result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") - continue + # Find the case + case = db.query(Case).filter(Case.file_no == file_no).first() + if not case: + result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") + continue - amount = parse_float(row.get('Amount', '0')) - if amount is None: - result['errors'].append(f"Row {row_num}: Invalid amount") - continue + amount = parse_float(row.get('Amount', '0')) + if amount is None: + result['errors'].append(f"Row {row_num}: Invalid amount") + continue - tx_date = parse_date(row.get('Date', '')) - item_no = parse_int(row.get('Item_No', '') or '') - # ensure unique item_no per date by increment - # temp session-less check via while loop - desired_item_no = item_no if item_no is not None else 1 - while True: - exists = ( - db.query(Transaction) - .filter( - Transaction.case_id == case.id, - Transaction.transaction_date == tx_date, - Transaction.item_no == desired_item_no, - ) - .first() + tx_date = parse_date(row.get('Date', '')) + item_no = parse_int(row.get('Item_No', '') or '') + # ensure unique item_no per date by increment + # temp session-less check via while loop + desired_item_no = item_no if item_no is not None else 1 + while True: + exists = ( + db.query(Transaction) + .filter( + Transaction.case_id == case.id, + Transaction.transaction_date == tx_date, + Transaction.item_no == desired_item_no, ) - if not exists: - break - desired_item_no += 1 - - transaction = Transaction( - case_id=case.id, - transaction_date=tx_date, - transaction_type=(row.get('T_Type', '').strip() or None), - t_type_l=(row.get('T_Type_L', '').strip().upper() or None), - amount=amount, - description=(row.get('Note', '').strip() or None), - reference=(row.get('Item_No', '').strip() or None), - item_no=desired_item_no, - employee_number=(row.get('Empl_Num', '').strip() or None), - t_code=(row.get('T_Code', '').strip().upper() or None), - quantity=parse_float(row.get('Quantity', '')), - rate=parse_float(row.get('Rate', '')), - billed=((row.get('Billed', '') or '').strip().upper() or None), + .first() ) + if not exists: + break + desired_item_no += 1 - db.add(transaction) - result['success'] += 1 + transaction = Transaction( + case_id=case.id, + transaction_date=tx_date, + transaction_type=(row.get('T_Type', '').strip() or None), + t_type_l=(row.get('T_Type_L', '').strip().upper() or None), + amount=amount, + description=(row.get('Note', '').strip() or None), + reference=(row.get('Item_No', '').strip() or None), + item_no=desired_item_no, + employee_number=(row.get('Empl_Num', '').strip() or None), + t_code=(row.get('T_Code', '').strip().upper() or None), + quantity=parse_float(row.get('Quantity', '')), + rate=parse_float(row.get('Rate', '')), + billed=((row.get('Billed', '') or '').strip().upper() or None), + ) - except Exception as e: - result['errors'].append(f"Row {row_num}: {str(e)}") + db.add(transaction) + result['success'] += 1 + + except Exception as e: + result['errors'].append(f"Row {row_num}: {str(e)}") db.commit() except Exception as e: result['errors'].append(f"Import failed: {str(e)}") db.rollback() + finally: + if f: + f.close() return result @@ -693,49 +747,53 @@ def import_qdros_data(db: Session, file_path: str) -> Dict[str, Any]: 'total_rows': 0 } + f = None try: - with open(file_path, 'r', encoding='utf-8') as file: - reader = csv.DictReader(file) + f, used_encoding = open_text_with_fallbacks(file_path) + reader = csv.DictReader(f) - headers = reader.fieldnames or [] - if len(headers) < 2: - result['errors'].append("Invalid CSV format: expected at least 2 columns") - return result + headers = reader.fieldnames or [] + if len(headers) < 2: + result['errors'].append("Invalid CSV format: expected at least 2 columns") + return result - for row_num, row in enumerate(reader, start=2): - result['total_rows'] += 1 + for row_num, row in enumerate(reader, start=2): + result['total_rows'] += 1 - try: - file_no = row.get('File_No', '').strip() - if not file_no: - result['errors'].append(f"Row {row_num}: Missing file number") - continue + try: + file_no = row.get('File_No', '').strip() + if not file_no: + result['errors'].append(f"Row {row_num}: Missing file number") + continue - # Find the case - case = db.query(Case).filter(Case.file_no == file_no).first() - if not case: - result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") - continue + # Find the case + case = db.query(Case).filter(Case.file_no == file_no).first() + if not case: + result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") + continue - document = Document( - case_id=case.id, - document_type=row.get('Document_Type', '').strip() or 'QDRO', - file_name=row.get('File_Name', '').strip() or None, - description=row.get('Description', '').strip() or None, - uploaded_date=parse_date(row.get('Date', '')) - ) + document = Document( + case_id=case.id, + document_type=row.get('Document_Type', '').strip() or 'QDRO', + file_name=row.get('File_Name', '').strip() or None, + description=row.get('Description', '').strip() or None, + uploaded_date=parse_date(row.get('Date', '')) + ) - db.add(document) - result['success'] += 1 + db.add(document) + result['success'] += 1 - except Exception as e: - result['errors'].append(f"Row {row_num}: {str(e)}") + except Exception as e: + result['errors'].append(f"Row {row_num}: {str(e)}") db.commit() except Exception as e: result['errors'].append(f"Import failed: {str(e)}") db.rollback() + finally: + if f: + f.close() return result @@ -752,55 +810,59 @@ def import_payments_data(db: Session, file_path: str) -> Dict[str, Any]: 'total_rows': 0 } + f = None try: - with open(file_path, 'r', encoding='utf-8') as file: - reader = csv.DictReader(file) + f, used_encoding = open_text_with_fallbacks(file_path) + reader = csv.DictReader(f) - headers = reader.fieldnames or [] - if len(headers) < 2: - result['errors'].append("Invalid CSV format: expected at least 2 columns") - return result + headers = reader.fieldnames or [] + if len(headers) < 2: + result['errors'].append("Invalid CSV format: expected at least 2 columns") + return result - for row_num, row in enumerate(reader, start=2): - result['total_rows'] += 1 + for row_num, row in enumerate(reader, start=2): + result['total_rows'] += 1 - try: - file_no = row.get('File_No', '').strip() - if not file_no: - result['errors'].append(f"Row {row_num}: Missing file number") - continue + try: + file_no = row.get('File_No', '').strip() + if not file_no: + result['errors'].append(f"Row {row_num}: Missing file number") + continue - # Find the case - case = db.query(Case).filter(Case.file_no == file_no).first() - if not case: - result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") - continue + # Find the case + case = db.query(Case).filter(Case.file_no == file_no).first() + if not case: + result['errors'].append(f"Row {row_num}: Case with file number '{file_no}' not found") + continue - amount = parse_float(row.get('Amount', '0')) - if amount is None: - result['errors'].append(f"Row {row_num}: Invalid amount") - continue + amount = parse_float(row.get('Amount', '0')) + if amount is None: + result['errors'].append(f"Row {row_num}: Invalid amount") + continue - payment = Payment( - case_id=case.id, - payment_date=parse_date(row.get('Date', '')), - payment_type=row.get('Type', '').strip() or None, - amount=amount, - description=row.get('Description', '').strip() or None, - check_number=row.get('Check_Number', '').strip() or None - ) + payment = Payment( + case_id=case.id, + payment_date=parse_date(row.get('Date', '')), + payment_type=row.get('Type', '').strip() or None, + amount=amount, + description=row.get('Description', '').strip() or None, + check_number=row.get('Check_Number', '').strip() or None + ) - db.add(payment) - result['success'] += 1 + db.add(payment) + result['success'] += 1 - except Exception as e: - result['errors'].append(f"Row {row_num}: {str(e)}") + except Exception as e: + result['errors'].append(f"Row {row_num}: {str(e)}") db.commit() except Exception as e: result['errors'].append(f"Import failed: {str(e)}") db.rollback() + finally: + if f: + f.close() return result @@ -1435,7 +1497,14 @@ async def admin_upload_files( # Generate unique filename to avoid conflicts file_id = str(uuid.uuid4()) file_ext = os.path.splitext(file.filename)[1] - stored_filename = f"{file_id}{file_ext}" + # Determine import type from original filename for better categorization later + try: + detected_type = get_import_type_from_filename(file.filename) + except ValueError: + detected_type = 'unknown' + + # Prefix stored filename with detected type to preserve context + stored_filename = f"{detected_type}_{file_id}{file_ext}" file_path = os.path.join(import_dir, stored_filename) # Save file @@ -1443,14 +1512,8 @@ async def admin_upload_files( with open(file_path, "wb") as f: f.write(contents) - # Determine import type from filename - try: - import_type = get_import_type_from_filename(file.filename) - except ValueError as e: - errors.append(f"File '{file.filename}': {str(e)}") - # Clean up uploaded file - os.remove(file_path) - continue + # Use detected type (already derived from original name) + import_type = detected_type results.append({ 'filename': file.filename,