fix: Handle duplicate IDs in rolodex import gracefully

- Added duplicate tracking within import session (seen_in_import set)
- Skip records that already exist in database
- Added fallback to row-by-row insert when bulk insert fails (pattern sketched below)
- Track skipped records in result
- Prevents cascade errors after UNIQUE constraint violation
- Gracefully handles legacy data with duplicate IDs
HotSwapp committed 2025-10-12 21:07:52 -05:00
parent 789eb2c134, commit c3e741b7ad
3 changed files with 49 additions and 8 deletions
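The fallback the bullets describe is a bulk insert that degrades to row-by-row inserts when a UNIQUE violation occurs. A minimal standalone sketch of that pattern follows, assuming a SQLAlchemy Session, a mapped model whose primary key may collide, and a result dict with 'success' and 'skipped' counters; the helper name is illustrative and not part of the commit.

# Minimal sketch of the bulk-insert-with-row-by-row-fallback pattern.
# Assumes a SQLAlchemy Session and ORM objects whose primary key may
# collide; the helper name is hypothetical.
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session


def save_batch_with_fallback(db: Session, batch: list, result: dict) -> None:
    """Try a fast bulk insert; on a UNIQUE violation, retry row by row."""
    try:
        db.bulk_save_objects(batch)
        db.commit()
        result['success'] += len(batch)
    except IntegrityError:
        # The failed bulk insert poisons the session, so roll back first,
        # then insert records one at a time and count duplicates as skipped.
        db.rollback()
        for record in batch:
            try:
                db.add(record)
                db.commit()
                result['success'] += 1
            except IntegrityError:
                db.rollback()
                result['skipped'] += 1

Committing per record in the fallback keeps one duplicate from rolling back its whole batch, at the cost of extra round trips on the rare conflicting batch.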


@@ -591,13 +591,16 @@ def import_rvarlkup(db: Session, file_path: str) -> Dict[str, Any]:
 def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
     """Import ROLODEX.csv → Rolodex model."""
-    result = {'success': 0, 'errors': [], 'total_rows': 0}
+    result = {'success': 0, 'errors': [], 'total_rows': 0, 'skipped': 0}
     try:
         f, encoding = open_text_with_fallbacks(file_path)
         reader = csv.DictReader(f)
+        # Track IDs we've seen in this import to handle duplicates
+        seen_in_import = set()
         batch = []
         for row_num, row in enumerate(reader, start=2):
             result['total_rows'] += 1
@@ -606,6 +609,17 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
                 if not rolodex_id:
                     continue
+                # Skip if we've already processed this ID in current import
+                if rolodex_id in seen_in_import:
+                    result['skipped'] += 1
+                    continue
+                # Skip if it already exists in database
+                if db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
+                    result['skipped'] += 1
+                    seen_in_import.add(rolodex_id)
+                    continue
                 record = Rolodex(
                     id=rolodex_id,
                     prefix=clean_string(row.get('Prefix')),
@@ -629,20 +643,47 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
                     memo=clean_string(row.get('Memo'))
                 )
                 batch.append(record)
+                seen_in_import.add(rolodex_id)
                 if len(batch) >= BATCH_SIZE:
-                    db.bulk_save_objects(batch)
-                    db.commit()
-                    result['success'] += len(batch)
-                    batch = []
+                    try:
+                        db.bulk_save_objects(batch)
+                        db.commit()
+                        result['success'] += len(batch)
+                        batch = []
+                    except IntegrityError as ie:
+                        db.rollback()
+                        # Handle any remaining duplicates by inserting one at a time
+                        for record in batch:
+                            try:
+                                db.add(record)
+                                db.commit()
+                                result['success'] += 1
+                            except IntegrityError:
+                                db.rollback()
+                                result['skipped'] += 1
+                        batch = []
             except Exception as e:
                 result['errors'].append(f"Row {row_num}: {str(e)}")
         # Save remaining batch
         if batch:
-            db.bulk_save_objects(batch)
-            db.commit()
-            result['success'] += len(batch)
+            try:
+                db.bulk_save_objects(batch)
+                db.commit()
+                result['success'] += len(batch)
+            except IntegrityError:
+                db.rollback()
+                # Handle any remaining duplicates by inserting one at a time
+                for record in batch:
+                    try:
+                        db.add(record)
+                        db.commit()
+                        result['success'] += 1
+                    except IntegrityError:
+                        db.rollback()
+                        result['skipped'] += 1
         f.close()
         logger.info("import_rolodex_complete", **result)
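For reference, a usage sketch of the importer after this change; the module path, engine URL, and session setup below are assumptions, while the import_rolodex signature and result keys come from the diff above.

# Usage sketch: module path and engine URL are assumptions; the function
# signature and result keys are taken from the diff above.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from importers import import_rolodex  # hypothetical module path

engine = create_engine("sqlite:///delphi.db")
Session = sessionmaker(bind=engine)

with Session() as db:
    result = import_rolodex(db, "ROLODEX.csv")
    print(f"rows={result['total_rows']} imported={result['success']} "
          f"skipped={result['skipped']} errors={len(result['errors'])}")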

delphi.db (binary file, not shown)

test_encoding.db (new file, binary not shown)