fix: Handle duplicate IDs in rolodex import gracefully
- Add duplicate tracking within the import session (`seen_in_import` set)
- Skip records that already exist in the database
- Fall back to row-by-row insert when the bulk insert fails
- Track skipped records in the result
- Prevent cascade errors after a UNIQUE constraint violation
- Gracefully handle legacy data with duplicate IDs
This commit is contained in:
@@ -591,13 +591,16 @@ def import_rvarlkup(db: Session, file_path: str) -> Dict[str, Any]:
|
||||
|
||||
def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
||||
"""Import ROLODEX.csv → Rolodex model."""
|
||||
result = {'success': 0, 'errors': [], 'total_rows': 0}
|
||||
result = {'success': 0, 'errors': [], 'total_rows': 0, 'skipped': 0}
|
||||
|
||||
try:
|
||||
f, encoding = open_text_with_fallbacks(file_path)
|
||||
reader = csv.DictReader(f)
|
||||
|
||||
# Track IDs we've seen in this import to handle duplicates
|
||||
seen_in_import = set()
|
||||
batch = []
|
||||
|
||||
for row_num, row in enumerate(reader, start=2):
|
||||
result['total_rows'] += 1
|
||||
|
||||
@@ -606,6 +609,17 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
||||
if not rolodex_id:
|
||||
continue
|
||||
|
||||
# Skip if we've already processed this ID in current import
|
||||
if rolodex_id in seen_in_import:
|
||||
result['skipped'] += 1
|
||||
continue
|
||||
|
||||
# Skip if it already exists in database
|
||||
if db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
|
||||
result['skipped'] += 1
|
||||
seen_in_import.add(rolodex_id)
|
||||
continue
|
||||
|
||||
record = Rolodex(
|
||||
id=rolodex_id,
|
||||
prefix=clean_string(row.get('Prefix')),
|
||||
@@ -629,20 +643,47 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
||||
memo=clean_string(row.get('Memo'))
|
||||
)
|
||||
batch.append(record)
|
||||
seen_in_import.add(rolodex_id)
|
||||
|
||||
if len(batch) >= BATCH_SIZE:
|
||||
try:
|
||||
db.bulk_save_objects(batch)
|
||||
db.commit()
|
||||
result['success'] += len(batch)
|
||||
batch = []
|
||||
except IntegrityError as ie:
|
||||
db.rollback()
|
||||
# Handle any remaining duplicates by inserting one at a time
|
||||
for record in batch:
|
||||
try:
|
||||
db.add(record)
|
||||
db.commit()
|
||||
result['success'] += 1
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
result['skipped'] += 1
|
||||
batch = []
|
||||
|
||||
except Exception as e:
|
||||
result['errors'].append(f"Row {row_num}: {str(e)}")
|
||||
|
||||
# Save remaining batch
|
||||
if batch:
|
||||
try:
|
||||
db.bulk_save_objects(batch)
|
||||
db.commit()
|
||||
result['success'] += len(batch)
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
# Handle any remaining duplicates by inserting one at a time
|
||||
for record in batch:
|
||||
try:
|
||||
db.add(record)
|
||||
db.commit()
|
||||
result['success'] += 1
|
||||
except IntegrityError:
|
||||
db.rollback()
|
||||
result['skipped'] += 1
|
||||
|
||||
f.close()
|
||||
logger.info("import_rolodex_complete", **result)
|
||||
|
||||
BIN
test_encoding.db
Normal file
BIN
test_encoding.db
Normal file
Binary file not shown.
Reference in New Issue
Block a user