fix: Handle duplicate IDs in rolodex import gracefully
- Added duplicate tracking within import session (seen_in_import set)
- Skip records that already exist in database
- Added fallback to row-by-row insert when bulk insert fails
- Track skipped records in result
- Prevents cascade errors after UNIQUE constraint violation
- Gracefully handles legacy data with duplicate IDs
This commit is contained in:
@@ -591,13 +591,16 @@ def import_rvarlkup(db: Session, file_path: str) -> Dict[str, Any]:
|
|||||||
|
|
||||||
def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
||||||
"""Import ROLODEX.csv → Rolodex model."""
|
"""Import ROLODEX.csv → Rolodex model."""
|
||||||
result = {'success': 0, 'errors': [], 'total_rows': 0}
|
result = {'success': 0, 'errors': [], 'total_rows': 0, 'skipped': 0}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f, encoding = open_text_with_fallbacks(file_path)
|
f, encoding = open_text_with_fallbacks(file_path)
|
||||||
reader = csv.DictReader(f)
|
reader = csv.DictReader(f)
|
||||||
|
|
||||||
|
# Track IDs we've seen in this import to handle duplicates
|
||||||
|
seen_in_import = set()
|
||||||
batch = []
|
batch = []
|
||||||
|
|
||||||
for row_num, row in enumerate(reader, start=2):
|
for row_num, row in enumerate(reader, start=2):
|
||||||
result['total_rows'] += 1
|
result['total_rows'] += 1
|
||||||
|
|
||||||
@@ -606,6 +609,17 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
|||||||
if not rolodex_id:
|
if not rolodex_id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Skip if we've already processed this ID in current import
|
||||||
|
if rolodex_id in seen_in_import:
|
||||||
|
result['skipped'] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip if it already exists in database
|
||||||
|
if db.query(Rolodex).filter(Rolodex.id == rolodex_id).first():
|
||||||
|
result['skipped'] += 1
|
||||||
|
seen_in_import.add(rolodex_id)
|
||||||
|
continue
|
||||||
|
|
||||||
record = Rolodex(
|
record = Rolodex(
|
||||||
id=rolodex_id,
|
id=rolodex_id,
|
||||||
prefix=clean_string(row.get('Prefix')),
|
prefix=clean_string(row.get('Prefix')),
|
||||||
@@ -629,20 +643,47 @@ def import_rolodex(db: Session, file_path: str) -> Dict[str, Any]:
|
|||||||
memo=clean_string(row.get('Memo'))
|
memo=clean_string(row.get('Memo'))
|
||||||
)
|
)
|
||||||
batch.append(record)
|
batch.append(record)
|
||||||
|
seen_in_import.add(rolodex_id)
|
||||||
|
|
||||||
if len(batch) >= BATCH_SIZE:
|
if len(batch) >= BATCH_SIZE:
|
||||||
|
try:
|
||||||
db.bulk_save_objects(batch)
|
db.bulk_save_objects(batch)
|
||||||
db.commit()
|
db.commit()
|
||||||
result['success'] += len(batch)
|
result['success'] += len(batch)
|
||||||
batch = []
|
batch = []
|
||||||
|
except IntegrityError as ie:
|
||||||
|
db.rollback()
|
||||||
|
# Handle any remaining duplicates by inserting one at a time
|
||||||
|
for record in batch:
|
||||||
|
try:
|
||||||
|
db.add(record)
|
||||||
|
db.commit()
|
||||||
|
result['success'] += 1
|
||||||
|
except IntegrityError:
|
||||||
|
db.rollback()
|
||||||
|
result['skipped'] += 1
|
||||||
|
batch = []
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result['errors'].append(f"Row {row_num}: {str(e)}")
|
result['errors'].append(f"Row {row_num}: {str(e)}")
|
||||||
|
|
||||||
|
# Save remaining batch
|
||||||
if batch:
|
if batch:
|
||||||
|
try:
|
||||||
db.bulk_save_objects(batch)
|
db.bulk_save_objects(batch)
|
||||||
db.commit()
|
db.commit()
|
||||||
result['success'] += len(batch)
|
result['success'] += len(batch)
|
||||||
|
except IntegrityError:
|
||||||
|
db.rollback()
|
||||||
|
# Handle any remaining duplicates by inserting one at a time
|
||||||
|
for record in batch:
|
||||||
|
try:
|
||||||
|
db.add(record)
|
||||||
|
db.commit()
|
||||||
|
result['success'] += 1
|
||||||
|
except IntegrityError:
|
||||||
|
db.rollback()
|
||||||
|
result['skipped'] += 1
|
||||||
|
|
||||||
f.close()
|
f.close()
|
||||||
logger.info("import_rolodex_complete", **result)
|
logger.info("import_rolodex_complete", **result)
|
||||||
|
|||||||
BIN
test_encoding.db
Normal file
BIN
test_encoding.db
Normal file
Binary file not shown.
Reference in New Issue
Block a user