Fix: Skip already-processed assets on B1→B2 retry runs

Previously, the script re-downloaded and re-uploaded all assets on every
retry, even those already successfully stored in the DB and Box. For large
campaigns (1300+ assets), this caused unnecessary load and duplicate uploads.

The script now checks the DB via find_global_master_by_opentext_id() before
downloading. Assets already in the DB with a non-empty Box URL are skipped
and counted toward the processed total, so only assets that genuinely failed
(or were never stored) are retried.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-04-10 09:07:07 +02:00
parent 03c5ab65a8
commit 39a495e4cc

View file

@ -103,6 +103,7 @@ def process_campaign(campaign, dam, box, db, notifier, config):
return {'success': False, 'processed': 0, 'failed': total_assets}
# Process each asset
skipped_count = 0
for asset in master_assets:
asset_id = asset['asset_id']
asset_name = asset.get('name', 'unknown')
@ -117,7 +118,7 @@ def process_campaign(campaign, dam, box, db, notifier, config):
# SAFEGUARD: Check if it's a folder (should be handled by dam_client, but double check)
asset_type = asset.get('asset_type', {})
type_name = asset_type.get('name', '') if isinstance(asset_type, dict) else str(asset_type)
if 'folder' in type_name.lower():
logger.warning("Skipping item identified as folder: {} (Type: {})".format(asset_name, type_name))
continue
@ -128,6 +129,22 @@ def process_campaign(campaign, dam, box, db, notifier, config):
logger.warning("Skipping item with no extension (likely folder/container): {}".format(asset_name))
continue
# SKIP CHECK: If this asset was already processed (exists in DB), skip re-downloading
existing_tracking_id = db.find_global_master_by_opentext_id(asset_id)
if existing_tracking_id:
existing_asset = db.get_master_asset(existing_tracking_id)
if existing_asset and existing_asset.get('box_url'):
skipped_count += 1
logger.info("⏭ Already processed: {}{} (skipping)".format(asset_name, existing_tracking_id))
processed_assets.append({
'asset_id': asset_id,
'asset_name': asset_name,
'tracking_id': existing_tracking_id,
'box_file_id': existing_asset.get('box_file_id', ''),
'box_url': existing_asset.get('box_url', '')
})
continue
# 1. Download from DAM
file_path = dam.download_asset(
asset_id,
@ -190,6 +207,8 @@ def process_campaign(campaign, dam, box, db, notifier, config):
logger.info("Campaign {} Results:".format(campaign_id))
logger.info(" Total: {}".format(total_assets))
logger.info(" Successful: {}".format(len(processed_assets)))
logger.info(" Skipped (already done): {}".format(skipped_count))
logger.info(" New this run: {}".format(len(processed_assets) - skipped_count))
logger.info(" Failed: {}".format(len(failed_assets)))
logger.info(" All Done: {}".format("YES" if all_done else "NO"))
logger.info("")