#!/usr/bin/env python3
"""
Debug Script for B1->B2 Pagination Issue
----------------------------------------
Emulates the B1->B2 download process but ONLY lists assets.
Does NOT download files or upload to Box.
Generates a CSV of found assets to verify if we are hitting a 100-file limit.

Usage:
    --campaign-id ID   look up one specific "Global comm" campaign by its ID
    --auth-pfx         authenticate with the 'mtls' auth mode
    --auth-pfx-v2      authenticate with the 'mtls_v2' auth mode (wins over --auth-pfx)

Without --campaign-id the first campaign returned by the B1 search is used.
"""

import sys
import os
import csv
import logging
import argparse
from datetime import datetime
from typing import Any, Dict, Iterable, Optional

# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from shared.config_loader import load_config
from shared.dam_client import DAMClient

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger('DebugB1Pagination')


def _parse_args() -> argparse.Namespace:
    """Parse the command-line options of the debug script."""
    parser = argparse.ArgumentParser(description='Debug B1->B2 Pagination')
    parser.add_argument('--campaign-id', help='Specific Campaign ID to debug (e.g., C000000527)')
    parser.add_argument('--auth-pfx', action='store_true', help='Use mTLS certificate authentication')
    parser.add_argument('--auth-pfx-v2', action='store_true', help='Use mTLS V2 (Hybrid) authentication')
    return parser.parse_args()


def _resolve_auth_mode(args: argparse.Namespace) -> str:
    """Map the auth flags to an auth mode string; V2 takes precedence over V1."""
    if args.auth_pfx_v2:
        return 'mtls_v2'
    if args.auth_pfx:
        return 'mtls'
    return 'oauth'


def _find_campaign(dam: Any, campaign_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Return the campaign to inspect, or None when the search yields nothing usable.

    With a campaign_id, all "Global comm" campaigns are searched regardless of
    status and the one whose 'campaign_id' matches is returned. Without one,
    the first campaign of the B1 "Global comm" search is returned.
    """
    if campaign_id:
        logger.info(f"Searching for specific campaign: {campaign_id}")
        # Search ALL campaigns (status=None) to find the specific one, as it might be B1 or B2
        campaigns = dam.search_campaigns(status=None, campaign_type="Global comm")
        for candidate in campaigns:
            if candidate.get('campaign_id') == campaign_id:
                return candidate
        logger.warning(f"Campaign {campaign_id} not found in Global comm list.")
        return None

    logger.info("Searching for any B1 Global campaign...")
    campaigns = dam.search_campaigns(status="B1", campaign_type="Global comm")
    if campaigns:
        first = campaigns[0]
        logger.info(f"Found {len(campaigns)} campaigns. Using first one: {first.get('campaign_name')}")
        return first
    return None


def _safe_filename_part(value: Any) -> str:
    """Turn an arbitrary value into text that is safe inside a file name.

    Every character that is not alphanumeric, '.', '_' or '-' becomes '_'.
    This matters because the campaign number falls back to 'N/A', whose '/'
    would otherwise be treated as a directory separator by open().
    """
    cleaned = ''.join(ch if (ch.isalnum() or ch in '._-') else '_' for ch in str(value))
    return cleaned or 'unknown'


def _asset_type_name(asset: Dict[str, Any]) -> Any:
    """Return the asset type's 'name' when the type is a dict, else the raw type value."""
    asset_type = asset.get('asset_type')
    if isinstance(asset_type, dict):
        return asset_type.get('name')
    return asset_type


def _write_assets_csv(csv_filename: str, assets: Iterable[Dict[str, Any]]) -> None:
    """Write one CSV row per asset (ID, name, type, folder path, size) to csv_filename."""
    # newline='' is what the csv module requires so it controls line endings itself.
    with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Asset ID', 'Name', 'Type', 'Folder Path', 'File Size'])
        for asset in assets:
            writer.writerow([
                asset.get('asset_id'),
                asset.get('name'),
                _asset_type_name(asset),
                asset.get('folder_path'),
                asset.get('file_size'),
            ])


def _report_pagination_verdict(count: int) -> None:
    """Log whether the asset count looks like it was capped at 100 results."""
    if count == 100:
        logger.warning("!!! WARNING: Exactly 100 assets found. This strongly suggests a pagination limit is being hit. !!!")
    elif count > 100:
        logger.info("Good news: More than 100 assets found. Pagination might not be the issue, or is working.")
    else:
        logger.info(f"Found {count} assets (less than 100).")


def main():
    """Locate a campaign, list its master assets, dump them to CSV and report the count.

    Exits with status 1 when the DAM connection test fails, when no campaign
    can be resolved, or when retrieving the assets raises.
    """
    args = _parse_args()

    # Determine auth mode
    auth_mode = _resolve_auth_mode(args)
    logger.info(f"Starting Debug Script (Auth: {auth_mode})")

    # Load config (path is relative to the current working directory)
    config = load_config('config/config.yaml')
    dam = DAMClient(config, auth_mode=auth_mode)

    if not dam.test_connection():
        logger.error("DAM connection failed")
        sys.exit(1)

    campaign = _find_campaign(dam, args.campaign_id)

    if not campaign:
        if args.campaign_id:
            # The 'campaign_id' arg is likely the 'C000000...' string, not the
            # Folder Asset ID, so without a search hit there is nothing to query.
            # The lookup ran with status=None, so the message names the
            # Global comm list rather than the B1 list.
            logger.error(f"Could not find campaign {args.campaign_id} in the Global comm list. This script requires the campaign to be found via search to get the Folder Asset ID.")
        else:
            logger.error("No B1 campaigns found and no ID provided.")
        sys.exit(1)

    campaign_asset_id = campaign['asset_id']
    campaign_name = campaign['campaign_name']
    campaign_number = campaign.get('campaign_id', 'N/A')

    logger.info(f"Processing Campaign: {campaign_name} ({campaign_number})")
    logger.info(f"Campaign Folder Asset ID: {campaign_asset_id}")

    # Get Master Assets (B1 uses Final Assets, is_global=True)
    logger.info("Retrieving assets (is_global=True)...")
    try:
        assets = dam.get_master_assets(campaign_asset_id, is_global=True)
    except Exception as e:
        logger.error(f"Failed to get assets: {e}")
        sys.exit(1)

    count = len(assets)
    logger.info(f"Total Assets Found: {count}")

    # Generate CSV. The campaign number is sanitised for the file name only;
    # the log lines above keep showing the original value.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"debug_b1_assets_{_safe_filename_part(campaign_number)}_{timestamp}.csv"

    logger.info(f"Writing results to {csv_filename}...")
    _write_assets_csv(csv_filename, assets)

    logger.info("Done.")

    _report_pagination_verdict(count)


if __name__ == '__main__':
    main()