# ferrero-opentext/Python-Version/scripts/debug_b1_pagination.py

#!/usr/bin/env python3
"""
Debug Script for B1->B2 Pagination Issue
----------------------------------------
Emulates the B1->B2 download process but ONLY lists assets.
Does NOT download files or upload to Box.
Generates a CSV of found assets to verify if we are hitting a 100-file limit.
"""

import sys
import os
import csv
import logging
import argparse
from datetime import datetime

# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from shared.config_loader import load_config
from shared.dam_client import DAMClient

# Configure root logging once for the whole script: INFO and above,
# timestamped records, emitted through a single stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Named logger used for every message this script emits.
logger = logging.getLogger('DebugB1Pagination')

def _safe_filename_part(value):
    """Return *value* as text that is safe to embed in a file name.

    Every character other than letters, digits, '-', '_' and '.' is replaced
    with '_'. This matters because the campaign number falls back to 'N/A',
    whose slash would otherwise be treated as a directory separator when the
    CSV file is opened. An empty result is replaced with 'unknown'.
    """
    text = str(value)
    cleaned = ''.join(ch if ch.isalnum() or ch in '-_.' else '_' for ch in text)
    return cleaned or 'unknown'


def _asset_type_name(asset):
    """Return the asset's type: the 'name' entry when 'asset_type' is a dict, else the raw value."""
    asset_type = asset.get('asset_type')
    if isinstance(asset_type, dict):
        return asset_type.get('name')
    return asset_type


def _asset_row(asset):
    """Build one CSV row (Asset ID, Name, Type, Folder Path, File Size) from an asset mapping."""
    return [
        asset.get('asset_id'),
        asset.get('name'),
        _asset_type_name(asset),
        asset.get('folder_path'),
        asset.get('file_size'),
    ]


def _select_campaign(dam, campaign_id):
    """Return the campaign to inspect, or None when no suitable campaign is found.

    With a campaign_id, every "Global comm" campaign is searched regardless of
    status and the one whose 'campaign_id' matches is returned. Without one,
    the first "Global comm" campaign with status "B1" is returned.
    """
    if campaign_id:
        logger.info(f"Searching for specific campaign: {campaign_id}")
        # Search ALL campaigns (status=None) to find the specific one, as it might be B1 or B2
        campaigns = dam.search_campaigns(status=None, campaign_type="Global comm") or []
        for candidate in campaigns:
            if candidate.get('campaign_id') == campaign_id:
                return candidate
        logger.warning(f"Campaign {campaign_id} not found in Global comm list.")
        return None

    logger.info("Searching for any B1 Global campaign...")
    campaigns = dam.search_campaigns(status="B1", campaign_type="Global comm")
    if not campaigns:
        return None
    campaign = campaigns[0]
    logger.info(f"Found {len(campaigns)} campaigns. Using first one: {campaign.get('campaign_name')}")
    return campaign


def main():
    """List the assets of one Global comm campaign and dump them to a CSV file.

    Parses the command line, connects to the DAM, selects a campaign (by
    --campaign-id, or the first B1 campaign), retrieves its assets with
    is_global=True, writes them to a timestamped CSV in the current working
    directory, and logs whether the asset count looks like a 100-item
    pagination cap.

    Exits with status 1 when the DAM connection fails, no campaign can be
    selected, the campaign has no folder asset id, or asset retrieval raises.
    """
    parser = argparse.ArgumentParser(description='Debug B1->B2 Pagination')
    parser.add_argument('--campaign-id', help='Specific Campaign ID to debug (e.g., C000000527)')
    parser.add_argument('--auth-pfx', action='store_true', help='Use mTLS certificate authentication')
    parser.add_argument('--auth-pfx-v2', action='store_true', help='Use mTLS V2 (Hybrid) authentication')
    args = parser.parse_args()

    # Determine auth mode; the V2 flag takes precedence when both are given.
    auth_mode = 'oauth'
    if args.auth_pfx_v2:
        auth_mode = 'mtls_v2'
    elif args.auth_pfx:
        auth_mode = 'mtls'

    logger.info(f"Starting Debug Script (Auth: {auth_mode})")

    # NOTE(review): the config path is passed as a relative path; presumably it
    # is resolved against the working directory -- confirm against load_config.
    config = load_config('config/config.yaml')
    dam = DAMClient(config, auth_mode=auth_mode)

    if not dam.test_connection():
        logger.error("DAM connection failed")
        sys.exit(1)

    campaign = _select_campaign(dam, args.campaign_id)
    if not campaign:
        if args.campaign_id:
            # The folder asset id is only available from the search result, so
            # an unmatched campaign id cannot be processed any further.
            logger.error(f"Could not find campaign {args.campaign_id} in the Global comm list (all statuses). This script requires the campaign to be found via search to get the Folder Asset ID.")
        else:
            logger.error("No B1 campaigns found and no ID provided.")
        sys.exit(1)

    campaign_asset_id = campaign.get('asset_id')
    if campaign_asset_id is None:
        logger.error("Campaign record has no 'asset_id'; cannot retrieve its assets.")
        sys.exit(1)
    campaign_name = campaign.get('campaign_name', 'N/A')
    campaign_number = campaign.get('campaign_id', 'N/A')

    logger.info(f"Processing Campaign: {campaign_name} ({campaign_number})")
    logger.info(f"Campaign Folder Asset ID: {campaign_asset_id}")

    # Get Master Assets (B1 uses Final Assets, is_global=True)
    logger.info("Retrieving assets (is_global=True)...")
    try:
        assets = dam.get_master_assets(campaign_asset_id, is_global=True)
    except Exception as e:
        # logger.exception keeps the traceback, which is the point of a debug script.
        logger.exception(f"Failed to get assets: {e}")
        sys.exit(1)

    # Treat a None result as "no assets" and materialize so len() is always valid.
    assets = list(assets or [])
    count = len(assets)
    logger.info(f"Total Assets Found: {count}")

    # Generate CSV; the campaign number is sanitized so values such as 'N/A'
    # cannot introduce a path separator into the file name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"debug_b1_assets_{_safe_filename_part(campaign_number)}_{timestamp}.csv"

    logger.info(f"Writing results to {csv_filename}...")

    with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Asset ID', 'Name', 'Type', 'Folder Path', 'File Size'])
        writer.writerows(_asset_row(asset) for asset in assets)

    logger.info("Done.")
    if count == 100:
        logger.warning("!!! WARNING: Exactly 100 assets found. This strongly suggests a pagination limit is being hit. !!!")
    elif count > 100:
        logger.info("Good news: More than 100 assets found. Pagination might not be the issue, or is working.")
    else:
        logger.info(f"Found {count} assets (less than 100).")

# Run the debug workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()