ferrero-opentext/Python-Version/scripts/debug_b1_pagination.py

134 lines
5 KiB
Python

#!/usr/bin/env python3
"""
Debug Script for B1->B2 Pagination Issue
----------------------------------------
Emulates the B1->B2 download process but ONLY lists assets.
Does NOT download files or upload to Box.
Generates a CSV of found assets to verify if we are hitting a 100-file limit.
"""
import sys
import os
import csv
import logging
import argparse
from datetime import datetime
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config
from shared.dam_client import DAMClient
# Logging: configure the root logger once for the whole script. Records at
# INFO and above are formatted as "<time> - <logger> - <level> - <message>"
# and sent to a StreamHandler (which writes to stderr by default).
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Named logger used by every message this script emits.
logger = logging.getLogger('DebugB1Pagination')
def main():
    """List the assets of one Global campaign and write them to a CSV file.

    Command-line options:
        --campaign-id   Campaign ID to inspect (e.g. C000000527). When omitted,
                        the first campaign returned by the B1 search is used.
        --auth-pfx      Select the 'mtls' auth mode.
        --auth-pfx-v2   Select the 'mtls_v2' auth mode (wins over --auth-pfx).
                        With neither flag the auth mode is 'oauth'.

    The script only lists assets; it performs no download or upload. The CSV
    is written to the current working directory as
    ``debug_b1_assets_<campaign>_<timestamp>.csv``. After writing, the asset
    count is compared against 100 to hint at whether a pagination limit is
    being hit.

    Exits with status 1 when the DAM connection test fails, when no campaign
    can be resolved, or when asset retrieval raises.
    """
    # Count that, when hit exactly, suggests only a single page was returned.
    suspected_page_size = 100

    parser = argparse.ArgumentParser(description='Debug B1->B2 Pagination')
    parser.add_argument('--campaign-id', help='Specific Campaign ID to debug (e.g., C000000527)')
    parser.add_argument('--auth-pfx', action='store_true', help='Use mTLS certificate authentication')
    parser.add_argument('--auth-pfx-v2', action='store_true', help='Use mTLS V2 (Hybrid) authentication')
    args = parser.parse_args()

    # Determine auth mode: V2 takes precedence over plain mTLS; OAuth is the default.
    if args.auth_pfx_v2:
        auth_mode = 'mtls_v2'
    elif args.auth_pfx:
        auth_mode = 'mtls'
    else:
        auth_mode = 'oauth'
    logger.info(f"Starting Debug Script (Auth: {auth_mode})")

    # Load config.
    # NOTE(review): the path is relative; presumably it is resolved against the
    # current working directory -- confirm in load_config.
    config = load_config('config/config.yaml')
    dam = DAMClient(config, auth_mode=auth_mode)
    if not dam.test_connection():
        logger.error("DAM connection failed")
        sys.exit(1)

    if args.campaign_id:
        logger.info(f"Searching for specific campaign: {args.campaign_id}")
        # Search ALL campaigns (status=None) to find the specific one, as it might be B1 or B2.
        # `or []` mirrors the falsy-result guard used by the B1 branch below.
        campaigns = dam.search_campaigns(status=None, campaign_type="Global comm") or []
        campaign = next(
            (c for c in campaigns if c.get('campaign_id') == args.campaign_id),
            None,
        )
        if not campaign:
            # The folder asset ID is only available from the search result, so
            # there is nothing further to try once the lookup misses.
            logger.error(
                f"Campaign {args.campaign_id} not found in Global comm list. "
                "This script requires the campaign to be found via search to get the Folder Asset ID."
            )
            sys.exit(1)
    else:
        logger.info("Searching for any B1 Global campaign...")
        campaigns = dam.search_campaigns(status="B1", campaign_type="Global comm")
        if not campaigns:
            logger.error("No B1 campaigns found and no ID provided.")
            sys.exit(1)
        campaign = campaigns[0]
        logger.info(f"Found {len(campaigns)} campaigns. Using first one: {campaign.get('campaign_name')}")

    campaign_asset_id = campaign['asset_id']
    campaign_name = campaign['campaign_name']
    campaign_number = campaign.get('campaign_id', 'N/A')
    logger.info(f"Processing Campaign: {campaign_name} ({campaign_number})")
    logger.info(f"Campaign Folder Asset ID: {campaign_asset_id}")

    # Get Master Assets (B1 uses Final Assets, is_global=True)
    logger.info("Retrieving assets (is_global=True)...")
    try:
        assets = dam.get_master_assets(campaign_asset_id, is_global=True)
    except Exception as e:
        # Top-level boundary of a debug script: keep the traceback in the log.
        logger.exception(f"Failed to get assets: {e}")
        sys.exit(1)
    count = len(assets)
    logger.info(f"Total Assets Found: {count}")

    # Generate CSV. The campaign number goes into the filename, so replace any
    # character that is not filename-safe: the 'N/A' fallback contains a path
    # separator that would otherwise make open() look for a missing directory.
    safe_number = ''.join(
        ch if ch.isalnum() or ch in '-_.' else '_'
        for ch in str(campaign_number)
    )
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"debug_b1_assets_{safe_number}_{timestamp}.csv"
    logger.info(f"Writing results to {csv_filename}...")
    with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Asset ID', 'Name', 'Type', 'Folder Path', 'File Size'])
        for asset in assets:
            # 'asset_type' may be a dict carrying a 'name' or a plain value.
            asset_type = asset.get('asset_type')
            if isinstance(asset_type, dict):
                asset_type = asset_type.get('name')
            writer.writerow([
                asset.get('asset_id'),
                asset.get('name'),
                asset_type,
                asset.get('folder_path'),
                asset.get('file_size'),
            ])
    logger.info("Done.")

    if count == suspected_page_size:
        logger.warning("!!! WARNING: Exactly 100 assets found. This strongly suggests a pagination limit is being hit. !!!")
    elif count > suspected_page_size:
        logger.info("Good news: More than 100 assets found. Pagination might not be the issue, or is working.")
    else:
        logger.info(f"Found {count} assets (less than 100).")
# Run the debug routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()