ferrero-opentext/Python-Version/scripts/a2_to_a3_upload_polling.py
DJP 39a41df21d Fix CreativeX lookup to use original Box filename not stripped version
Changes database lookup strategy to match on full filename as it appears
in Box and in the CreativeX PDF report filename field.

Critical Design Change:

Old (incorrect):
- Strip job number and tracking ID from Box filename
- Lookup: NUT_PL_pl_TEST-E2E_EHI_1x1.png
- Database has: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- RESULT: No match found, uses defaults

New (correct):
- Use original Box filename for lookup
- Lookup: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- Database has: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- RESULT: Match found, uses actual score

Rationale:
The CreativeX PDF report contains a "filename" field that stores the
actual asset filename, including the job number and tracking ID. This is
the name that LlamaExtract extracts and that is stored in the database.

The A2→A3 workflow receives files from Box with the SAME filename
structure (job_brand_country_lang_subject_trackingID.ext).

Therefore, we match on the complete original filename, not the stripped
version.

Database Storage Pattern:
- CreativeX PDF named: anything.pdf (name doesn't matter)
- PDF contains field: filename = "6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png"
- Database stores: filename = "6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png"
- A2→A3 receives: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png from Box
- Lookup matches exactly

The clean filename is still used for the DAM upload; only the lookup uses the original filename.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 12:29:55 -05:00

438 lines
17 KiB
Python
Executable file

#!/usr/bin/env python3
"""
A2→A3 Upload Handler - Box Folder Polling Version
Polls Box folder for new files with V2 naming, uploads to DAM
Updates status to A3 only when ALL assets for campaign uploaded
Supports --A3update flag to force status update for testing
Compatible with Python 3.6+
"""
import sys
import os
import time
import logging
import argparse
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config, load_field_mappings
from shared.dam_client import DAMClient
from shared.box_client import BoxClient
from shared.database import Database
from shared.notifier import Notifier
from shared.filename_parser import FilenameParser
from shared.metadata_extractor_mvp import MetadataExtractorMVP
# Load configuration
# NOTE: 'logs/...' below is a relative path, so it resolves against the current
# working directory of the process, not against this script's location.
# NOTE(review): 'config/config.yaml' is presumably resolved the same way by
# load_config - confirm against shared.config_loader.
config = load_config('config/config.yaml')
field_mappings = load_field_mappings(config)

# Setup logging with rotation
from logging.handlers import RotatingFileHandler

# Create logs directory if it doesn't exist
# ('logs/backup' is created here but not referenced anywhere else in this script.)
os.makedirs('logs', exist_ok=True)
os.makedirs('logs/backup', exist_ok=True)

# Rotation is SIZE-based, not time-based: the active file rolls over at 10MB
# and up to 28 rotated files are kept (about 290MB on disk at most). How many
# days of history that represents depends entirely on log volume.
#
# encoding='utf-8' is required: this script logs non-ASCII text (check marks,
# arrows, JSON dumped with ensure_ascii=False). Without an explicit encoding the
# file is opened with the locale default, and under a non-UTF-8 locale those
# records fail to encode and are dropped with a "Logging error" on stderr.
log_handler = RotatingFileHandler(
    'logs/a2_to_a3.log',
    maxBytes=10 * 1024 * 1024,  # 10MB per file
    backupCount=28,             # Keep 28 rotated files
    encoding='utf-8'
)
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Mirror the same records to the console (stderr) with an identical format.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

logging.basicConfig(
    level=logging.INFO,
    handlers=[log_handler, console_handler]
)
logger = logging.getLogger('A2toA3')
# Placeholder values used when the database has no CreativeX record for a file.
_DEFAULT_CREATIVEX_SCORE = '0'
_DEFAULT_CREATIVEX_URL = 'https://app.creativex.com/preflight/pretests'

# Local staging directory for files downloaded from Box (relative to the CWD).
_DOWNLOAD_DIR = 'temp/downloads'


def _lookup_creativex(db, filename):
    """Return (box_metadata, found) for the ORIGINAL Box filename.

    The lookup key is the full Box filename (job number + tracking ID included),
    not the stripped one. When no record exists, placeholder values are returned
    and found is False.
    """
    creativex_data = db.get_creativex_score_by_filename(filename)
    if creativex_data:
        box_metadata = {
            'score': creativex_data['quality_score'],
            'url': creativex_data['creativex_url']
        }
        logger.info("CreativeX score found in database: Score={}, URL={}".format(
            creativex_data['quality_score'], creativex_data['creativex_url']
        ))
        return box_metadata, True

    # Use default values when no CreativeX score found
    box_metadata = {
        'score': _DEFAULT_CREATIVEX_SCORE,
        'url': _DEFAULT_CREATIVEX_URL
    }
    logger.warning("No CreativeX score found for: {} - Using default values (Score: 0, Placeholder URL)".format(
        filename
    ))
    return box_metadata, False


def _log_dryrun_report(asset_rep, box_metadata, creativex_found):
    """Log the full asset representation and CreativeX status for --dryrun."""
    import json

    logger.info("")
    logger.info("=" * 80)
    logger.info("DRYRUN MODE - Asset Representation (will NOT upload to DAM)")
    logger.info("=" * 80)
    logger.info("")
    logger.info("FULL ASSET REPRESENTATION (JSON):")
    logger.info("")
    logger.info(json.dumps(asset_rep, indent=2, ensure_ascii=False))
    logger.info("")
    logger.info("=" * 80)
    logger.info("Field Count: {} fields".format(len(asset_rep)))
    logger.info("=" * 80)
    logger.info("")
    logger.info("CreativeX Status:")
    logger.info("  Found in database: {}".format(creativex_found))
    logger.info("  Score: {}".format(box_metadata.get('score')))
    logger.info("  URL: {}".format(box_metadata.get('url')))
    logger.info("")
    logger.info("DRYRUN: No upload performed, file kept in Box")
    logger.info("=" * 80)


def _remove_temp_file(path):
    """Best-effort removal of a local temp file; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning("Could not remove temp file {}: {}".format(path, str(e)))


def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, keep_files=False, dryrun=False):
    """
    Process a single file from Box folder

    Parses the V2 filename, loads the master asset and CreativeX score from the
    database, downloads the file, builds the asset representation, uploads it to
    the DAM under its clean filename, records the derivative and (unless
    keep_files is set) deletes the file from Box.

    Args:
        file_info: dict with 'id' and 'name'; optional 'subfolder_path'
        dam, box, db, parser, mvp_extractor: initialized client/helper objects
        config: loaded configuration (currently unused here, kept for callers)
        keep_files: If True, don't delete file from Box after upload (for testing)
        dryrun: If True, build metadata but don't upload to DAM (shows full JSON)

    Returns:
        dict with success, filename, tracking_id and, on success, asset_id,
        clean_filename and the CreativeX fields; on failure, 'error'.
        Exceptions are never propagated - they are logged and reported in the
        returned dict.
    """
    file_id = file_info['id']
    filename = file_info['name']
    # Initialised up front so the failure result can always report it.
    tracking_id = None
    # Local paths to clean up in `finally`, whatever the outcome.
    temp_file = None
    clean_temp_file = None

    logger.info("Processing: {}".format(filename))

    try:
        # 1. Parse V2 filename
        parsed = parser.parse_filename(filename)
        if not parsed['is_valid']:
            raise ValueError("Invalid V2 filename: {} - {}".format(
                filename, ', '.join(parsed['validation_errors'])
            ))

        tracking_id = parsed['tracking_id']
        tracking_mode = parsed.get('tracking_mode', 'full')
        subfolder_path = file_info.get('subfolder_path')

        if not tracking_id:
            raise ValueError("No tracking ID in filename")

        logger.info("Tracking ID: {} (mode: {})".format(tracking_id, tracking_mode))
        if subfolder_path:
            logger.info("From Box subfolder: {} -> will create in DAM".format(subfolder_path))

        # 2. Load master metadata from database
        master_asset = db.get_master_asset(tracking_id)
        if not master_asset:
            raise ValueError("No master asset for tracking ID: {}".format(tracking_id))

        # 3. Get CreativeX score from database (lookup by original Box filename)
        box_metadata, creativex_found = _lookup_creativex(db, filename)

        # 4. Download from Box
        # basename() keeps the staged file inside _DOWNLOAD_DIR even if the
        # remote name ever contained a path separator.
        os.makedirs(_DOWNLOAD_DIR, exist_ok=True)
        temp_file = os.path.join(_DOWNLOAD_DIR, os.path.basename(filename))
        box.download_file(file_id, temp_file)

        # 5. Get clean filename
        clean_filename = parser.strip_upload_components(filename)

        # 6. Build MVP asset representation with CreativeX data from database
        asset_rep = mvp_extractor.build_mvp_asset_representation(
            master_metadata=master_asset['full_metadata'],
            clean_filename=clean_filename,
            parsed_filename=parsed,
            box_metadata=box_metadata,  # Pass CreativeX data from database
            tracking_mode=tracking_mode  # Pass tracking mode for folder-only handling
        )

        # DRYRUN MODE: Display full asset representation and exit
        # (the downloaded temp file is removed by the `finally` below)
        if dryrun:
            _log_dryrun_report(asset_rep, box_metadata, creativex_found)
            return {
                'success': True,
                'asset_id': 'DRYRUN_NO_UPLOAD',
                'tracking_id': tracking_id,
                'filename': filename,
                'clean_filename': clean_filename,
                'creativex_found': creativex_found,
                'creativex_score': box_metadata.get('score', _DEFAULT_CREATIVEX_SCORE),
                'creativex_url': box_metadata.get('url', _DEFAULT_CREATIVEX_URL),
                'dryrun': True
            }

        # 7. Rename to clean filename
        # If stripping left the name unchanged there is nothing to rename;
        # removing the "existing" target first would delete the download itself.
        clean_temp_file = os.path.join(_DOWNLOAD_DIR, os.path.basename(clean_filename))
        if clean_temp_file != temp_file:
            # os.replace overwrites a stale file left by an earlier run.
            os.replace(temp_file, clean_temp_file)

        # 8. Upload to DAM (with subfolder structure if present)
        upload_folder_id = master_asset['upload_directory']  # Base "01. Final Assets" folder

        # If file was in a Box subfolder, create/use matching DAM subfolder
        if subfolder_path:
            logger.info("Creating DAM subfolder path: {}".format(subfolder_path))
            upload_folder_id = dam.get_or_create_subfolder_path(
                base_folder_id=upload_folder_id,
                subfolder_path=subfolder_path
            )
            logger.info("Will upload to: 01. Final Assets/{}".format(subfolder_path))

        upload_result = dam.upload_asset(
            file_path=clean_temp_file,
            folder_id=upload_folder_id,
            asset_representation=asset_rep
        )
        if not upload_result['success']:
            raise Exception("Upload failed: {}".format(upload_result.get('error')))

        # 9. Store derivative record
        # NOTE(review): if this raises, the asset is already in the DAM but the
        # file stays in Box, so a later run may upload it again - confirm that
        # this is acceptable or that the DAM de-duplicates.
        db.store_derivative_asset(
            tracking_id=tracking_id,
            master_asset_id=None,
            dam_asset_id=upload_result['asset_id'],
            filename=clean_filename
        )

        # 10. Delete file from Box after successful upload (unless --keep-files flag set)
        if keep_files:
            logger.info("--keep-files flag set - File kept in Box: {}".format(filename))
        else:
            try:
                box_file = box.client.file(file_id)
                box_file.delete()
                logger.info("Deleted file from Box: {}".format(filename))
            except Exception as e:
                # Deliberately best-effort: the upload itself has succeeded.
                logger.warning("Could not delete file from Box: {}".format(str(e)))

        logger.info("✓ Success: {} → Asset ID: {}".format(filename, upload_result['asset_id']))
        return {
            'success': True,
            'asset_id': upload_result['asset_id'],
            'tracking_id': tracking_id,
            'filename': filename,
            'clean_filename': clean_filename,
            'creativex_found': creativex_found,
            'creativex_score': box_metadata.get('score', _DEFAULT_CREATIVEX_SCORE),
            'creativex_url': box_metadata.get('url', _DEFAULT_CREATIVEX_URL)
        }

    except Exception as e:
        logger.error("✗ Failed: {} - {}".format(filename, str(e)))
        return {
            'success': False,
            'error': str(e),
            'filename': filename,
            'tracking_id': tracking_id
        }

    finally:
        # 11. Clean up local temp files on every path (success, dryrun, failure)
        for path in (temp_file, clean_temp_file):
            if path:
                _remove_temp_file(path)
def _select_valid_v2_files(files, parser):
    """Return the entries of files whose name parses as valid V2 with a tracking ID."""
    valid_files = []
    for file_info in files:
        parsed = parser.parse_filename(file_info['name'])
        if parsed['is_valid'] and parsed.get('tracking_id'):
            valid_files.append(file_info)
        else:
            logger.info("Skipping invalid V2 file: {} - Errors: {}".format(
                file_info['name'], parsed.get('validation_errors', [])
            ))
    return valid_files


def _find_campaign_id(master_asset):
    """Return the container_id of the first 'L7+ - CAMPAIGN' collection, or None."""
    # `or` guards against keys that are present but hold None.
    full_metadata = master_asset.get('full_metadata') or {}
    collections = full_metadata.get('inherited_metadata_collections') or []
    for collection in collections:
        if collection.get('container_type_name') == 'L7+ - CAMPAIGN':
            return collection.get('container_id')
    return None


def main():
    """Main entry point - single run mode

    Polls the A2→A3 Box folder, processes the FIRST valid V2 file only, sends a
    success notification and, with --A3update, moves the campaign to A3.

    Exit codes: 0 when nothing had to be done or the file was processed;
    1 on connection failure, processing failure, notification failure or any
    unexpected error.
    """
    # Parse command-line arguments
    parser_args = argparse.ArgumentParser(description='Ferrero A2→A3 Upload Handler')
    parser_args.add_argument('--auth-pfx', action='store_true',
                             help='Use mTLS certificate authentication instead of OAuth2')
    parser_args.add_argument('--A3update', action='store_true',
                             help='Force update campaign status A2→A3 after upload (for testing)')
    parser_args.add_argument('--keep-files', action='store_true',
                             help='Keep files in Box after upload (don\'t delete, for testing)')
    parser_args.add_argument('--dryrun', action='store_true',
                             help='Build metadata but don\'t upload to DAM (shows full JSON for debugging)')
    args = parser_args.parse_args()

    logger.info("=" * 60)
    logger.info("Ferrero A2→A3 Upload Handler Starting (Polling Mode)")
    if args.auth_pfx:
        logger.info("Authentication: mTLS Certificate (--auth-pfx)")
    else:
        logger.info("Authentication: OAuth2 (default)")
    if args.A3update:
        logger.info("Mode: Auto-update campaign status A2→A3 (--A3update)")
    if args.keep_files:
        logger.info("Mode: Keep files in Box after upload (--keep-files)")
    if args.dryrun:
        logger.info("Mode: DRYRUN - Build metadata but DON'T upload (--dryrun)")
    logger.info("=" * 60)

    # Initialize clients
    dam = DAMClient(config, use_mtls=args.auth_pfx)
    # Use A2→A3 Box folder for polling
    box = BoxClient(config, root_folder_id=config['box'].get('root_folder_a2_a3'))
    db = Database(config)
    notifier = Notifier(config)
    parser = FilenameParser()
    mvp_extractor = MetadataExtractorMVP(field_mappings)

    # Test connections
    logger.info("Testing connections...")
    if not dam.test_connection():
        logger.error("DAM connection failed")
        sys.exit(1)
    if not box.test_connection():
        logger.error("Box connection failed")
        sys.exit(1)
    if not db.test_connection():
        logger.error("Database connection failed")
        sys.exit(1)
    logger.info("All connections OK")
    logger.info("")

    # sys.exit() raises SystemExit, which `except Exception` does not catch, so
    # the exits below pass straight through to `finally`, where the database is
    # closed exactly once for every path.
    try:
        # Get Box folder ID to poll
        box_folder_id = config['box'].get('root_folder_a2_a3', config['box'].get('root_folder_id'))
        logger.info("Polling Box folder: {}".format(box_folder_id))

        # List files recursively in Box folder (skips 1st level job folders, preserves 2nd+ levels)
        files = box.list_folder_files_recursive(box_folder_id)
        logger.info("Recursive scan complete")

        if not files:
            logger.info("No files found in Box folder - exiting")
            sys.exit(0)

        logger.info("Found {} files in Box folder".format(len(files)))

        # Show subfolder distribution
        subfolders = {f.get('subfolder_path') for f in files if f.get('subfolder_path')}
        if subfolders:
            logger.info("Files in {} subfolder(s): {}".format(
                len(subfolders), ', '.join(sorted(subfolders))
            ))
        files_at_root = sum(1 for f in files if not f.get('subfolder_path'))
        if files_at_root:
            logger.info("Files at job level (will go to DAM root): {}".format(files_at_root))

        # Filter for V2 filenames only
        valid_files = _select_valid_v2_files(files, parser)
        logger.info("Found {} valid V2 files to process".format(len(valid_files)))

        if not valid_files:
            logger.info("No valid V2 files to process - exiting")
            sys.exit(0)

        # Process files one at a time (process first file only)
        # NOTE(review): if the Box listing order is stable, a file that keeps
        # failing would be picked first on every run and block the ones behind
        # it - confirm whether that can happen in practice.
        file_info = valid_files[0]
        logger.info("Processing first file only (more will be processed on next run)")
        logger.info("")

        result = process_box_file(file_info, dam, box, db, parser, mvp_extractor, config,
                                  keep_files=args.keep_files, dryrun=args.dryrun)

        if not result['success']:
            logger.warning("")
            logger.warning("=" * 60)
            logger.warning("✗ File processing failed")
            logger.warning("  Filename: {}".format(result['filename']))
            logger.warning("  Error: {}".format(result['error']))
            logger.warning("=" * 60)
            sys.exit(1)

        logger.info("")
        logger.info("=" * 60)
        logger.info("✓ File processed successfully")
        logger.info("  Filename: {}".format(result['filename']))
        logger.info("  Clean filename: {}".format(result['clean_filename']))
        logger.info("  Asset ID: {}".format(result['asset_id']))
        logger.info("  Tracking ID: {}".format(result['tracking_id']))
        logger.info("=" * 60)

        exit_code = 0

        # Get master asset info for email and status update
        master_asset = db.get_master_asset(result['tracking_id'])

        if result.get('dryrun'):
            # Nothing was uploaded, so a "file uploaded" email carrying the
            # placeholder asset ID would misinform its recipients.
            logger.info("DRYRUN: Skipping upload notification email")
        else:
            # Send success email notification with details.
            # By now the upload (and the Box delete) cannot be undone, so a
            # mail failure must not prevent the status update below. It is
            # logged and still reflected in a non-zero exit code.
            try:
                notifier.send_email(
                    template_name='a2_to_a3_file_uploaded',
                    recipients=config['notifications']['recipients']['success'],
                    data={
                        'filename': result['filename'],
                        'clean_filename': result['clean_filename'],
                        'asset_id': result['asset_id'],
                        'tracking_id': result['tracking_id'],
                        'master_asset_name': master_asset.get('opentext_id', 'Unknown') if master_asset else 'Unknown',
                        'upload_folder': master_asset.get('upload_directory', 'Unknown') if master_asset else 'Unknown',
                        'box_folder': box_folder_id,
                        'creativex_found': result.get('creativex_found', False),
                        'creativex_score': result.get('creativex_score', '0'),
                        'creativex_url': result.get('creativex_url', 'https://app.creativex.com/preflight/pretests')
                    }
                )
            except Exception as e:
                logger.error("Success notification failed: {}".format(str(e)), exc_info=True)
                exit_code = 1

        # Update campaign status A2→A3 if --A3update flag is set
        # (an explicit opt-in, so it is honoured in dryrun mode as well)
        if args.A3update and master_asset:
            logger.info("")
            logger.info("--A3update flag set - Attempting to update campaign status")

            # Extract campaign ID from inherited_metadata_collections
            campaign_id = _find_campaign_id(master_asset)

            if campaign_id:
                logger.info("Found campaign ID: {}".format(campaign_id))
                logger.info("Updating campaign status A2 → A3...")
                status_result = dam.update_campaign_status(campaign_id, 'A3')
                if status_result['success']:
                    logger.info("✓ Campaign status updated successfully: A2 → A3")
                else:
                    logger.error("✗ Campaign status update failed: {}".format(status_result.get('error')))
            else:
                logger.warning("⚠ Campaign ID not found in master asset metadata - cannot update status")

        sys.exit(exit_code)

    except Exception as e:
        # exc_info keeps the traceback; the message alone rarely locates the fault.
        logger.critical("Script error: {}".format(str(e)), exc_info=True)
        sys.exit(1)

    finally:
        db.close()
if __name__ == '__main__':
main()