Changes database lookup strategy to match on the full filename as it appears in Box and in the CreativeX PDF report filename field.

Critical Design Change:

Old (incorrect):
- Strip job number and tracking ID from Box filename
- Lookup: NUT_PL_pl_TEST-E2E_EHI_1x1.png
- Database has: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- RESULT: No match found, uses defaults

New (correct):
- Use original Box filename for lookup
- Lookup: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- Database has: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png
- RESULT: Match found, uses actual score

Rationale:
The CreativeX PDF report contains a "filename" field that stores the actual asset filename, including job number and tracking ID. This is the name that gets extracted by LlamaExtract and stored in the database. The A2→A3 workflow receives files from Box with the SAME filename structure (job_brand_country_lang_subject_trackingID.ext). Therefore, we match on the complete original filename, not the stripped version.

Database Storage Pattern:
- CreativeX PDF named: anything.pdf (name doesn't matter)
- PDF contains field: filename = "6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png"
- Database stores: filename = "6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png"
- A2→A3 receives: 6487512_NUT_PL_pl_TEST-E2E_EHI_1x1_7xXgKp.png from Box
- Lookup matches exactly

The clean filename is still used for the DAM upload; only the lookup uses the original filename.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
438 lines
17 KiB
Python
Executable file
438 lines
17 KiB
Python
Executable file
#!/usr/bin/env python3
"""
A2→A3 Upload Handler - Box Folder Polling Version

Polls a Box folder for new files with V2 naming and uploads them to the DAM.
Updates status to A3 only when ALL assets for the campaign are uploaded.
Supports the --A3update flag to force a status update for testing.
Compatible with Python 3.6+
"""

import argparse
import json
import logging
import os
import sys
import time

# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from shared.config_loader import load_config, load_field_mappings
from shared.dam_client import DAMClient
from shared.box_client import BoxClient
from shared.database import Database
from shared.notifier import Notifier
from shared.filename_parser import FilenameParser
from shared.metadata_extractor_mvp import MetadataExtractorMVP

# Application configuration and the field mappings derived from it
config = load_config('config/config.yaml')
field_mappings = load_field_mappings(config)

# Kept next to the logging setup that uses it
from logging.handlers import RotatingFileHandler

# Make sure the log directories exist before the file handler opens its file.
# NOTE(review): nothing in this file writes to logs/backup - confirm whether an
# external job relies on it.
for _log_dir in ('logs', 'logs/backup'):
    os.makedirs(_log_dir, exist_ok=True)

_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Size-based rotation: roll over at 10MB and keep up to 28 rotated files
# (about 280MB in total). How much wall-clock time that covers depends on log
# volume - the "approximately 1 month" estimate is an assumption.
log_handler = RotatingFileHandler(
    'logs/a2_to_a3.log',
    maxBytes=10 * 1024 * 1024,
    backupCount=28,
)
console_handler = logging.StreamHandler()

# File and console output share the same level and line format
for _handler in (log_handler, console_handler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(logging.Formatter(_LOG_FORMAT))

logging.basicConfig(level=logging.INFO, handlers=[log_handler, console_handler])

logger = logging.getLogger('A2toA3')


def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, keep_files=False, dryrun=False):
    """
    Process a single file from Box folder

    Parses the V2 filename, loads the master asset for its tracking ID, looks
    up the CreativeX score by the ORIGINAL Box filename, downloads the file
    and builds the MVP asset representation. Unless dryrun is set, the file is
    then uploaded to the DAM under its clean filename, a derivative record is
    stored and the file is deleted from Box.

    Local temp files are always removed before returning (success, failure
    and dryrun alike).

    Args:
        file_info: Box file dict; reads 'id', 'name' and optional 'subfolder_path'
        dam: DAM client (get_or_create_subfolder_path, upload_asset)
        box: Box client (download_file, client.file(id).delete())
        db: Database (get_master_asset, get_creativex_score_by_filename,
            store_derivative_asset)
        parser: Filename parser (parse_filename, strip_upload_components)
        mvp_extractor: Builds the asset representation
        config: Not used by this function; kept for interface compatibility
        keep_files: If True, don't delete file from Box after upload (for testing)
        dryrun: If True, build metadata but don't upload to DAM (shows full JSON)

    Returns:
        dict. Errors raised while processing are caught and reported here.
        Success: success=True, asset_id, tracking_id, filename, clean_filename,
            creativex_found, creativex_score, creativex_url; in dryrun mode
            asset_id is 'DRYRUN_NO_UPLOAD' and dryrun=True is added.
        Failure: success=False, error, filename, tracking_id (None when the
            filename could not be parsed).
    """
    file_id = file_info['id']
    filename = file_info['name']
    # Bound before the try block so the failure result can always report it
    tracking_id = None
    # Local files created by this call; removed in the finally block
    temp_paths = []
    download_dir = 'temp/downloads'

    logger.info("Processing: {}".format(filename))

    try:
        # 1. Parse V2 filename
        parsed = parser.parse_filename(filename)

        if not parsed['is_valid']:
            raise ValueError("Invalid V2 filename: {} - {}".format(
                filename, ', '.join(parsed['validation_errors'])
            ))

        tracking_id = parsed['tracking_id']
        tracking_mode = parsed.get('tracking_mode', 'full')
        subfolder_path = file_info.get('subfolder_path')

        if not tracking_id:
            raise ValueError("No tracking ID in filename")

        logger.info("Tracking ID: {} (mode: {})".format(tracking_id, tracking_mode))
        if subfolder_path:
            logger.info("From Box subfolder: {} -> will create in DAM".format(subfolder_path))

        # 2. Load master metadata from database
        master_asset = db.get_master_asset(tracking_id)

        if not master_asset:
            raise ValueError("No master asset for tracking ID: {}".format(tracking_id))

        # 3. Get CreativeX score from database (lookup by original Box filename)
        # The PDF contains the filename field with the full name (job + tracking ID)
        # So we lookup using the original filename from Box, not the stripped version
        box_metadata, creativex_found = _lookup_creativex_metadata(db, filename)

        # 4. Download from Box
        os.makedirs(download_dir, exist_ok=True)
        temp_file = os.path.join(download_dir, filename)
        # Registered before the download so a partial file is cleaned up too
        temp_paths.append(temp_file)
        box.download_file(file_id, temp_file)

        # 5. Get clean filename
        clean_filename = parser.strip_upload_components(filename)

        # 6. Build MVP asset representation with CreativeX data from database
        asset_rep = mvp_extractor.build_mvp_asset_representation(
            master_metadata=master_asset['full_metadata'],
            clean_filename=clean_filename,
            parsed_filename=parsed,
            box_metadata=box_metadata,  # Pass CreativeX data from database
            tracking_mode=tracking_mode  # Pass tracking mode for folder-only handling
        )

        # DRYRUN MODE: Display full asset representation and exit
        if dryrun:
            _log_dryrun_report(asset_rep, box_metadata, creativex_found)
            return {
                'success': True,
                'asset_id': 'DRYRUN_NO_UPLOAD',
                'tracking_id': tracking_id,
                'filename': filename,
                'clean_filename': clean_filename,
                'creativex_found': creativex_found,
                'creativex_score': box_metadata.get('score', '0'),
                'creativex_url': box_metadata.get('url', 'https://app.creativex.com/preflight/pretests'),
                'dryrun': True
            }

        # 7. Rename to clean filename
        clean_temp_file = os.path.join(download_dir, clean_filename)
        # When the name is already clean there is nothing to rename; removing
        # the "existing" target would delete the file we just downloaded.
        if clean_temp_file != temp_file:
            temp_paths.append(clean_temp_file)
            # os.replace overwrites a stale file left by an earlier run
            os.replace(temp_file, clean_temp_file)

        # 8. Upload to DAM (with subfolder structure if present)
        upload_folder_id = master_asset['upload_directory']  # Base "01. Final Assets" folder

        # If file was in a Box subfolder, create/use matching DAM subfolder
        if subfolder_path:
            logger.info("Creating DAM subfolder path: {}".format(subfolder_path))
            upload_folder_id = dam.get_or_create_subfolder_path(
                base_folder_id=upload_folder_id,
                subfolder_path=subfolder_path
            )
            logger.info("Will upload to: 01. Final Assets/{}".format(subfolder_path))

        upload_result = dam.upload_asset(
            file_path=clean_temp_file,
            folder_id=upload_folder_id,
            asset_representation=asset_rep
        )

        if not upload_result['success']:
            raise Exception("Upload failed: {}".format(upload_result.get('error')))

        # 9. Store derivative record
        db.store_derivative_asset(
            tracking_id=tracking_id,
            master_asset_id=None,
            dam_asset_id=upload_result['asset_id'],
            filename=clean_filename
        )

        # 10. Delete file from Box after successful upload (unless --keep-files flag set)
        if keep_files:
            logger.info("--keep-files flag set - File kept in Box: {}".format(filename))
        else:
            # Best effort: the asset is already in the DAM, so a failed delete
            # must not turn this upload into a failure.
            try:
                box.client.file(file_id).delete()
                logger.info("Deleted file from Box: {}".format(filename))
            except Exception as e:
                logger.warning("Could not delete file from Box: {}".format(str(e)))

        logger.info("✓ Success: {} → Asset ID: {}".format(filename, upload_result['asset_id']))

        return {
            'success': True,
            'asset_id': upload_result['asset_id'],
            'tracking_id': tracking_id,
            'filename': filename,
            'clean_filename': clean_filename,
            'creativex_found': creativex_found,
            'creativex_score': box_metadata.get('score', '0'),
            'creativex_url': box_metadata.get('url', 'https://app.creativex.com/preflight/pretests')
        }

    except Exception as e:
        logger.error("✗ Failed: {} - {}".format(filename, str(e)))
        return {
            'success': False,
            'error': str(e),
            'filename': filename,
            'tracking_id': tracking_id
        }

    finally:
        # 11. Clean up local temp files on every exit path
        for path in temp_paths:
            _remove_temp_file(path)


def _lookup_creativex_metadata(db, filename):
    """Return (box_metadata, found) for the CreativeX score stored under filename."""
    creativex_data = db.get_creativex_score_by_filename(filename)

    if creativex_data:
        logger.info("CreativeX score found in database: Score={}, URL={}".format(
            creativex_data['quality_score'], creativex_data['creativex_url']
        ))
        return {
            'score': creativex_data['quality_score'],
            'url': creativex_data['creativex_url']
        }, True

    # Use default values when no CreativeX score found
    logger.warning("No CreativeX score found for: {} - Using default values (Score: 0, Placeholder URL)".format(
        filename
    ))
    return {
        'score': '0',
        'url': 'https://app.creativex.com/preflight/pretests'
    }, False


def _log_dryrun_report(asset_rep, box_metadata, creativex_found):
    """Log the full asset representation and CreativeX status for --dryrun."""
    separator = "=" * 80

    logger.info("")
    logger.info(separator)
    logger.info("DRYRUN MODE - Asset Representation (will NOT upload to DAM)")
    logger.info(separator)
    logger.info("")
    logger.info("FULL ASSET REPRESENTATION (JSON):")
    logger.info("")
    logger.info(json.dumps(asset_rep, indent=2, ensure_ascii=False))
    logger.info("")
    logger.info(separator)
    logger.info("Field Count: {} fields".format(len(asset_rep)))
    logger.info(separator)
    logger.info("")
    logger.info("CreativeX Status:")
    logger.info(" Found in database: {}".format(creativex_found))
    logger.info(" Score: {}".format(box_metadata.get('score')))
    logger.info(" URL: {}".format(box_metadata.get('url')))
    logger.info("")
    logger.info("DRYRUN: No upload performed, file kept in Box")
    logger.info(separator)


def _remove_temp_file(path):
    """Remove a local temp file; a missing file is fine, other errors are only logged."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning("Could not remove temp file {}: {}".format(path, str(e)))


def main():
    """
    Main entry point - single run mode

    Parses the command-line flags, builds the DAM/Box/database clients, checks
    their connections and then processes at most ONE valid V2 file from the
    Box folder (further files are picked up by later runs).

    Always ends through sys.exit:
        0 - file processed, or nothing (valid) to process
        1 - a connection test failed, the file failed, or an unexpected error
    """
    # Parse command-line arguments
    parser_args = argparse.ArgumentParser(description='Ferrero A2→A3 Upload Handler')
    parser_args.add_argument('--auth-pfx', action='store_true',
                             help='Use mTLS certificate authentication instead of OAuth2')
    parser_args.add_argument('--A3update', action='store_true',
                             help='Force update campaign status A2→A3 after upload (for testing)')
    parser_args.add_argument('--keep-files', action='store_true',
                             help="Keep files in Box after upload (don't delete, for testing)")
    parser_args.add_argument('--dryrun', action='store_true',
                             help="Build metadata but don't upload to DAM (shows full JSON for debugging)")
    args = parser_args.parse_args()

    logger.info("=" * 60)
    logger.info("Ferrero A2→A3 Upload Handler Starting (Polling Mode)")
    if args.auth_pfx:
        logger.info("Authentication: mTLS Certificate (--auth-pfx)")
    else:
        logger.info("Authentication: OAuth2 (default)")
    if args.A3update:
        logger.info("Mode: Auto-update campaign status A2→A3 (--A3update)")
    if args.keep_files:
        logger.info("Mode: Keep files in Box after upload (--keep-files)")
    if args.dryrun:
        logger.info("Mode: DRYRUN - Build metadata but DON'T upload (--dryrun)")
    logger.info("=" * 60)

    # Initialize clients
    dam = DAMClient(config, use_mtls=args.auth_pfx)
    # Use A2→A3 Box folder for polling
    box = BoxClient(config, root_folder_id=config['box'].get('root_folder_a2_a3'))
    db = Database(config)
    notifier = Notifier(config)
    parser = FilenameParser()
    mvp_extractor = MetadataExtractorMVP(field_mappings)

    # Test connections
    logger.info("Testing connections...")
    for label, client in (("DAM", dam), ("Box", box), ("Database", db)):
        if not client.test_connection():
            logger.error("{} connection failed".format(label))
            sys.exit(1)

    logger.info("All connections OK")
    logger.info("")

    try:
        exit_code = _run_single_pass(args, dam, box, db, notifier, parser, mvp_extractor)
    except Exception as e:
        logger.critical("Script error: {}".format(str(e)), exc_info=True)
        exit_code = 1
    finally:
        # Single place where the database is closed, whatever the outcome
        db.close()

    sys.exit(exit_code)


def _run_single_pass(args, dam, box, db, notifier, parser, mvp_extractor):
    """Poll the Box folder, process the first valid V2 file and return the exit code."""
    # Get Box folder ID to poll
    # NOTE(review): BoxClient in main() is built from 'root_folder_a2_a3' only,
    # while this falls back to 'root_folder_id' - confirm the two should differ.
    box_settings = config['box']
    box_folder_id = box_settings.get('root_folder_a2_a3', box_settings.get('root_folder_id'))

    logger.info("Polling Box folder: {}".format(box_folder_id))

    # List files recursively in Box folder (skips 1st level job folders, preserves 2nd+ levels)
    files = box.list_folder_files_recursive(box_folder_id)
    logger.info("Recursive scan complete")

    if not files:
        logger.info("No files found in Box folder - exiting")
        return 0

    logger.info("Found {} files in Box folder".format(len(files)))

    # Show subfolder distribution
    subfolders = {f.get('subfolder_path') for f in files if f.get('subfolder_path')}
    if subfolders:
        logger.info("Files in {} subfolder(s): {}".format(
            len(subfolders), ', '.join(sorted(subfolders))
        ))

    files_at_root = sum(1 for f in files if not f.get('subfolder_path'))
    if files_at_root:
        logger.info("Files at job level (will go to DAM root): {}".format(files_at_root))

    # Filter for V2 filenames only
    valid_files = []
    for file_info in files:
        parsed = parser.parse_filename(file_info['name'])
        if parsed['is_valid'] and parsed.get('tracking_id'):
            valid_files.append(file_info)
        else:
            logger.info("Skipping invalid V2 file: {} - Errors: {}".format(
                file_info['name'], parsed.get('validation_errors', [])
            ))

    logger.info("Found {} valid V2 files to process".format(len(valid_files)))

    if not valid_files:
        logger.info("No valid V2 files to process - exiting")
        return 0

    # Process files one at a time (process first file only)
    logger.info("Processing first file only (more will be processed on next run)")
    logger.info("")

    result = process_box_file(
        valid_files[0], dam, box, db, parser, mvp_extractor, config,
        keep_files=args.keep_files, dryrun=args.dryrun
    )

    if not result['success']:
        logger.warning("")
        logger.warning("=" * 60)
        logger.warning("✗ File processing failed")
        logger.warning(" Filename: {}".format(result['filename']))
        logger.warning(" Error: {}".format(result['error']))
        logger.warning("=" * 60)
        return 1

    logger.info("")
    logger.info("=" * 60)
    logger.info("✓ File processed successfully")
    logger.info(" Filename: {}".format(result['filename']))
    logger.info(" Clean filename: {}".format(result['clean_filename']))
    logger.info(" Asset ID: {}".format(result['asset_id']))
    logger.info(" Tracking ID: {}".format(result['tracking_id']))
    logger.info("=" * 60)

    # Nothing was uploaded in dryrun mode, so sending the "file uploaded"
    # email or moving the campaign to A3 would report work that never happened.
    if result.get('dryrun'):
        logger.info("DRYRUN: Skipping success notification and campaign status update")
        return 0

    # Send success email notification with details
    # Get master asset info for email
    master_asset = db.get_master_asset(result['tracking_id'])

    notifier.send_email(
        template_name='a2_to_a3_file_uploaded',
        recipients=config['notifications']['recipients']['success'],
        data={
            'filename': result['filename'],
            'clean_filename': result['clean_filename'],
            'asset_id': result['asset_id'],
            'tracking_id': result['tracking_id'],
            'master_asset_name': master_asset.get('opentext_id', 'Unknown') if master_asset else 'Unknown',
            'upload_folder': master_asset.get('upload_directory', 'Unknown') if master_asset else 'Unknown',
            'box_folder': box_folder_id,
            'creativex_found': result.get('creativex_found', False),
            'creativex_score': result.get('creativex_score', '0'),
            'creativex_url': result.get('creativex_url', 'https://app.creativex.com/preflight/pretests')
        }
    )

    # Update campaign status A2→A3 if --A3update flag is set
    if args.A3update and master_asset:
        _update_campaign_status_to_a3(dam, master_asset)

    return 0


def _update_campaign_status_to_a3(dam, master_asset):
    """Find the campaign in the master asset metadata and set its status to A3."""
    logger.info("")
    logger.info("--A3update flag set - Attempting to update campaign status")

    # Extract campaign ID from inherited_metadata_collections; tolerate the
    # keys being present but empty/None.
    full_metadata = master_asset.get('full_metadata') or {}
    collections = full_metadata.get('inherited_metadata_collections') or []
    campaign_id = next(
        (
            collection.get('container_id')
            for collection in collections
            if collection.get('container_type_name') == 'L7+ - CAMPAIGN'
        ),
        None
    )

    if not campaign_id:
        logger.warning("⚠ Campaign ID not found in master asset metadata - cannot update status")
        return

    logger.info("Found campaign ID: {}".format(campaign_id))
    logger.info("Updating campaign status A2 → A3...")

    status_result = dam.update_campaign_status(campaign_id, 'A3')

    if status_result['success']:
        logger.info("✓ Campaign status updated successfully: A2 → A3")
    else:
        logger.error("✗ Campaign status update failed: {}".format(status_result.get('error')))


# Run a single polling pass only when executed as a script, not on import
if __name__ == "__main__":
    main()