ferrero-opentext/Python-Version/scripts/a2_to_a3_upload_polling.py

468 lines
18 KiB
Python
Executable file

#!/usr/bin/env python3
"""
A2→A3 Upload Handler - Box Folder Polling Version
Polls Box folder for new files with V2 naming, uploads to DAM
Updates status to A3 only when ALL assets for campaign uploaded
Supports --A3update flag to force status update for testing
Compatible with Python 3.6+
"""
import sys
import os
import time
import logging
import argparse
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config, load_field_mappings
from shared.dam_client import DAMClient
from shared.box_client import BoxClient
from shared.database import Database
from shared.notifier import Notifier
from shared.filename_parser import FilenameParser
from shared.metadata_extractor_mvp import MetadataExtractorMVP
# Load configuration
config = load_config('config/config.yaml')
field_mappings = load_field_mappings(config)
# Setup logging with rotation
from logging.handlers import RotatingFileHandler
# Create logs directory if it doesn't exist
os.makedirs('logs', exist_ok=True)
os.makedirs('logs/backup', exist_ok=True)
# Configure logging with rotation
# Keep 1 week of active logs (7 days * 10MB = 70MB)
# Backup rotates keep 4 weeks (28 backups * 10MB = 280MB total)
log_handler = RotatingFileHandler(
'logs/a2_to_a3.log',
maxBytes=10*1024*1024, # 10MB per file
backupCount=28 # Keep 28 rotated files (approximately 1 month)
)
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logging.basicConfig(
level=logging.INFO,
handlers=[log_handler, console_handler]
)
logger = logging.getLogger('A2toA3')
def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, keep_files=False, dryrun=False):
"""
Process a single file from Box folder
Args:
keep_files: If True, don't delete file from Box after upload (for testing)
dryrun: If True, build metadata but don't upload to DAM (shows full JSON)
Returns:
dict with success, asset_id, tracking_id
"""
file_id = file_info['id']
filename = file_info['name']
logger.info("Processing: {}".format(filename))
try:
# 1. Parse V2 filename
parsed = parser.parse_filename(filename)
if not parsed['is_valid']:
raise ValueError("Invalid V2 filename: {} - {}".format(
filename, ', '.join(parsed['validation_errors'])
))
tracking_id = parsed['tracking_id']
tracking_mode = parsed.get('tracking_mode', 'full')
subfolder_path = file_info.get('subfolder_path')
if not tracking_id:
raise ValueError("No tracking ID in filename")
logger.info("Tracking ID: {} (mode: {})".format(tracking_id, tracking_mode))
if subfolder_path:
logger.info("From Box subfolder: {} -> will create in DAM".format(subfolder_path))
# 2. Load master metadata from database
master_asset = db.get_master_asset(tracking_id)
if not master_asset:
raise ValueError("No master asset for tracking ID: {}".format(tracking_id))
# 3. Get CreativeX score from database (lookup by original Box filename)
# The PDF contains the filename field with the full name (job + tracking ID)
# So we lookup using the original filename from Box, not the stripped version
creativex_data = db.get_creativex_score_by_filename(filename)
# Build box_metadata dict (for compatibility with existing code)
if creativex_data:
# Extract platforms from full_extraction_data
full_data = creativex_data.get('full_extraction_data', {})
data_obj = full_data.get('data', {}) if isinstance(full_data, dict) else {}
platforms = data_obj.get('ferrero_mapped_platforms', [])
# If legacy single platform exists, add it to list
if not platforms and data_obj.get('ferrero_mapped_platform'):
platforms = [data_obj.get('ferrero_mapped_platform')]
box_metadata = {
'score': creativex_data['quality_score'],
'url': creativex_data['creativex_url'],
'platforms': platforms
}
logger.info("CreativeX score found in database: Score={}, URL={}, Platforms={}".format(
creativex_data['quality_score'], creativex_data['creativex_url'], platforms
))
creativex_found = True
else:
# Use default values when no CreativeX score found
box_metadata = {
'score': '0',
'url': 'https://app.creativex.com/preflight/pretests'
}
logger.warning("No CreativeX score found for: {} - Using default values (Score: 0, Placeholder URL)".format(
filename
))
creativex_found = False
# 4. Download from Box
temp_file = os.path.join('temp/downloads', filename)
box.download_file(file_id, temp_file)
# 5. Get clean filename
clean_filename = parser.strip_upload_components(filename)
# 6. Build MVP asset representation with CreativeX data from database
asset_rep = mvp_extractor.build_mvp_asset_representation(
master_metadata=master_asset['full_metadata'],
clean_filename=clean_filename,
parsed_filename=parsed,
box_metadata=box_metadata, # Pass CreativeX data from database
tracking_mode=tracking_mode # Pass tracking mode for folder-only handling
)
# DRYRUN MODE: Display full asset representation and exit
if dryrun:
import json
logger.info("")
logger.info("=" * 80)
logger.info("DRYRUN MODE - Asset Representation (will NOT upload to DAM)")
logger.info("=" * 80)
logger.info("")
logger.info("FULL ASSET REPRESENTATION (JSON):")
logger.info("")
logger.info(json.dumps(asset_rep, indent=2, ensure_ascii=False))
logger.info("")
logger.info("=" * 80)
logger.info("Field Count: {} fields".format(len(asset_rep)))
logger.info("=" * 80)
logger.info("")
logger.info("CreativeX Status:")
logger.info(" Found in database: {}".format(creativex_found))
logger.info(" Score: {}".format(box_metadata.get('score')))
logger.info(" URL: {}".format(box_metadata.get('url')))
logger.info("")
logger.info("DRYRUN: No upload performed, file kept in Box")
logger.info("=" * 80)
return {
'success': True,
'asset_id': 'DRYRUN_NO_UPLOAD',
'tracking_id': tracking_id,
'filename': filename,
'clean_filename': clean_filename,
'creativex_found': creativex_found,
'creativex_score': box_metadata.get('score', '0'),
'creativex_url': box_metadata.get('url', 'https://app.creativex.com/preflight/pretests'),
'dryrun': True
}
# 7. Rename to clean filename
clean_temp_file = os.path.join('temp/downloads', clean_filename)
if os.path.exists(clean_temp_file):
os.remove(clean_temp_file)
os.rename(temp_file, clean_temp_file)
# 7. Upload to DAM (with subfolder structure if present)
upload_folder_id = master_asset['upload_directory'] # Base "01. Final Assets" folder
# If file was in a Box subfolder, create/use matching DAM subfolder
if subfolder_path:
logger.info("Creating DAM subfolder path: {}".format(subfolder_path))
upload_folder_id = dam.get_or_create_subfolder_path(
base_folder_id=upload_folder_id,
subfolder_path=subfolder_path
)
logger.info("Will upload to: 01. Final Assets/{}".format(subfolder_path))
upload_result = dam.upload_asset(
file_path=clean_temp_file,
folder_id=upload_folder_id,
asset_representation=asset_rep
)
if not upload_result['success']:
raise Exception("Upload failed: {}".format(upload_result.get('error')))
# 8. Store derivative record
db.store_derivative_asset(
tracking_id=tracking_id,
master_asset_id=None,
dam_asset_id=upload_result['asset_id'],
filename=clean_filename
)
# 9. Delete file from Box after successful upload (unless --keep-files flag set)
if keep_files:
logger.info("--keep-files flag set - File kept in Box: {}".format(filename))
else:
try:
box_file = box.client.file(file_id)
box_file.delete()
logger.info("Deleted file from Box: {}".format(filename))
except Exception as e:
logger.warning("Could not delete file from Box: {}".format(str(e)))
# 10. Clean up local temp file
os.remove(clean_temp_file)
logger.info("✓ Success: {} → Asset ID: {}".format(filename, upload_result['asset_id']))
return {
'success': True,
'asset_id': upload_result['asset_id'],
'tracking_id': tracking_id,
'filename': filename,
'clean_filename': clean_filename,
'creativex_found': creativex_found,
'creativex_score': box_metadata.get('score', '0'),
'creativex_url': box_metadata.get('url', 'https://app.creativex.com/preflight/pretests')
}
except Exception as e:
logger.error("✗ Failed: {} - {}".format(filename, str(e)))
return {
'success': False,
'error': str(e),
'filename': filename,
'tracking_id': tracking_id if 'tracking_id' in locals() else None
}
def main():
"""Main entry point - single run mode"""
# Parse command-line arguments
parser_args = argparse.ArgumentParser(description='Ferrero A2→A3 Upload Handler')
parser_args.add_argument('--auth-pfx', action='store_true',
help='Use mTLS certificate authentication (Legacy APIM)')
parser_args.add_argument('--auth-pfx-v2', action='store_true',
help='Use mTLS V2 (Hybrid) authentication')
parser_args.add_argument('--A3update', action='store_true',
help='Force update campaign status A2→A3 after upload (for testing)')
parser_args.add_argument('--keep-files', action='store_true',
help='Keep files in Box after upload (don\'t delete, for testing)')
parser_args.add_argument('--dryrun', action='store_true',
help='Build metadata but don\'t upload to DAM (shows full JSON for debugging)')
args = parser_args.parse_args()
logger.info("=" * 60)
logger.info("Ferrero A2→A3 Upload Handler Starting (Polling Mode)")
# Determine auth mode
auth_mode = 'oauth'
if args.auth_pfx_v2:
auth_mode = 'mtls_v2'
logger.info("Authentication: mTLS V2 (Hybrid)")
elif args.auth_pfx:
auth_mode = 'mtls'
logger.info("Authentication: mTLS Certificate (Legacy)")
else:
logger.info("Authentication: OAuth2 (default)")
if args.A3update:
logger.info("Mode: Auto-update campaign status A2→A3 (--A3update)")
if args.keep_files:
logger.info("Mode: Keep files in Box after upload (--keep-files)")
if args.dryrun:
logger.info("Mode: DRYRUN - Build metadata but DON'T upload (--dryrun)")
logger.info("=" * 60)
# Initialize clients
dam = DAMClient(config, auth_mode=auth_mode)
# Use A2→A3 Box folder for polling
box = BoxClient(config, root_folder_id=config['box'].get('root_folder_a2_a3'))
db = Database(config)
notifier = Notifier(config)
parser = FilenameParser()
mvp_extractor = MetadataExtractorMVP(field_mappings)
# Test connections
logger.info("Testing connections...")
if not dam.test_connection():
logger.error("DAM connection failed")
sys.exit(1)
if not box.test_connection():
logger.error("Box connection failed")
sys.exit(1)
if not db.test_connection():
logger.error("Database connection failed")
sys.exit(1)
logger.info("All connections OK")
logger.info("")
try:
# Get Box folder ID to poll
box_folder_id = config['box'].get('root_folder_a2_a3', config['box'].get('root_folder_id'))
logger.info("Polling Box folder: {}".format(box_folder_id))
# List files recursively in Box folder (skips 1st level job folders, preserves 2nd+ levels)
files = box.list_folder_files_recursive(box_folder_id)
logger.info("Recursive scan complete")
if not files:
logger.info("No files found in Box folder - exiting")
db.close()
sys.exit(0)
logger.info("Found {} files in Box folder".format(len(files)))
# Show subfolder distribution
subfolders = set([f.get('subfolder_path') for f in files if f.get('subfolder_path')])
if subfolders:
logger.info("Files in {} subfolder(s): {}".format(
len(subfolders), ', '.join(sorted(subfolders))
))
files_at_root = len([f for f in files if not f.get('subfolder_path')])
if files_at_root:
logger.info("Files at job level (will go to DAM root): {}".format(files_at_root))
# Filter for V2 filenames only
valid_files = []
for file_info in files:
parsed = parser.parse_filename(file_info['name'])
if parsed['is_valid'] and parsed.get('tracking_id'):
valid_files.append(file_info)
else:
logger.info("Skipping invalid V2 file: {} - Errors: {}".format(
file_info['name'], parsed.get('validation_errors', [])
))
logger.info("Found {} valid V2 files to process".format(len(valid_files)))
if not valid_files:
logger.info("No valid V2 files to process - exiting")
db.close()
sys.exit(0)
# Process ALL files
logger.info("Processing all {} file(s)".format(len(valid_files)))
logger.info("")
# Track results
successful_files = []
failed_files = []
for idx, file_info in enumerate(valid_files, 1):
logger.info("=" * 60)
logger.info("Processing file {}/{}".format(idx, len(valid_files)))
logger.info("=" * 60)
result = process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, keep_files=args.keep_files, dryrun=args.dryrun)
if result['success']:
successful_files.append(result)
logger.info("✓ File {} processed successfully".format(idx))
else:
failed_files.append(result)
logger.error("✗ File {} failed".format(idx))
logger.info("")
# Summary
logger.info("")
logger.info("=" * 60)
logger.info("A2→A3 Processing Summary")
logger.info("=" * 60)
logger.info(" Total files processed: {}".format(len(valid_files)))
logger.info(" Successful: {}".format(len(successful_files)))
logger.info(" Failed: {}".format(len(failed_files)))
logger.info("=" * 60)
# Send summary email notification
if len(successful_files) > 0:
# Get campaign info from first successful file for context
first_result = successful_files[0]
master_asset = db.get_master_asset(first_result['tracking_id'])
notifier.send_email(
template_name='a2_to_a3_batch_complete',
recipients=config['notifications']['recipients']['success'],
data={
'total_files': len(valid_files),
'successful_count': len(successful_files),
'failed_count': len(failed_files),
'successful_files': successful_files,
'failed_files': failed_files,
'box_folder': box_folder_id
}
)
# Update campaign status A2→A3 if --A3update flag is set
if args.A3update and master_asset:
logger.info("")
logger.info("--A3update flag set - Attempting to update campaign status")
# Get campaign ID from master asset metadata
full_metadata = master_asset.get('full_metadata', {})
# Extract campaign ID from inherited_metadata_collections
campaign_id = None
collections = full_metadata.get('inherited_metadata_collections', [])
for collection in collections:
if collection.get('container_type_name') == 'L7+ - CAMPAIGN':
campaign_id = collection.get('container_id')
break
if campaign_id:
logger.info("Found campaign ID: {}".format(campaign_id))
logger.info("Updating campaign status A2 → A3...")
status_result = dam.update_campaign_status(campaign_id, 'A3')
if status_result['success']:
logger.info("✓ Campaign status updated successfully: A2 → A3")
else:
logger.error("✗ Campaign status update failed: {}".format(status_result.get('error')))
else:
logger.warning("⚠ Campaign ID not found in master asset metadata - cannot update status")
db.close()
# Exit with success if at least one file succeeded
if len(successful_files) > 0:
sys.exit(0)
else:
sys.exit(1)
except Exception as e:
logger.critical("Script error: {}".format(str(e)))
db.close()
sys.exit(1)
if __name__ == '__main__':
main()