ferrero-opentext/Python-Version/scripts/a2_to_a3_upload_polling.py
DJP 80d5757bbb Add Box metadata extraction for CreativeX fields in A2→A3 workflow
Major Feature: Box Metadata Integration

box_client.py:
 Added get_file_metadata() method
 Reads 'Ferrero-DAM-Metadata' template from Box files
 Extracts 'CreativeX Score' and 'CreativeX URL' fields
 Returns dict with score and url

a2_to_a3_upload_polling.py:
 Calls box.get_file_metadata() before download
 Logs Box metadata retrieved
 Passes box_metadata to build_mvp_asset_representation()

metadata_extractor_mvp.py:
 Added box_metadata parameter to build_mvp_asset_representation()
 Added _update_creativex_fields() method
 Updates FERRERO.FIELD.CREATIVEX LINK with URL from Box
 Logs CreativeX Score only for now (it maps to a tabular DAM field, which needs special handling before it can be written)

Flow:
1. File uploaded to Box by agency
2. Agency adds metadata using Ferrero-DAM-Metadata template
3. Script reads CreativeX Score and URL from Box metadata
4. Updates MVP fields with Box metadata values
5. Uploads to DAM with CreativeX data

Field Mapping:
- Box: 'CreativeX URL' → DAM: FERRERO.FIELD.CREATIVEX LINK (written to the asset)
- Box: 'CreativeX Score' → DAM: FERRERO.TAB.FIELD.CREATIVEX (value is only logged for now; the tabular field structure still needs to be built)

Next: Test with file that has Box metadata template applied

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-03 14:26:00 -05:00

284 lines
9.7 KiB
Python
Executable file

#!/usr/bin/env python3
"""
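A2→A3 Upload Handler - Box Folder Polling Version
Polls a Box folder for new files with V2 naming and uploads them to the DAM.
Each run processes at most one file (the first valid one); remaining files
are picked up by subsequent runs.
Intended behaviour: update status to A3 only when ALL assets for a campaign
are uploaded. NOTE: that status update is not implemented yet (see the TODO
in main()).
Compatible with Python 3.6+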
"""
import sys
import os
import time
import logging
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config, load_field_mappings
from shared.dam_client import DAMClient
from shared.box_client import BoxClient
from shared.database import Database
from shared.notifier import Notifier
from shared.filename_parser import FilenameParser
from shared.metadata_extractor_mvp import MetadataExtractorMVP
# Load configuration (path is relative to the current working directory)
config = load_config('config/config.yaml')
field_mappings = load_field_mappings(config)

# Setup logging with rotation
from logging.handlers import RotatingFileHandler

# Create logs directories if they don't exist.
# NOTE(review): 'logs/backup' is not referenced by the handler below
# (RotatingFileHandler writes its rotated files next to the active log as
# a2_to_a3.log.1, .2, ...); presumably an external archiving job uses it -
# confirm before removing.
os.makedirs('logs', exist_ok=True)
os.makedirs('logs/backup', exist_ok=True)

# Rotation is size-based, not time-based: the active file rolls over at 10MB
# and up to 28 rotated files are kept, so the log set is capped at roughly
# 290MB (1 active + 28 backups) regardless of how many days that covers.
_log_formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

log_handler = RotatingFileHandler(
    'logs/a2_to_a3.log',
    maxBytes=10 * 1024 * 1024,  # 10MB per file
    backupCount=28,  # Keep 28 rotated files
    # Log messages contain non-ASCII characters (e.g. "✓", "→"); without an
    # explicit encoding the locale default is used, which fails under a
    # C/ASCII locale (typical for cron) and drops those records.
    encoding='utf-8',
)
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(_log_formatter)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(_log_formatter)

logging.basicConfig(
    level=logging.INFO,
    handlers=[log_handler, console_handler]
)
logger = logging.getLogger('A2toA3')
def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config):
    """
    Process a single file from the Box folder and upload it to the DAM.

    Steps: validate the V2 filename, load the master asset for its tracking
    ID, read the Box metadata template, download the file, build the asset
    representation, upload to the DAM, record the derivative, then delete the
    file from Box. Local temp files are always removed, on success or failure.

    Args:
        file_info: dict describing the Box file; must contain 'id' and 'name'.
        dam: client providing upload_asset().
        box: client providing get_file_metadata(), download_file() and
            client.file(id).delete().
        db: database handle providing get_master_asset() and
            store_derivative_asset().
        parser: filename parser providing parse_filename() and
            strip_upload_components().
        mvp_extractor: provides build_mvp_asset_representation().
        config: loaded configuration (not used here; kept so the call
            signature stays stable).

    Returns:
        dict. On success: success=True, asset_id, tracking_id, filename,
        clean_filename. On failure: success=False, error, filename,
        tracking_id (None if the failure happened before it was parsed).
        Errors raised during processing are caught and reported in the dict.
    """
    file_id = file_info['id']
    filename = file_info['name']
    download_dir = 'temp/downloads'
    tracking_id = None
    # Local paths this call may have created; whatever still exists when we
    # leave is removed in the finally block.
    temp_paths = []

    logger.info("Processing: {}".format(filename))
    try:
        # 1. Parse V2 filename
        parsed = parser.parse_filename(filename)
        if not parsed['is_valid']:
            raise ValueError("Invalid V2 filename: {} - {}".format(
                filename, ', '.join(parsed['validation_errors'])
            ))
        tracking_id = parsed['tracking_id']
        if not tracking_id:
            raise ValueError("No tracking ID in filename")

        # 2. Load master metadata from database
        master_asset = db.get_master_asset(tracking_id)
        if not master_asset:
            raise ValueError("No master asset for tracking ID: {}".format(tracking_id))

        # 3. Get Box metadata (CreativeX Score and URL from Ferrero-DAM-Metadata template)
        box_metadata = box.get_file_metadata(file_id, template_name='Ferrero-DAM-Metadata')
        logger.info("Box metadata retrieved: {}".format(box_metadata))

        # 4. Download from Box (make sure the target directory exists first)
        os.makedirs(download_dir, exist_ok=True)
        temp_file = os.path.join(download_dir, filename)
        temp_paths.append(temp_file)
        box.download_file(file_id, temp_file)

        # 5. Get clean filename
        clean_filename = parser.strip_upload_components(filename)

        # 6. Build MVP asset representation with Box metadata
        asset_rep = mvp_extractor.build_mvp_asset_representation(
            master_metadata=master_asset['full_metadata'],
            clean_filename=clean_filename,
            parsed_filename=parsed,
            box_metadata=box_metadata  # Pass CreativeX data from Box
        )

        # 7. Rename to clean filename. os.replace overwrites a stale file
        # from an earlier run and is a no-op when both names are identical
        # (remove-then-rename would delete the fresh download in that case).
        clean_temp_file = os.path.join(download_dir, clean_filename)
        temp_paths.append(clean_temp_file)
        os.replace(temp_file, clean_temp_file)

        # 8. Upload to DAM
        upload_result = dam.upload_asset(
            file_path=clean_temp_file,
            folder_id=master_asset['upload_directory'],
            asset_representation=asset_rep
        )
        if not upload_result['success']:
            raise RuntimeError("Upload failed: {}".format(upload_result.get('error')))

        # 9. Store derivative record
        # NOTE(review): if this raises, the asset is already in the DAM but
        # the file stays in Box, so a later run would presumably upload it
        # again - confirm whether duplicates are acceptable.
        db.store_derivative_asset(
            tracking_id=tracking_id,
            master_asset_id=None,
            dam_asset_id=upload_result['asset_id'],
            filename=clean_filename
        )

        # 10. Delete file from Box after successful upload (best effort)
        try:
            box_file = box.client.file(file_id)
            box_file.delete()
            logger.info("Deleted file from Box: {}".format(filename))
        except Exception as e:
            logger.warning("Could not delete file from Box: {}".format(str(e)))

        logger.info("✓ Success: {} → Asset ID: {}".format(filename, upload_result['asset_id']))
        return {
            'success': True,
            'asset_id': upload_result['asset_id'],
            'tracking_id': tracking_id,
            'filename': filename,
            'clean_filename': clean_filename
        }
    except Exception as e:
        logger.error("✗ Failed: {} - {}".format(filename, str(e)))
        return {
            'success': False,
            'error': str(e),
            'filename': filename,
            'tracking_id': tracking_id
        }
    finally:
        # 11. Clean up local temp files. Best effort: a leftover temp file
        # must not turn a completed upload into a reported failure.
        for path in temp_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning("Could not remove temp file {}: {}".format(path, str(e)))
def _poll_and_process(dam, box, db, notifier, parser, mvp_extractor):
    """
    Run one polling pass and return the process exit code.

    Tests the DAM, Box and database connections, lists the configured Box
    folder, keeps the files whose names parse as valid V2 names with a
    tracking ID, and processes only the first of them.

    Returns:
        0 when there was nothing to do or the file was processed, 1 when a
        connection test or the file processing failed.
    """
    # Test connections (same order as before; stop at the first failure)
    logger.info("Testing connections...")
    for label, client in (("DAM", dam), ("Box", box), ("Database", db)):
        if not client.test_connection():
            logger.error("{} connection failed".format(label))
            return 1
    logger.info("All connections OK")
    logger.info("")

    # Get Box folder ID to poll
    box_config = config['box']
    box_folder_id = box_config.get('root_folder_a2_a3', box_config.get('root_folder_id'))
    logger.info("Polling Box folder: {}".format(box_folder_id))

    # List files in Box folder
    files = box.list_folder_files(box_folder_id)
    if not files:
        logger.info("No files found in Box folder - exiting")
        return 0
    logger.info("Found {} files in Box folder".format(len(files)))

    # Filter for V2 filenames only
    valid_files = []
    for file_info in files:
        parsed = parser.parse_filename(file_info['name'])
        if parsed['is_valid'] and parsed.get('tracking_id'):
            valid_files.append(file_info)
        else:
            logger.debug("Skipping invalid file: {}".format(file_info['name']))
    logger.info("Found {} valid V2 files to process".format(len(valid_files)))
    if not valid_files:
        logger.info("No valid V2 files to process - exiting")
        return 0

    # Process files one at a time (process first file only)
    # NOTE(review): a file that fails on every run stays first in the list and
    # would presumably block the files behind it - confirm how the listing is
    # ordered and whether failed files are moved aside elsewhere.
    file_info = valid_files[0]
    logger.info("Processing first file only (more will be processed on next run)")
    logger.info("")
    result = process_box_file(file_info, dam, box, db, parser, mvp_extractor, config)

    if not result['success']:
        logger.warning("")
        logger.warning("=" * 60)
        logger.warning("✗ File processing failed")
        logger.warning(" Filename: {}".format(result['filename']))
        logger.warning(" Error: {}".format(result['error']))
        logger.warning("=" * 60)
        return 1

    logger.info("")
    logger.info("=" * 60)
    logger.info("✓ File processed successfully")
    logger.info(" Filename: {}".format(result['filename']))
    logger.info(" Clean filename: {}".format(result['clean_filename']))
    logger.info(" Asset ID: {}".format(result['asset_id']))
    logger.info(" Tracking ID: {}".format(result['tracking_id']))
    logger.info("=" * 60)

    # Send success email notification with details
    # Get master asset info for email
    master_asset = db.get_master_asset(result['tracking_id'])
    notifier.send_email(
        template_name='a2_to_a3_file_uploaded',
        recipients=config['notifications']['recipients']['success'],
        data={
            'filename': result['filename'],
            'clean_filename': result['clean_filename'],
            'asset_id': result['asset_id'],
            'tracking_id': result['tracking_id'],
            'master_asset_name': master_asset.get('opentext_id', 'Unknown') if master_asset else 'Unknown',
            'upload_folder': master_asset.get('upload_directory', 'Unknown') if master_asset else 'Unknown',
            'box_folder': box_folder_id
        }
    )
    # TODO: Check if all campaign assets uploaded and update status A2→A3
    # Would need to track campaign_id in master_assets table
    return 0


def main():
    """
    Main entry point - single run mode.

    Builds the DAM, Box, database and notifier clients, runs one polling
    pass, and terminates the process via sys.exit(): 0 when there was nothing
    to do or the file was processed, 1 on any failure. The database handle is
    closed exactly once on every path, including failed connection tests.
    """
    logger.info("=" * 60)
    logger.info("Ferrero A2→A3 Upload Handler Starting (Polling Mode)")
    logger.info("=" * 60)

    # Initialize clients
    dam = DAMClient(config)
    # Use A2→A3 Box folder for polling
    box = BoxClient(config, root_folder_id=config['box'].get('root_folder_a2_a3'))
    db = Database(config)
    try:
        notifier = Notifier(config)
        parser = FilenameParser()
        mvp_extractor = MetadataExtractorMVP(field_mappings)
        exit_code = _poll_and_process(dam, box, db, notifier, parser, mvp_extractor)
    except Exception as e:
        # exc_info keeps the traceback in the log; the message text is unchanged.
        logger.critical("Script error: {}".format(str(e)), exc_info=True)
        exit_code = 1
    finally:
        # Single close point, so no exit path can leak the database handle.
        db.close()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()