ferrero-opentext/Python-Version/scripts/a2_to_a3_upload_polling.py
DJP fb8f17d9b2 Add --keep-files flag to A2→A3 script to preserve Box files
Allows keeping uploaded files in Box for testing/debugging purposes.

NEW FEATURE: --keep-files Flag
- Optional flag for testing/debugging
- Prevents deletion of files from Box after upload
- Files remain in Box folder after successful DAM upload

USAGE:
  Default (delete files after upload):
    python scripts/a2_to_a3_upload_polling.py

  Keep files in Box (testing):
    python scripts/a2_to_a3_upload_polling.py --keep-files

  Combined with --A3update:
    python scripts/a2_to_a3_upload_polling.py --keep-files --A3update

HOW IT WORKS:
1. Upload file to DAM (always happens)
2. Store in database (always happens)
3. If --keep-files flag:
   - Skip Box file deletion
   - Log: "--keep-files flag set - File kept in Box: filename.jpg"
4. If no flag (default):
   - Delete file from Box
   - Log: "Deleted file from Box: filename.jpg"

LOGGING:
```
With flag:
  --keep-files flag set - File kept in Box: my_file.jpg

Without flag:
  Deleted file from Box: my_file.jpg
```

USE CASES:
- Testing: Upload multiple times without re-uploading to Box
- Debugging: Keep files to inspect Box metadata
- Development: Test upload logic without losing files
- Backup: Maintain Box copies during initial testing

PRODUCTION NOTE:
For production, don't use this flag - files should be deleted
after successful upload to avoid duplicates on next run.

BOTH FLAGS TOGETHER:
  python scripts/a2_to_a3_upload_polling.py --keep-files --A3update
  - Uploads file to DAM
  - Keeps file in Box
  - Updates campaign A2→A3
  - Perfect for end-to-end testing

Changes:
- scripts/a2_to_a3_upload_polling.py
  - Added --keep-files flag
  - Added keep_files parameter to process_box_file()
  - Conditional Box file deletion
  - Enhanced logging for both modes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 13:17:11 -05:00

331 lines
12 KiB
Python
Executable file

#!/usr/bin/env python3
"""
A2→A3 Upload Handler - Box Folder Polling Version
Polls the Box folder for files with V2 naming and uploads one file per run to the DAM.
Campaign status is updated A2→A3 only when the --A3update flag is passed (intended for testing);
this script does not itself check that ALL assets for a campaign have been uploaded.
Compatible with Python 3.6+
"""
import sys
import os
import time
import logging
import argparse
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config, load_field_mappings
from shared.dam_client import DAMClient
from shared.box_client import BoxClient
from shared.database import Database
from shared.notifier import Notifier
from shared.filename_parser import FilenameParser
from shared.metadata_extractor_mvp import MetadataExtractorMVP
# Load configuration. The path is relative, so it is resolved against the
# current working directory of the process.
config = load_config('config/config.yaml')
field_mappings = load_field_mappings(config)

# Setup logging with rotation
from logging.handlers import RotatingFileHandler

# Create logs directories if they don't exist
os.makedirs('logs', exist_ok=True)
os.makedirs('logs/backup', exist_ok=True)

# Rotation is size-based, not time-based: the active file rolls over once it
# reaches 10MB and up to 28 rotated files are kept next to it, so at most
# ~290MB of logs stay on disk (roughly one month at ~10MB of logs per day).
_log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

log_handler = RotatingFileHandler(
    'logs/a2_to_a3.log',
    maxBytes=10 * 1024 * 1024,  # 10MB per file
    backupCount=28,             # Keep 28 rotated files
    # Log messages contain non-ASCII characters ("✓", "✗", "→", "⚠"); without an
    # explicit encoding the file is opened with the locale default, which
    # cannot represent them on non-UTF-8 systems.
    encoding='utf-8'
)
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(_log_formatter)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(_log_formatter)

logging.basicConfig(
    level=logging.INFO,
    handlers=[log_handler, console_handler]
)
logger = logging.getLogger('A2toA3')
def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, keep_files=False):
    """
    Process a single file from the Box folder: download it, upload it to the
    DAM under its clean filename, record the derivative asset in the database
    and (unless keep_files is set) delete the original from Box.

    Args:
        file_info: dict with at least the keys 'id' and 'name' of the Box file
        dam: client exposing upload_asset(file_path=, folder_id=, asset_representation=)
        box: client exposing get_file_metadata(), download_file() and .client.file()
        db: database wrapper exposing get_master_asset() and store_derivative_asset()
        parser: filename parser exposing parse_filename() and strip_upload_components()
        mvp_extractor: exposes build_mvp_asset_representation()
        config: loaded configuration (not used here; kept for interface compatibility)
        keep_files: If True, don't delete file from Box after upload (for testing)

    Returns:
        On success: dict with success=True, asset_id, tracking_id, filename,
        clean_filename.
        On failure: dict with success=False, error, filename, tracking_id
        (tracking_id is None when the failure happened before it was parsed).

    Raises:
        KeyError: if file_info has no 'id' or 'name' key. Every other error is
        caught, logged and reported through the returned dict.
    """
    file_id = file_info['id']
    filename = file_info['name']
    download_dir = 'temp/downloads'

    # Initialised up front so the error path and the cleanup below can always
    # refer to them, however early the processing failed.
    tracking_id = None
    temp_file = None
    clean_temp_file = None

    logger.info("Processing: {}".format(filename))
    try:
        # 1. Parse V2 filename
        parsed = parser.parse_filename(filename)
        if not parsed['is_valid']:
            raise ValueError("Invalid V2 filename: {} - {}".format(
                filename, ', '.join(parsed['validation_errors'])
            ))
        tracking_id = parsed['tracking_id']
        if not tracking_id:
            raise ValueError("No tracking ID in filename")

        # 2. Load master metadata from database
        master_asset = db.get_master_asset(tracking_id)
        if not master_asset:
            raise ValueError("No master asset for tracking ID: {}".format(tracking_id))

        # 3. Get Box metadata (CreativeX Score and URL from Ferrero-DAM-Metadata template)
        box_metadata = box.get_file_metadata(file_id, template_name='Ferrero-DAM-Metadata')
        logger.info("Box metadata retrieved: {}".format(box_metadata))

        # 4. Download from Box (make sure the download directory exists first)
        os.makedirs(download_dir, exist_ok=True)
        temp_file = os.path.join(download_dir, filename)
        box.download_file(file_id, temp_file)

        # 5. Get clean filename
        clean_filename = parser.strip_upload_components(filename)

        # 6. Build MVP asset representation with Box metadata
        asset_rep = mvp_extractor.build_mvp_asset_representation(
            master_metadata=master_asset['full_metadata'],
            clean_filename=clean_filename,
            parsed_filename=parsed,
            box_metadata=box_metadata  # Pass CreativeX data from Box
        )

        # 7. Rename to clean filename. Skip the move when both names are equal:
        # removing the "stale" target would delete the file just downloaded.
        # os.replace overwrites a leftover file from an earlier run.
        clean_temp_file = os.path.join(download_dir, clean_filename)
        if clean_temp_file != temp_file:
            os.replace(temp_file, clean_temp_file)

        # 8. Upload to DAM
        upload_result = dam.upload_asset(
            file_path=clean_temp_file,
            folder_id=master_asset['upload_directory'],
            asset_representation=asset_rep
        )
        if not upload_result['success']:
            raise Exception("Upload failed: {}".format(upload_result.get('error')))

        # 9. Store derivative record
        db.store_derivative_asset(
            tracking_id=tracking_id,
            master_asset_id=None,
            dam_asset_id=upload_result['asset_id'],
            filename=clean_filename
        )

        # 10. Delete file from Box after successful upload (unless --keep-files flag set)
        if keep_files:
            logger.info("--keep-files flag set - File kept in Box: {}".format(filename))
        else:
            # Best effort: the upload already succeeded, so a failed delete is
            # only a warning.
            try:
                box_file = box.client.file(file_id)
                box_file.delete()
                logger.info("Deleted file from Box: {}".format(filename))
            except Exception as e:
                logger.warning("Could not delete file from Box: {}".format(str(e)))

        logger.info("✓ Success: {} → Asset ID: {}".format(filename, upload_result['asset_id']))
        return {
            'success': True,
            'asset_id': upload_result['asset_id'],
            'tracking_id': tracking_id,
            'filename': filename,
            'clean_filename': clean_filename
        }
    except Exception as e:
        logger.error("✗ Failed: {} - {}".format(filename, str(e)))
        return {
            'success': False,
            'error': str(e),
            'filename': filename,
            'tracking_id': tracking_id
        }
    finally:
        # 11. Clean up local temp files on success AND failure so failed runs
        # do not leave downloads behind. A cleanup problem is only logged; it
        # must not change the outcome reported above.
        for leftover in (temp_file, clean_temp_file):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError as e:
                    logger.warning("Could not remove temp file {}: {}".format(leftover, str(e)))
def _find_campaign_id(full_metadata):
    """Return the container_id of the first 'L7+ - CAMPAIGN' collection, or None."""
    for collection in full_metadata.get('inherited_metadata_collections', []):
        if collection.get('container_type_name') == 'L7+ - CAMPAIGN':
            return collection.get('container_id')
    return None


def main():
    """
    Main entry point - single run mode.

    Lists the configured Box folder, picks the first file with a valid V2
    filename, processes it with process_box_file() and sends a notification
    e-mail on success. With --A3update the campaign status is additionally
    updated to 'A3'. The process always terminates through sys.exit():
    0 when there was nothing to do or the file was processed, 1 on a failed
    connection test, a failed file or an unexpected error. The database
    connection is closed on every exit path.
    """
    # Parse command-line arguments
    parser_args = argparse.ArgumentParser(description='Ferrero A2→A3 Upload Handler')
    parser_args.add_argument('--A3update', action='store_true',
                             help='Force update campaign status A2→A3 after upload (for testing)')
    parser_args.add_argument('--keep-files', action='store_true',
                             help='Keep files in Box after upload (don\'t delete, for testing)')
    args = parser_args.parse_args()

    logger.info("=" * 60)
    logger.info("Ferrero A2→A3 Upload Handler Starting (Polling Mode)")
    if args.A3update:
        logger.info("Mode: Auto-update campaign status A2→A3 (--A3update)")
    if args.keep_files:
        logger.info("Mode: Keep files in Box after upload (--keep-files)")
    logger.info("=" * 60)

    # Initialize clients
    dam = DAMClient(config)
    # Use A2→A3 Box folder for polling.
    # NOTE(review): the client gets root_folder_a2_a3 without a fallback, while
    # the folder polled below falls back to root_folder_id - confirm that
    # BoxClient handles root_folder_id=None the same way.
    box = BoxClient(config, root_folder_id=config['box'].get('root_folder_a2_a3'))
    db = Database(config)
    notifier = Notifier(config)
    parser = FilenameParser()
    mvp_extractor = MetadataExtractorMVP(field_mappings)

    # sys.exit() raises SystemExit, which is not caught by "except Exception"
    # below but still runs the "finally" clause, so db.close() happens exactly
    # once on every exit path.
    try:
        # Test connections
        logger.info("Testing connections...")
        for label, client in (("DAM", dam), ("Box", box), ("Database", db)):
            if not client.test_connection():
                logger.error("{} connection failed".format(label))
                sys.exit(1)
        logger.info("All connections OK")
        logger.info("")

        # Get Box folder ID to poll
        box_folder_id = config['box'].get('root_folder_a2_a3', config['box'].get('root_folder_id'))
        logger.info("Polling Box folder: {}".format(box_folder_id))

        # List files in Box folder
        files = box.list_folder_files(box_folder_id)
        if not files:
            logger.info("No files found in Box folder - exiting")
            sys.exit(0)
        logger.info("Found {} files in Box folder".format(len(files)))

        # Filter for V2 filenames only
        valid_files = []
        for file_info in files:
            parsed = parser.parse_filename(file_info['name'])
            if parsed['is_valid'] and parsed.get('tracking_id'):
                valid_files.append(file_info)
            else:
                logger.debug("Skipping invalid file: {}".format(file_info['name']))
        logger.info("Found {} valid V2 files to process".format(len(valid_files)))
        if not valid_files:
            logger.info("No valid V2 files to process - exiting")
            sys.exit(0)

        # Process files one at a time (process first file only)
        file_info = valid_files[0]
        logger.info("Processing first file only (more will be processed on next run)")
        logger.info("")
        result = process_box_file(file_info, dam, box, db, parser, mvp_extractor, config,
                                  keep_files=args.keep_files)

        if not result['success']:
            logger.warning("")
            logger.warning("=" * 60)
            logger.warning("✗ File processing failed")
            logger.warning("  Filename: {}".format(result['filename']))
            logger.warning("  Error: {}".format(result['error']))
            logger.warning("=" * 60)
            sys.exit(1)

        logger.info("")
        logger.info("=" * 60)
        logger.info("✓ File processed successfully")
        logger.info("  Filename: {}".format(result['filename']))
        logger.info("  Clean filename: {}".format(result['clean_filename']))
        logger.info("  Asset ID: {}".format(result['asset_id']))
        logger.info("  Tracking ID: {}".format(result['tracking_id']))
        logger.info("=" * 60)

        # Get master asset info for the email and the optional status update
        master_asset = db.get_master_asset(result['tracking_id'])

        # Send success email notification with details. Best effort: the file
        # is already uploaded, so a notification problem must neither turn the
        # run into a failure nor skip the status update below.
        try:
            notifier.send_email(
                template_name='a2_to_a3_file_uploaded',
                recipients=config['notifications']['recipients']['success'],
                data={
                    'filename': result['filename'],
                    'clean_filename': result['clean_filename'],
                    'asset_id': result['asset_id'],
                    'tracking_id': result['tracking_id'],
                    'master_asset_name': master_asset.get('opentext_id', 'Unknown') if master_asset else 'Unknown',
                    'upload_folder': master_asset.get('upload_directory', 'Unknown') if master_asset else 'Unknown',
                    'box_folder': box_folder_id
                }
            )
        except Exception as e:
            logger.warning("Could not send upload notification: {}".format(str(e)))

        # Update campaign status A2→A3 if --A3update flag is set
        if args.A3update and master_asset:
            logger.info("")
            logger.info("--A3update flag set - Attempting to update campaign status")
            # Campaign ID comes from the master asset's inherited_metadata_collections
            campaign_id = _find_campaign_id(master_asset.get('full_metadata', {}))
            if campaign_id:
                logger.info("Found campaign ID: {}".format(campaign_id))
                logger.info("Updating campaign status A2 → A3...")
                status_result = dam.update_campaign_status(campaign_id, 'A3')
                if status_result['success']:
                    logger.info("✓ Campaign status updated successfully: A2 → A3")
                else:
                    logger.error("✗ Campaign status update failed: {}".format(status_result.get('error')))
            else:
                logger.warning("⚠ Campaign ID not found in master asset metadata - cannot update status")

        sys.exit(0)
    except Exception as e:
        # exc_info adds the traceback so unexpected errors can be diagnosed
        logger.critical("Script error: {}".format(str(e)), exc_info=True)
        sys.exit(1)
    finally:
        db.close()


if __name__ == '__main__':
    main()