# Adds EOL as a new asset type with field overrides for both PPR and PROD:
#   - Asset type maps to 'externallegalopinion' in DAM
#   - Agency Name = "-", Production House = "-"
#   - Main Languages = "Global"
#   - IP Rights = "Yes", Licensing = "No"
#   - Validity dates removed
# Also adds VOD platform code and removes OLV asset type.
#
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
328 lines
13 KiB
Python
328 lines
13 KiB
Python
"""
Filename Parser - V2.1 Naming Convention Parser
Updated November 2025 with new field positions
Compatible with Python 3.6+
"""
|
|
|
|
import re
|
|
import logging
|
|
|
|
# Module-level logger used by FilenameParser for its debug/info/warning output.
logger = logging.getLogger('FilenameParser')
|
|
|
|
class FilenameParser:
    """
    Parse V2.1 naming convention filenames:
    [JOB]_[BRAND]_[SUBJECT]_[ASSET]_[DUR]_[RATIO]_[SPOT]_[COUNTRY]_[LANG]_[SOCIAL]_[TRACKING]

    Example: 1234567_RAF_ME-MOMENT_OLV_6S_1x1_REF_GL_it_IGF_pOiJ9s

    PPR Environment: Supports multiple tracking IDs (e.g., pOiJ9s+BqB8vo+laRJo0)
    PROD Environment: Single tracking ID only (backward compatible)
    """

    # Known social media platform codes (from Ferrero naming tool data.json)
    SOCIAL_MEDIA_CODES = [
        # Facebook
        'FBD', 'FGF', 'FBR', 'FRO', 'FBS', 'FBF', 'FBP', 'FIA', 'FIV',
        'FMP', 'FPF', 'FRC', 'FSE', 'FSS', 'FSV', 'FUK', 'FVF',
        # Instagram
        'IGF', 'IGE', 'IGG', 'IGT', 'IPF', 'IPR', 'IGR', 'IGO', 'IGS', 'ISH', 'IST',
        # Audience Network
        'ANC', 'ANI', 'ANR',
        # Messenger
        'MSI', 'MSS',
        # YouTube
        'YTA', 'YTB', 'YTS',
        # Other platforms
        'AMZ', 'DV3', 'GOO', 'PIN', 'SNA', 'TIK', 'TWI', 'VOD',
    ]

    # Spot version markers accepted between the aspect ratio and the country.
    _SPOT_VERSIONS = ('MST', 'REF')

    # Duration token: digits followed by 'S' (case-insensitive), e.g. "6S", "30s".
    _DURATION_RE = re.compile(r'^\d+S$', re.IGNORECASE)

    # Tracking token: one or more 6-char alphanumeric IDs joined by '+',
    # each optionally carrying a "-N" suffix. Shared by parse_filename and
    # strip_upload_components so both always recognise the same tokens.
    _TRACKING_RE = re.compile(r'^[a-zA-Z0-9]{6}(-N)?(\+[a-zA-Z0-9]{6}(-N)?)*$')

    def __init__(self, dam_base_url=None):
        """
        Initialize parser with optional environment detection

        Args:
            dam_base_url: DAM base URL for environment detection (optional)
        """
        self.dam_base_url = dam_base_url
        self.is_ppr = self._is_ppr_environment()

    def _is_ppr_environment(self):
        """Return True when dam_base_url contains the PPR host name, else False."""
        if not self.dam_base_url:
            return False
        return 'ppr.dam.ferrero.com' in self.dam_base_url.lower()

    @staticmethod
    def _split_tracking_token(token):
        """Split one tracking token into (base_id, mode).

        A trailing "-N" marks the ID as 'folder_only' and is removed from the
        base ID; every other token is returned unchanged with mode 'full'.
        """
        if token.endswith('-N'):
            base_tracking_id = token[:-2]
            logger.info("Folder-only tracking ID: {} (base: {})".format(token, base_tracking_id))
            return base_tracking_id, 'folder_only'
        return token, 'full'

    def _store_tracking(self, parsed, part):
        """Write the tracking fields of `parsed` from a matched tracking token.

        Args:
            parsed: Result dict being built by parse_filename (mutated in place)
            part: Filename component that already matched _TRACKING_RE
        """
        if '+' in part and self.is_ppr:
            # PPR ONLY: every '+'-joined ID is kept.
            id_parts = part.split('+')
            logger.info("PPR Environment - Multiple tracking IDs detected: {}".format(len(id_parts)))

            tracking_ids = []
            tracking_modes = []
            for token in id_parts:
                base_tracking_id, tracking_mode = self._split_tracking_token(token)
                tracking_ids.append(base_tracking_id)
                tracking_modes.append(tracking_mode)

            # Primary (first) ID is mirrored into the scalar fields for
            # backward compatibility with single-ID consumers.
            parsed['tracking_id'] = tracking_ids[0]
            parsed['tracking_mode'] = tracking_modes[0]
            parsed['tracking_id_with_suffix'] = id_parts[0]

            # All IDs for multi-master support
            parsed['tracking_ids'] = tracking_ids
            parsed['tracking_modes'] = tracking_modes
            parsed['tracking_ids_with_suffix'] = id_parts
            parsed['has_multiple_masters'] = True

            logger.info("Parsed {} tracking IDs: {}".format(len(tracking_ids), ', '.join(tracking_ids)))
            return

        # PROD or single ID: only the first tracking ID is used.
        if '+' in part:
            logger.warning("PROD Environment - Multiple tracking IDs not supported, using first ID only")
            part = part.split('+')[0]

        base_tracking_id, tracking_mode = self._split_tracking_token(part)

        parsed['tracking_id'] = base_tracking_id
        parsed['tracking_mode'] = tracking_mode
        parsed['tracking_id_with_suffix'] = part
        # Single-item lists so the list fields have the same shape as in the
        # multi-ID case.
        parsed['tracking_ids'] = [base_tracking_id]
        parsed['tracking_modes'] = [tracking_mode]
        parsed['tracking_ids_with_suffix'] = [part]
        parsed['has_multiple_masters'] = False

        logger.debug("Found tracking ID: {}".format(part))

    def parse_filename(self, filename):
        """
        Parse V2.1 filename into components

        New Structure (V2.1):
        [JOB]_[BRAND]_[SUBJECT]_[ASSET]_[DUR]_[RATIO]_[SPOT]_[COUNTRY]_[LANG]_[SOCIAL]_[TRACKING]

        The first four components are positional. The remaining ones are
        recognised by pattern, in order, and any of them may be absent.

        Args:
            filename: Filename to parse (with or without extension)

        Returns:
            dict with parsed components and validation results. Every key is
            always present: fields that were not found stay None (scalars),
            [] (tracking_ids, tracking_modes, tracking_ids_with_suffix) or
            False (has_master, has_multiple_masters). 'is_valid' is True only
            when no validation error was recorded; warnings do not affect it.
        """
        validation_errors = []
        warnings = []

        # Remove extension: everything after the last '.' is the extension.
        if '.' in filename:
            filename_without_ext, extension = filename.rsplit('.', 1)
            extension = '.' + extension
        else:
            filename_without_ext = filename
            extension = ''

        # Split by underscore
        parts = filename_without_ext.split('_')

        parsed = {
            'original_filename': filename,
            'filename_without_ext': filename_without_ext,
            'extension': extension,
            'omg_job_number': None,
            'brand_code': None,
            'subject_title': None,
            'asset_type': None,
            'seconds': None,
            'aspect_ratio': None,
            'spot_version': None,
            'country_code': None,
            'language_code': None,
            'social_media_version': None,
            'tracking_id': None,
            'tracking_mode': 'full',
            'tracking_id_with_suffix': None,
            'tracking_ids': [],
            'tracking_modes': [],
            'tracking_ids_with_suffix': [],
            'has_multiple_masters': False,
            'has_master': False,
            'validation_errors': [],
            'warnings': [],
            'is_valid': False
        }

        # Minimum 7 parts: JOB + BRAND + SUBJECT + ASSET + RATIO + COUNTRY + LANG
        if len(parts) < 7:
            validation_errors.append("Invalid structure: expected min 7 parts, got {}".format(len(parts)))
            parsed['validation_errors'] = validation_errors
            return parsed

        # ===================================================================
        # FIXED POSITIONS (Always in these positions)
        # ===================================================================
        omg, brand, subject, asset = parts[:4]

        # 1. OMG Job Number (digits only, max 10)
        if not omg.isdigit():
            validation_errors.append("OMG Job Number missing or invalid: {}".format(omg))
        elif len(omg) > 10:
            validation_errors.append("OMG Job Number too long: {} (max 10)".format(omg))
        else:
            parsed['omg_job_number'] = omg

        # 2. Brand Code (2-5 chars, stored uppercase)
        brand = brand.upper()
        if 2 <= len(brand) <= 5:
            parsed['brand_code'] = brand
        else:
            validation_errors.append("Brand Code invalid: {} (must be 2-5 chars)".format(brand))

        # 3. Subject Title (over-long titles are kept, with a warning)
        if len(subject) > 15:
            warnings.append("Subject title exceeds 15 chars: {}".format(subject))
        parsed['subject_title'] = subject

        # 4. Asset Type (exactly 3 chars, stored uppercase)
        asset = asset.upper()
        if len(asset) == 3:
            parsed['asset_type'] = asset
        else:
            validation_errors.append("Asset Type invalid: {} (must be 3 chars)".format(asset))

        # ===================================================================
        # VARIABLE/OPTIONAL POSITIONS (Pattern-based detection)
        # ===================================================================
        # Fields can appear in this order but some may be missing:
        # [DURATION] [RATIO] [SPOT] [COUNTRY] [LANG] [SOCIAL] [TRACKING]
        # The found_* flags enforce that order; the branch order below matters.
        found_ratio = False
        found_country = False
        found_language = False

        for part in parts[4:]:
            upper_part = part.upper()

            # Duration: Digits + 'S' (e.g., "6S", "30S") - BEFORE ratio
            if not found_ratio and self._DURATION_RE.match(part):
                parsed['seconds'] = part[:-1]  # Remove 'S'
                logger.debug("Found duration: {}".format(part))

            # Aspect Ratio: Contains 'x' or ':' (e.g., "16x9", "1x1")
            elif not found_ratio and ('x' in part.lower() or ':' in part):
                parsed['aspect_ratio'] = part
                found_ratio = True
                logger.debug("Found aspect ratio: {}".format(part))

            # Spot Version: Exactly "MST" or "REF" - AFTER ratio, BEFORE country
            elif found_ratio and not found_country and upper_part in self._SPOT_VERSIONS:
                parsed['spot_version'] = upper_part
                parsed['has_master'] = (upper_part == 'MST')
                logger.debug("Found spot version: {}".format(part))

            # Country Code: 2 uppercase alpha - AFTER ratio/spot
            elif (found_ratio and not found_country
                  and len(part) == 2 and part.isalpha() and part.isupper()):
                parsed['country_code'] = upper_part
                found_country = True
                logger.debug("Found country: {}".format(part))

            # Language Code: 2-3 lowercase alpha - AFTER country
            elif (found_country and not found_language
                  and len(part) in (2, 3) and part.isalpha() and part.islower()):
                parsed['language_code'] = part.lower()
                found_language = True
                logger.debug("Found language: {}".format(part))

            # Social Media: One of known codes - AFTER language
            elif found_language and upper_part in self.SOCIAL_MEDIA_CODES:
                parsed['social_media_version'] = upper_part
                logger.debug("Found social media: {}".format(part))

            # Tracking ID(s): 6 alphanumeric, optionally with -N suffix
            # PPR: Supports multiple IDs (e.g., "BqB8vo+SfUQ7m+laRJo0")
            # PROD: Single ID only (backward compatible)
            elif self._TRACKING_RE.match(part):
                self._store_tracking(parsed, part)

            # Unknown part before any ratio: treat it as a ratio in an
            # unexpected format so the later fields can still be detected.
            elif not found_ratio:
                parsed['aspect_ratio'] = part
                found_ratio = True
                warnings.append("Aspect ratio in unexpected format: {}".format(part))

            # Unknown component - skip it
            else:
                warnings.append("Unknown component skipped: {}".format(part))

        # Set validation status
        parsed['validation_errors'] = validation_errors
        parsed['warnings'] = warnings
        parsed['is_valid'] = len(validation_errors) == 0

        return parsed

    def strip_upload_components(self, filename):
        """
        Strip OMG Job Number from front and Tracking ID from back of filename.
        Keeps everything else as-is (including social media codes, DV3, etc.)

        Filenames with fewer than 3 underscore-separated parts are returned
        unchanged.

        Args:
            filename: Original filename

        Returns:
            Clean filename for upload (no job number, no tracking ID)

        Example:
            Input:  6662777_NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA_EvQJrM.mp4
            Output: NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA.mp4
        """
        import os

        base, ext = os.path.splitext(filename)
        parts = base.split('_')

        if len(parts) < 3:
            return filename

        # Strip job number from front (digits only)
        if parts[0].isdigit():
            parts = parts[1:]

        # Strip tracking ID(s) from back (6 alphanumeric chars, optionally
        # with +joined IDs or -N suffix)
        if parts and self._TRACKING_RE.match(parts[-1]):
            parts = parts[:-1]

        return '_'.join(parts) + ext
|