Fix: CreativeX tracking ID fallback, filename stripping, and social media codes

CreativeX lookup now falls back to tracking ID search when filename match fails
(handles mismatched naming from CreativeX PDFs). strip_upload_components now
only removes job number and tracking ID, keeping social media codes (YTA, DV3,
etc.) in the clean filename. Updated SOCIAL_MEDIA_CODES from 4 to 43 codes
sourced from the Ferrero naming tool.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-02-13 13:24:23 +02:00
parent 6517a4f83f
commit 98826d51c4
3 changed files with 56 additions and 40 deletions

View file

@ -170,10 +170,8 @@ def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, not
if not master_asset:
raise ValueError("No master asset for tracking ID: {}".format(tracking_id))
# 3. Get CreativeX score from database (lookup by original Box filename)
# The PDF contains the filename field with the full name (job + tracking ID)
# So we lookup using the original filename from Box, not the stripped version
creativex_data = db.get_creativex_score_by_filename(filename)
# 3. Get CreativeX score from database (lookup by filename, fallback to tracking ID)
creativex_data = db.get_creativex_score_by_filename(filename, tracking_id=tracking_id)
# Build box_metadata dict (for compatibility with existing code)
if creativex_data:

View file

@ -875,15 +875,18 @@ class Database:
cursor.close()
self.put_connection(conn)
def get_creativex_score_by_filename(self, filename):
def get_creativex_score_by_filename(self, filename, tracking_id=None):
"""
Get CreativeX score data by filename
Performs extension-agnostic lookup: if exact filename not found,
tries common video/image extensions (.mp4, .jpg, .png, .mov, etc.)
If still not found and tracking_id provided, falls back to LIKE search
on tracking ID (handles mismatched naming from CreativeX PDFs).
Args:
filename: Filename to search for
tracking_id: Optional tracking ID for fallback lookup
Returns:
dict with creativex data or None if not found
@ -930,6 +933,24 @@ class Database:
if row:
break # Found with alternative extension
# If still not found, try tracking ID fallback
# CreativeX PDFs sometimes have different naming (extra text, stripped hyphens)
# but tracking ID is always consistent
if not row and tracking_id:
cursor.execute("""
SELECT filename, creativex_id, creativex_url, quality_score,
box_file_id, full_extraction_data, extracted_at
FROM creativex_scores
WHERE filename LIKE %s AND status = 'active'
ORDER BY extracted_at DESC
LIMIT 1
""", ('%' + tracking_id + '%',))
row = cursor.fetchone()
if row:
logger.info("CreativeX: Found score via tracking ID fallback '{}' -> {}".format(
tracking_id, row[0]))
if not row:
return None

View file

@ -20,8 +20,22 @@ class FilenameParser:
PROD Environment: Single tracking ID only (backward compatible)
"""
# Known social media platform codes
SOCIAL_MEDIA_CODES = ['FBP', 'FBR', 'IGF', 'IGR'] # Expandable
# Known social media platform codes (from Ferrero naming tool data.json)
SOCIAL_MEDIA_CODES = [
# Facebook
'FBD', 'FGF', 'FBR', 'FRO', 'FBS', 'FBF', 'FBP', 'FIA', 'FIV',
'FMP', 'FPF', 'FRC', 'FSE', 'FSS', 'FSV', 'FUK', 'FVF',
# Instagram
'IGF', 'IGE', 'IGG', 'IGT', 'IPF', 'IPR', 'IGR', 'IGO', 'IGS', 'ISH', 'IST',
# Audience Network
'ANC', 'ANI', 'ANR',
# Messenger
'MSI', 'MSS',
# YouTube
'YTA', 'YTB', 'YTS',
# Other platforms
'AMZ', 'DV3', 'GOO', 'PIN', 'SNA', 'TIK', 'TWI',
]
def __init__(self, dam_base_url=None):
"""
@ -282,8 +296,8 @@ class FilenameParser:
def strip_upload_components(self, filename):
"""
Strip OMG Job Number and Tracking ID from filename
Returns clean filename in V2.1 order
Strip OMG Job Number from front and Tracking ID from back of filename.
Keeps everything else as-is (including social media codes, DV3, etc.)
Args:
filename: Original filename
@ -292,40 +306,23 @@ class FilenameParser:
Clean filename for upload (no job number, no tracking ID)
Example:
Input: 1234567_RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF_abc123.mp4
Output: RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF.mp4
Input: 6662777_NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA_EvQJrM.mp4
Output: NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA.mp4
"""
parsed = self.parse_filename(filename)
import os
if not parsed:
base, ext = os.path.splitext(filename)
parts = base.split('_')
if len(parts) < 3:
return filename
# Build clean filename in V2.1 order
# [BRAND]_[SUBJECT]_[ASSET]_[DUR]_[RATIO]_[SPOT]_[COUNTRY]_[LANG]_[SOCIAL]
clean_parts = []
# Strip job number from front (digits only)
if parts[0].isdigit():
parts = parts[1:]
if parsed['brand_code']:
clean_parts.append(parsed['brand_code'])
if parsed['subject_title']:
clean_parts.append(parsed['subject_title'])
if parsed['asset_type']:
clean_parts.append(parsed['asset_type'])
if parsed['seconds']:
clean_parts.append(parsed['seconds'] + 'S')
if parsed['aspect_ratio']:
clean_parts.append(parsed['aspect_ratio'])
if parsed['spot_version']:
clean_parts.append(parsed['spot_version'])
if parsed['country_code']:
clean_parts.append(parsed['country_code'])
if parsed['language_code']:
clean_parts.append(parsed['language_code'])
if parsed['social_media_version']:
clean_parts.append(parsed['social_media_version'])
# Strip tracking ID(s) from back (6 alphanumeric chars, optionally with +joined IDs or -N suffix)
if parts and re.match(r'^[a-zA-Z0-9]{6}(-N)?(\+[a-zA-Z0-9]{6}(-N)?)*$', parts[-1]):
parts = parts[:-1]
clean_filename = '_'.join(clean_parts)
if parsed['extension']:
clean_filename += parsed['extension']
return clean_filename
return '_'.join(parts) + ext