From 98826d51c41ca2fb8ed751cfa3282b67f4ff5fcc Mon Sep 17 00:00:00 2001 From: nickviljoen Date: Fri, 13 Feb 2026 13:24:23 +0200 Subject: [PATCH] Fix: CreativeX tracking ID fallback, filename stripping, and social media codes CreativeX lookup now falls back to tracking ID search when filename match fails (handles mismatched naming from CreativeX PDFs). strip_upload_components now only removes job number and tracking ID, keeping social media codes (YTA, DV3, etc.) in the clean filename. Updated SOCIAL_MEDIA_CODES from 4 to 43 codes (39 added) sourced from the Ferrero naming tool. Co-Authored-By: Claude Opus 4.6 --- .../scripts/a2_to_a3_upload_polling.py | 6 +- Python-Version/scripts/shared/database.py | 23 ++++++- .../scripts/shared/filename_parser.py | 67 +++++++++---------- 3 files changed, 56 insertions(+), 40 deletions(-) diff --git a/Python-Version/scripts/a2_to_a3_upload_polling.py b/Python-Version/scripts/a2_to_a3_upload_polling.py index 4c37470..63cfe3f 100755 --- a/Python-Version/scripts/a2_to_a3_upload_polling.py +++ b/Python-Version/scripts/a2_to_a3_upload_polling.py @@ -170,10 +170,8 @@ def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, not if not master_asset: raise ValueError("No master asset for tracking ID: {}".format(tracking_id)) - # 3. Get CreativeX score from database (lookup by original Box filename) - # The PDF contains the filename field with the full name (job + tracking ID) - # So we lookup using the original filename from Box, not the stripped version - creativex_data = db.get_creativex_score_by_filename(filename) + # 3. 
Get CreativeX score from database (lookup by filename, fallback to tracking ID) + creativex_data = db.get_creativex_score_by_filename(filename, tracking_id=tracking_id) # Build box_metadata dict (for compatibility with existing code) if creativex_data: diff --git a/Python-Version/scripts/shared/database.py b/Python-Version/scripts/shared/database.py index e2ce4fb..0a3fc1a 100644 --- a/Python-Version/scripts/shared/database.py +++ b/Python-Version/scripts/shared/database.py @@ -875,15 +875,18 @@ class Database: cursor.close() self.put_connection(conn) - def get_creativex_score_by_filename(self, filename): + def get_creativex_score_by_filename(self, filename, tracking_id=None): """ Get CreativeX score data by filename Performs extension-agnostic lookup: if exact filename not found, tries common video/image extensions (.mp4, .jpg, .png, .mov, etc.) + If still not found and tracking_id provided, falls back to LIKE search + on tracking ID (handles mismatched naming from CreativeX PDFs). Args: filename: Filename to search for + tracking_id: Optional tracking ID for fallback lookup Returns: dict with creativex data or None if not found @@ -930,6 +933,24 @@ class Database: if row: break # Found with alternative extension + # If still not found, try tracking ID fallback + # CreativeX PDFs sometimes have different naming (extra text, stripped hyphens) + # but tracking ID is always consistent + if not row and tracking_id: + cursor.execute(""" + SELECT filename, creativex_id, creativex_url, quality_score, + box_file_id, full_extraction_data, extracted_at + FROM creativex_scores + WHERE filename LIKE %s AND status = 'active' + ORDER BY extracted_at DESC + LIMIT 1 + """, ('%' + tracking_id + '%',)) + + row = cursor.fetchone() + if row: + logger.info("CreativeX: Found score via tracking ID fallback '{}' -> {}".format( + tracking_id, row[0])) + if not row: return None diff --git a/Python-Version/scripts/shared/filename_parser.py b/Python-Version/scripts/shared/filename_parser.py 
index 4629019..f522b01 100644 --- a/Python-Version/scripts/shared/filename_parser.py +++ b/Python-Version/scripts/shared/filename_parser.py @@ -20,8 +20,22 @@ class FilenameParser: PROD Environment: Single tracking ID only (backward compatible) """ - # Known social media platform codes - SOCIAL_MEDIA_CODES = ['FBP', 'FBR', 'IGF', 'IGR'] # Expandable + # Known social media platform codes (from Ferrero naming tool data.json) + SOCIAL_MEDIA_CODES = [ + # Facebook + 'FBD', 'FGF', 'FBR', 'FRO', 'FBS', 'FBF', 'FBP', 'FIA', 'FIV', + 'FMP', 'FPF', 'FRC', 'FSE', 'FSS', 'FSV', 'FUK', 'FVF', + # Instagram + 'IGF', 'IGE', 'IGG', 'IGT', 'IPF', 'IPR', 'IGR', 'IGO', 'IGS', 'ISH', 'IST', + # Audience Network + 'ANC', 'ANI', 'ANR', + # Messenger + 'MSI', 'MSS', + # YouTube + 'YTA', 'YTB', 'YTS', + # Other platforms + 'AMZ', 'DV3', 'GOO', 'PIN', 'SNA', 'TIK', 'TWI', + ] def __init__(self, dam_base_url=None): """ @@ -282,8 +296,8 @@ class FilenameParser: def strip_upload_components(self, filename): """ - Strip OMG Job Number and Tracking ID from filename - Returns clean filename in V2.1 order + Strip OMG Job Number from front and Tracking ID from back of filename. + Keeps everything else as-is (including social media codes, DV3, etc.) 
Args: filename: Original filename @@ -292,40 +306,23 @@ class FilenameParser: Clean filename for upload (no job number, no tracking ID) Example: - Input: 1234567_RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF_abc123.mp4 - Output: RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF.mp4 + Input: 6662777_NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA_EvQJrM.mp4 + Output: NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA.mp4 """ - parsed = self.parse_filename(filename) + import os - if not parsed: + base, ext = os.path.splitext(filename) + parts = base.split('_') + + if len(parts) < 3: return filename - # Build clean filename in V2.1 order - # [BRAND]_[SUBJECT]_[ASSET]_[DUR]_[RATIO]_[SPOT]_[COUNTRY]_[LANG]_[SOCIAL] - clean_parts = [] + # Strip job number from front (digits only) + if parts[0].isdigit(): + parts = parts[1:] - if parsed['brand_code']: - clean_parts.append(parsed['brand_code']) - if parsed['subject_title']: - clean_parts.append(parsed['subject_title']) - if parsed['asset_type']: - clean_parts.append(parsed['asset_type']) - if parsed['seconds']: - clean_parts.append(parsed['seconds'] + 'S') - if parsed['aspect_ratio']: - clean_parts.append(parsed['aspect_ratio']) - if parsed['spot_version']: - clean_parts.append(parsed['spot_version']) - if parsed['country_code']: - clean_parts.append(parsed['country_code']) - if parsed['language_code']: - clean_parts.append(parsed['language_code']) - if parsed['social_media_version']: - clean_parts.append(parsed['social_media_version']) + # Strip tracking ID(s) from back (6 alphanumeric chars, optionally with +joined IDs or -N suffix) + if parts and re.match(r'^[a-zA-Z0-9]{6}(-N)?(\+[a-zA-Z0-9]{6}(-N)?)*$', parts[-1]): + parts = parts[:-1] - clean_filename = '_'.join(clean_parts) - - if parsed['extension']: - clean_filename += parsed['extension'] - - return clean_filename + return '_'.join(parts) + ext