Fix: CreativeX tracking ID fallback, filename stripping, and social media codes
CreativeX lookup now falls back to a tracking ID search when the filename match fails (handles mismatched naming from CreativeX PDFs).
strip_upload_components now removes only the job number and tracking ID, keeping social media codes (YTA, DV3, etc.) in the clean filename.
Updated SOCIAL_MEDIA_CODES from 4 to 43 codes sourced from the Ferrero naming tool.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6517a4f83f
commit
98826d51c4
3 changed files with 56 additions and 40 deletions
|
|
@ -170,10 +170,8 @@ def process_box_file(file_info, dam, box, db, parser, mvp_extractor, config, not
|
|||
if not master_asset:
|
||||
raise ValueError("No master asset for tracking ID: {}".format(tracking_id))
|
||||
|
||||
# 3. Get CreativeX score from database (lookup by original Box filename)
|
||||
# The PDF contains the filename field with the full name (job + tracking ID)
|
||||
# So we lookup using the original filename from Box, not the stripped version
|
||||
creativex_data = db.get_creativex_score_by_filename(filename)
|
||||
# 3. Get CreativeX score from database (lookup by filename, fallback to tracking ID)
|
||||
creativex_data = db.get_creativex_score_by_filename(filename, tracking_id=tracking_id)
|
||||
|
||||
# Build box_metadata dict (for compatibility with existing code)
|
||||
if creativex_data:
|
||||
|
|
|
|||
|
|
@ -875,15 +875,18 @@ class Database:
|
|||
cursor.close()
|
||||
self.put_connection(conn)
|
||||
|
||||
def get_creativex_score_by_filename(self, filename):
|
||||
def get_creativex_score_by_filename(self, filename, tracking_id=None):
|
||||
"""
|
||||
Get CreativeX score data by filename
|
||||
|
||||
Performs extension-agnostic lookup: if exact filename not found,
|
||||
tries common video/image extensions (.mp4, .jpg, .png, .mov, etc.)
|
||||
If still not found and tracking_id provided, falls back to LIKE search
|
||||
on tracking ID (handles mismatched naming from CreativeX PDFs).
|
||||
|
||||
Args:
|
||||
filename: Filename to search for
|
||||
tracking_id: Optional tracking ID for fallback lookup
|
||||
|
||||
Returns:
|
||||
dict with creativex data or None if not found
|
||||
|
|
@ -930,6 +933,24 @@ class Database:
|
|||
if row:
|
||||
break # Found with alternative extension
|
||||
|
||||
# If still not found, try tracking ID fallback
|
||||
# CreativeX PDFs sometimes have different naming (extra text, stripped hyphens)
|
||||
# but tracking ID is always consistent
|
||||
if not row and tracking_id:
|
||||
cursor.execute("""
|
||||
SELECT filename, creativex_id, creativex_url, quality_score,
|
||||
box_file_id, full_extraction_data, extracted_at
|
||||
FROM creativex_scores
|
||||
WHERE filename LIKE %s AND status = 'active'
|
||||
ORDER BY extracted_at DESC
|
||||
LIMIT 1
|
||||
""", ('%' + tracking_id + '%',))
|
||||
|
||||
row = cursor.fetchone()
|
||||
if row:
|
||||
logger.info("CreativeX: Found score via tracking ID fallback '{}' -> {}".format(
|
||||
tracking_id, row[0]))
|
||||
|
||||
if not row:
|
||||
return None
|
||||
|
||||
|
|
|
|||
|
|
@ -20,8 +20,22 @@ class FilenameParser:
|
|||
PROD Environment: Single tracking ID only (backward compatible)
|
||||
"""
|
||||
|
||||
# Known social media platform codes
|
||||
SOCIAL_MEDIA_CODES = ['FBP', 'FBR', 'IGF', 'IGR'] # Expandable
|
||||
# Known social media platform codes (from Ferrero naming tool data.json)
|
||||
SOCIAL_MEDIA_CODES = [
|
||||
# Facebook
|
||||
'FBD', 'FGF', 'FBR', 'FRO', 'FBS', 'FBF', 'FBP', 'FIA', 'FIV',
|
||||
'FMP', 'FPF', 'FRC', 'FSE', 'FSS', 'FSV', 'FUK', 'FVF',
|
||||
# Instagram
|
||||
'IGF', 'IGE', 'IGG', 'IGT', 'IPF', 'IPR', 'IGR', 'IGO', 'IGS', 'ISH', 'IST',
|
||||
# Audience Network
|
||||
'ANC', 'ANI', 'ANR',
|
||||
# Messenger
|
||||
'MSI', 'MSS',
|
||||
# YouTube
|
||||
'YTA', 'YTB', 'YTS',
|
||||
# Other platforms
|
||||
'AMZ', 'DV3', 'GOO', 'PIN', 'SNA', 'TIK', 'TWI',
|
||||
]
|
||||
|
||||
def __init__(self, dam_base_url=None):
|
||||
"""
|
||||
|
|
@ -282,8 +296,8 @@ class FilenameParser:
|
|||
|
||||
def strip_upload_components(self, filename):
|
||||
"""
|
||||
Strip OMG Job Number and Tracking ID from filename
|
||||
Returns clean filename in V2.1 order
|
||||
Strip OMG Job Number from front and Tracking ID from back of filename.
|
||||
Keeps everything else as-is (including social media codes, DV3, etc.)
|
||||
|
||||
Args:
|
||||
filename: Original filename
|
||||
|
|
@ -292,40 +306,23 @@ class FilenameParser:
|
|||
Clean filename for upload (no job number, no tracking ID)
|
||||
|
||||
Example:
|
||||
Input: 1234567_RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF_abc123.mp4
|
||||
Output: RAF_TEST_OLV_6S_1x1_REF_GL_it_IGF.mp4
|
||||
Input: 6662777_NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA_EvQJrM.mp4
|
||||
Output: NUT_XMAS-SHARETHELOVE-GLAS_OLV_6S_16X9_PL_pl_YTA.mp4
|
||||
"""
|
||||
parsed = self.parse_filename(filename)
|
||||
import os
|
||||
|
||||
if not parsed:
|
||||
base, ext = os.path.splitext(filename)
|
||||
parts = base.split('_')
|
||||
|
||||
if len(parts) < 3:
|
||||
return filename
|
||||
|
||||
# Build clean filename in V2.1 order
|
||||
# [BRAND]_[SUBJECT]_[ASSET]_[DUR]_[RATIO]_[SPOT]_[COUNTRY]_[LANG]_[SOCIAL]
|
||||
clean_parts = []
|
||||
# Strip job number from front (digits only)
|
||||
if parts[0].isdigit():
|
||||
parts = parts[1:]
|
||||
|
||||
if parsed['brand_code']:
|
||||
clean_parts.append(parsed['brand_code'])
|
||||
if parsed['subject_title']:
|
||||
clean_parts.append(parsed['subject_title'])
|
||||
if parsed['asset_type']:
|
||||
clean_parts.append(parsed['asset_type'])
|
||||
if parsed['seconds']:
|
||||
clean_parts.append(parsed['seconds'] + 'S')
|
||||
if parsed['aspect_ratio']:
|
||||
clean_parts.append(parsed['aspect_ratio'])
|
||||
if parsed['spot_version']:
|
||||
clean_parts.append(parsed['spot_version'])
|
||||
if parsed['country_code']:
|
||||
clean_parts.append(parsed['country_code'])
|
||||
if parsed['language_code']:
|
||||
clean_parts.append(parsed['language_code'])
|
||||
if parsed['social_media_version']:
|
||||
clean_parts.append(parsed['social_media_version'])
|
||||
# Strip tracking ID(s) from back (6 alphanumeric chars, optionally with +joined IDs or -N suffix)
|
||||
if parts and re.match(r'^[a-zA-Z0-9]{6}(-N)?(\+[a-zA-Z0-9]{6}(-N)?)*$', parts[-1]):
|
||||
parts = parts[:-1]
|
||||
|
||||
clean_filename = '_'.join(clean_parts)
|
||||
|
||||
if parsed['extension']:
|
||||
clean_filename += parsed['extension']
|
||||
|
||||
return clean_filename
|
||||
return '_'.join(parts) + ext
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue