""" Filename Parser - V2 Naming Convention Parser Ported from PHP FilenameParser.php Compatible with Python 3.6+ """ import re import logging logger = logging.getLogger('FilenameParser') class FilenameParser: """ Parse V2 naming convention filenames: [OMG_JOB]_[BRAND]_[COUNTRY]_[LANG]_[TITLE]_[TYPE]_[VERSION]_[SEC]S_[RATIO]_[TRACKING] Example: 1234567_RAF_DE_de_TEST-JOB_OLV_001_6S_16x9_TaNu6a.mp4 """ def parse_filename(self, filename): """ Parse V2 filename into components Args: filename: Filename to parse (with or without extension) Returns: dict with parsed components and validation results """ validation_errors = [] warnings = [] # Remove extension if '.' in filename: filename_without_ext, extension = filename.rsplit('.', 1) extension = '.' + extension else: filename_without_ext = filename extension = '' # Split by underscore parts = filename_without_ext.split('_') # Minimum 7 parts: OMG + BRAND + COUNTRY + LANG + TITLE + TYPE + RATIO # Optional: SPOT_VERSION, DURATION, TRACKING_ID if len(parts) < 7: validation_errors.append("Invalid structure: expected min 7 parts, got {}".format(len(parts))) parsed = { 'original_filename': filename, 'filename_without_ext': filename_without_ext, 'extension': extension, 'omg_job_number': None, 'brand_code': None, 'country_code': None, 'language_code': None, 'subject_title': None, 'asset_type': None, 'spot_version': None, 'has_master': False, 'seconds': None, 'aspect_ratio': None, 'tracking_id': None, 'tracking_mode': 'full', 'tracking_id_with_suffix': None, 'validation_errors': [], 'warnings': [], 'is_valid': False } if len(parts) < 7: parsed['validation_errors'] = validation_errors return parsed index = 0 # 1. OMG Job Number (digits only, max 10) if index < len(parts) and parts[index].isdigit(): omg = parts[index] if len(omg) > 10: validation_errors.append("OMG Job Number too long: {} (max 10)".format(omg)) else: parsed['omg_job_number'] = omg index += 1 else: if index < len(parts): validation_errors.append("OMG Job Number missing or invalid: {}".format(parts[index])) # 2. Brand Code (2-5 chars) if index < len(parts): brand = parts[index].upper() if 2 <= len(brand) <= 5: parsed['brand_code'] = brand else: validation_errors.append("Brand Code invalid: {} (must be 2-5 chars)".format(brand)) index += 1 # 3. Country Code (2 chars) if index < len(parts): country = parts[index].upper() if len(country) == 2: parsed['country_code'] = country else: validation_errors.append("Country Code invalid: {} (must be 2 chars)".format(country)) index += 1 # 4. Language Code (2-3 chars) if index < len(parts): lang = parts[index].lower() if 2 <= len(lang) <= 3: parsed['language_code'] = lang else: validation_errors.append("Language Code invalid: {} (must be 2-3 chars)".format(lang)) index += 1 # 5. Subject Title (find asset type to know where title ends) # Asset type is 3 uppercase letters subject_parts = [] asset_type_found = False for i in range(index, len(parts)): part = parts[i] # Check if this looks like asset type (3 uppercase letters) if len(part) == 3 and part.isalpha() and part.isupper(): # Check if next part could be spot version (MST/REF), duration (XS), or aspect ratio (XxY) # This helps distinguish asset type from subject title if i + 1 < len(parts): next_part = parts[i + 1].upper() # Next could be: MST, REF, duration (15S), aspect ratio (16x9), or tracking ID is_likely_asset_type = ( next_part in ['MST', 'REF'] or # Spot version re.match(r'^\d+S$', next_part) or # Duration re.match(r'^\d+[xX]\d+$', next_part) or # Aspect ratio (len(next_part) == 6 and next_part.replace('-N', '').isalnum()) # Tracking ID ) if is_likely_asset_type: # Found asset type index = i asset_type_found = True break subject_parts.append(part) if subject_parts: parsed['subject_title'] = '_'.join(subject_parts) if len(parsed['subject_title']) > 15: warnings.append("Subject title exceeds 15 chars: {}".format(parsed['subject_title'])) # 6. Asset Type (3 uppercase letters) if index < len(parts) and len(parts[index]) == 3: parsed['asset_type'] = parts[index].upper() index += 1 else: validation_errors.append("Asset Type missing or invalid") # 7. Spot Version (MST or REF - OPTIONAL) if index < len(parts): spot = parts[index].upper() if spot == 'MST': parsed['spot_version'] = 'MST' parsed['has_master'] = True index += 1 elif spot == 'REF': parsed['spot_version'] = 'REF' parsed['has_master'] = False index += 1 else: # Not a spot version - this field is optional, continue to duration parsed['spot_version'] = None # Don't increment index # 8. Duration (format: 6S, 15S, etc.) - OPTIONAL if index < len(parts): duration = parts[index] match = re.match(r'^(\d+)S$', duration, re.IGNORECASE) if match: parsed['seconds'] = match.group(1) index += 1 else: # Duration not present - this field is optional, continue to aspect ratio parsed['seconds'] = None # Don't increment index # 9. Aspect Ratio (format: 16x9, 4x3, etc.) if index < len(parts): ratio = parts[index] if re.match(r'^\d+x\d+$', ratio, re.IGNORECASE): parsed['aspect_ratio'] = ratio else: validation_errors.append("Aspect Ratio invalid: {} (must be format: 16x9)".format(ratio)) index += 1 # 10. Tracking ID (6 alphanumeric chars, optional -N suffix) if index < len(parts): tracking = parts[index] # Check for -N suffix (folder-only mode) tracking_mode = 'full' base_tracking_id = tracking if tracking.endswith('-N'): tracking_mode = 'folder_only' base_tracking_id = tracking[:-2] # Strip -N suffix logger.info("Detected folder-only tracking ID: {} (base: {})".format(tracking, base_tracking_id)) # Validate base tracking ID (6 alphanumeric chars) if len(base_tracking_id) == 6 and base_tracking_id.isalnum(): parsed['tracking_id'] = base_tracking_id parsed['tracking_mode'] = tracking_mode parsed['tracking_id_with_suffix'] = tracking else: warnings.append("Tracking ID invalid: {} (should be 6 alphanumeric, optionally with -N)".format(tracking)) parsed['tracking_id'] = base_tracking_id parsed['tracking_mode'] = tracking_mode parsed['tracking_id_with_suffix'] = tracking # Set validation status parsed['validation_errors'] = validation_errors parsed['warnings'] = warnings parsed['is_valid'] = len(validation_errors) == 0 return parsed def strip_upload_components(self, filename): """ Strip OMG Job Number and Tracking ID from filename Args: filename: Original filename Returns: Clean filename for upload """ parsed = self.parse_filename(filename) if not parsed: return filename # Build clean filename clean_parts = [] if parsed['brand_code']: clean_parts.append(parsed['brand_code']) if parsed['country_code']: clean_parts.append(parsed['country_code']) if parsed['language_code']: clean_parts.append(parsed['language_code']) if parsed['subject_title']: clean_parts.append(parsed['subject_title']) if parsed['asset_type']: clean_parts.append(parsed['asset_type']) if parsed['spot_version']: clean_parts.append(parsed['spot_version']) if parsed['seconds']: clean_parts.append(parsed['seconds'] + 'S') if parsed['aspect_ratio']: clean_parts.append(parsed['aspect_ratio']) clean_filename = '_'.join(clean_parts) if parsed['extension']: clean_filename += parsed['extension'] return clean_filename