Major changes:

1. Updated filename_parser.py for new V2 naming convention:
   - Spot version now accepts only MST or REF (optional)
   - Duration field is now optional
   - Tracking ID supports -N suffix for folder-only mode
   - Reduced minimum required parts from 9 to 7
   - Improved asset type detection logic

2. Added recursive folder scanning to box_client.py:
   - New list_folder_files_recursive() method
   - Skips first-level job/batch folders
   - Preserves folder structure from 2nd level onwards
   - Skips hidden folders (starting with . or _)

3. Updated A2→A3 upload workflow:
   - Uses recursive folder scanning
   - Extracts and logs tracking mode (full vs folder_only)
   - Handles subfolder paths for DAM uploads
   - Shows folder distribution in logs

4. Added folder-only mode to metadata_extractor_mvp.py:
   - New tracking_mode parameter (full/folder_only)
   - folder_only mode builds metadata entirely from filename
   - New _build_fields_from_filename() method

5. Added DAM subfolder creation to dam_client.py:
   - New get_or_create_subfolder_path() method
   - Creates matching folder structure in DAM
   - Helper methods _find_subfolder_by_name() and _create_folder()

Folder structure behavior:
- Box: DAM-UPLOAD/1234567/Europe/Germany/file.mp4
- DAM: 01. Final Assets/Europe/Germany/file.mp4
- Job folder (1234567) is skipped, structure preserved from 2nd level

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
264 lines
9.5 KiB
Python
264 lines
9.5 KiB
Python
"""
|
|
Filename Parser - V2 Naming Convention Parser
|
|
Ported from PHP FilenameParser.php
|
|
Compatible with Python 3.6+
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
|
|
logger = logging.getLogger('FilenameParser')


class FilenameParser:
    """
    Parse V2 naming convention filenames:
    [OMG_JOB]_[BRAND]_[COUNTRY]_[LANG]_[TITLE]_[TYPE]_[VERSION]_[SEC]S_[RATIO]_[TRACKING]

    VERSION (MST or REF), [SEC]S and TRACKING are optional. TRACKING may
    carry a "-N" suffix, which switches the result to 'folder_only'
    tracking mode.

    Example: 1234567_RAF_DE_de_TEST-JOB_OLV_MST_6S_16x9_TaNu6a.mp4
    """

    # Minimum parts: OMG + BRAND + COUNTRY + LANG + TITLE + TYPE + RATIO
    MIN_PARTS = 7

    _SPOT_VERSIONS = ('MST', 'REF')
    _DURATION_RE = re.compile(r'^(\d+)S$', re.IGNORECASE)
    _ASPECT_RATIO_RE = re.compile(r'^\d+x\d+$', re.IGNORECASE)
    _FOLDER_ONLY_SUFFIX = '-N'
    _TRACKING_ID_LENGTH = 6

    def parse_filename(self, filename):
        """
        Parse V2 filename into components

        Args:
            filename: Filename to parse (with or without extension)

        Returns:
            dict with parsed components and validation results. Fields that
            could not be parsed are None. 'validation_errors' and 'warnings'
            are lists of messages; 'is_valid' is True only when there are no
            validation errors.
        """
        validation_errors = []
        warnings = []

        # Everything after the last dot is treated as the extension
        if '.' in filename:
            filename_without_ext, extension = filename.rsplit('.', 1)
            extension = '.' + extension
        else:
            filename_without_ext = filename
            extension = ''

        parts = filename_without_ext.split('_')

        parsed = {
            'original_filename': filename,
            'filename_without_ext': filename_without_ext,
            'extension': extension,
            'omg_job_number': None,
            'brand_code': None,
            'country_code': None,
            'language_code': None,
            'subject_title': None,
            'asset_type': None,
            'spot_version': None,
            'has_master': False,
            'seconds': None,
            'aspect_ratio': None,
            'tracking_id': None,
            'tracking_mode': 'full',
            'tracking_id_with_suffix': None,
            'validation_errors': [],
            'warnings': [],
            'is_valid': False
        }

        # Too short to be a V2 name: report it and skip field parsing
        if len(parts) < self.MIN_PARTS:
            parsed['validation_errors'] = [
                "Invalid structure: expected min 7 parts, got {}".format(len(parts))
            ]
            return parsed

        # The length check above guarantees the first four fields exist.
        index = 0

        # 1. OMG Job Number (digits only, max 10)
        if parts[index].isdigit():
            omg = parts[index]
            if len(omg) > 10:
                validation_errors.append("OMG Job Number too long: {} (max 10)".format(omg))
            else:
                parsed['omg_job_number'] = omg
            index += 1
        else:
            # Not consumed: the same part is then examined as the brand code
            validation_errors.append("OMG Job Number missing or invalid: {}".format(parts[index]))

        # 2. Brand Code (2-5 chars)
        brand = parts[index].upper()
        if 2 <= len(brand) <= 5:
            parsed['brand_code'] = brand
        else:
            validation_errors.append("Brand Code invalid: {} (must be 2-5 chars)".format(brand))
        index += 1

        # 3. Country Code (2 chars)
        country = parts[index].upper()
        if len(country) == 2:
            parsed['country_code'] = country
        else:
            validation_errors.append("Country Code invalid: {} (must be 2 chars)".format(country))
        index += 1

        # 4. Language Code (2-3 chars)
        lang = parts[index].lower()
        if 2 <= len(lang) <= 3:
            parsed['language_code'] = lang
        else:
            validation_errors.append("Language Code invalid: {} (must be 2-3 chars)".format(lang))
        index += 1

        # 5. Subject Title: runs until the asset type. A part counts as the
        # asset type only if it is 3 uppercase letters AND the part after it
        # looks like a field that may follow (so 3-letter title words are
        # not mistaken for it).
        subject_parts = []
        asset_type_found = False
        for i in range(index, len(parts)):
            part = parts[i]
            if (len(part) == 3 and part.isalpha() and part.isupper()
                    and i + 1 < len(parts)
                    and self._can_follow_asset_type(parts[i + 1])):
                index = i
                asset_type_found = True
                break
            subject_parts.append(part)

        if subject_parts:
            parsed['subject_title'] = '_'.join(subject_parts)
            if len(parsed['subject_title']) > 15:
                warnings.append("Subject title exceeds 15 chars: {}".format(parsed['subject_title']))

        # 6. Asset Type (3 uppercase letters)
        if asset_type_found:
            parsed['asset_type'] = parts[index].upper()
            index += 1
            # 7-10. Fields after the asset type
            self._parse_trailing_fields(parts, index, parsed, validation_errors, warnings)
        else:
            # Without the asset type there is no anchor for the remaining
            # fields (the scan put every remaining part into the title), so
            # they are left unset instead of being guessed from title parts.
            validation_errors.append("Asset Type missing or invalid")

        # Set validation status
        parsed['validation_errors'] = validation_errors
        parsed['warnings'] = warnings
        parsed['is_valid'] = len(validation_errors) == 0

        return parsed

    @classmethod
    def _can_follow_asset_type(cls, candidate):
        """Return True if candidate looks like a spot version, duration,
        aspect ratio or tracking ID (with or without the -N suffix)."""
        token = candidate.upper()
        if token in cls._SPOT_VERSIONS:
            return True
        if cls._DURATION_RE.match(token) or cls._ASPECT_RATIO_RE.match(token):
            return True
        # Drop the folder-only suffix before the length check; otherwise a
        # suffixed ID (8 chars) could never pass the 6-char test.
        if token.endswith(cls._FOLDER_ONLY_SUFFIX):
            token = token[:-len(cls._FOLDER_ONLY_SUFFIX)]
        return len(token) == cls._TRACKING_ID_LENGTH and token.isalnum()

    def _parse_trailing_fields(self, parts, index, parsed, validation_errors, warnings):
        """Fill spot version, duration, aspect ratio and tracking ID in
        parsed, reading parts from index onwards; problems are appended to
        validation_errors / warnings."""
        # 7. Spot Version (MST or REF - OPTIONAL)
        if index < len(parts):
            spot = parts[index].upper()
            if spot in self._SPOT_VERSIONS:
                parsed['spot_version'] = spot
                parsed['has_master'] = (spot == 'MST')
                index += 1

        # 8. Duration (format: 6S, 15S, etc.) - OPTIONAL
        if index < len(parts):
            match = self._DURATION_RE.match(parts[index])
            if match:
                parsed['seconds'] = match.group(1)
                index += 1

        # 9. Aspect Ratio (format: 16x9, 4x3, etc.) - required
        if index < len(parts):
            ratio = parts[index]
            if self._ASPECT_RATIO_RE.match(ratio):
                parsed['aspect_ratio'] = ratio
            else:
                validation_errors.append("Aspect Ratio invalid: {} (must be format: 16x9)".format(ratio))
            index += 1
        else:
            # The name ended before the mandatory ratio field
            validation_errors.append("Aspect Ratio missing")

        # 10. Tracking ID (6 alphanumeric chars, optional -N suffix)
        if index < len(parts):
            tracking = parts[index]
            tracking_mode = 'full'
            base_tracking_id = tracking

            # -N suffix selects folder-only mode
            if tracking.endswith(self._FOLDER_ONLY_SUFFIX):
                tracking_mode = 'folder_only'
                base_tracking_id = tracking[:-len(self._FOLDER_ONLY_SUFFIX)]
                logger.info("Detected folder-only tracking ID: %s (base: %s)", tracking, base_tracking_id)

            # A malformed ID only produces a warning; the values are still stored
            if not (len(base_tracking_id) == self._TRACKING_ID_LENGTH and base_tracking_id.isalnum()):
                warnings.append("Tracking ID invalid: {} (should be 6 alphanumeric, optionally with -N)".format(tracking))

            parsed['tracking_id'] = base_tracking_id
            parsed['tracking_mode'] = tracking_mode
            parsed['tracking_id_with_suffix'] = tracking

    def strip_upload_components(self, filename):
        """
        Strip OMG Job Number and Tracking ID from filename

        The result is rebuilt from the parsed fields, so brand and country
        come back upper-cased and language lower-cased.

        Args:
            filename: Original filename

        Returns:
            Clean filename for upload, or the original filename unchanged
            when no naming-convention component could be parsed from it
        """
        parsed = self.parse_filename(filename)

        components = [
            parsed['brand_code'],
            parsed['country_code'],
            parsed['language_code'],
            parsed['subject_title'],
            parsed['asset_type'],
            parsed['spot_version'],
            parsed['seconds'] + 'S' if parsed['seconds'] else None,
            parsed['aspect_ratio'],
        ]
        clean_parts = [component for component in components if component]

        # Nothing recognised: keep the name rather than returning only the extension
        if not clean_parts:
            return filename

        return '_'.join(clean_parts) + parsed['extension']
|