Major changes:

1. Updated filename_parser.py for new V2 naming convention:
   - Spot version now accepts only MST or REF (optional)
   - Duration field is now optional
   - Tracking ID supports -N suffix for folder-only mode
   - Reduced minimum required parts from 9 to 7
   - Improved asset type detection logic

2. Added recursive folder scanning to box_client.py:
   - New list_folder_files_recursive() method
   - Skips first-level job/batch folders
   - Preserves folder structure from 2nd level onwards
   - Skips hidden folders (starting with . or _)

3. Updated A2→A3 upload workflow:
   - Uses recursive folder scanning
   - Extracts and logs tracking mode (full vs folder_only)
   - Handles subfolder paths for DAM uploads
   - Shows folder distribution in logs

4. Added folder-only mode to metadata_extractor_mvp.py:
   - New tracking_mode parameter (full/folder_only)
   - folder_only mode builds metadata entirely from filename
   - New _build_fields_from_filename() method

5. Added DAM subfolder creation to dam_client.py:
   - New get_or_create_subfolder_path() method
   - Creates matching folder structure in DAM
   - Helper methods _find_subfolder_by_name() and _create_folder()

Folder structure behavior:
- Box: DAM-UPLOAD/1234567/Europe/Germany/file.mp4
- DAM: 01. Final Assets/Europe/Germany/file.mp4
- Job folder (1234567) is skipped, structure preserved from 2nd level

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
333 lines
13 KiB
Python
333 lines
13 KiB
Python
"""
Metadata Extractor MVP - Extract MVP fields from master metadata

Ported from PHP MetadataExtractorMVP.php

Compatible with Python 3.6+
"""

import copy
import logging

logger = logging.getLogger('MetadataExtractorMVP')


class MetadataExtractorMVP:
    """Build the reduced ("MVP") metadata payload for a derived asset.

    Two modes are supported (see ``build_mvp_asset_representation``):

    * ``'full'``        - copy the configured MVP fields from the master
      asset and overwrite a few of them from the filename.
    * ``'folder_only'`` - ignore the master's fields and build everything
      from the parsed filename.
    """

    # Field IDs that this class reads or writes directly.
    _ASSET_NAME_ID = 'ARTESIA.FIELD.ASSET NAME'
    _ASSET_DESCRIPTION_ID = 'ARTESIA.FIELD.ASSET DESCRIPTION'
    _STATE_ID = 'FERRERO.FIELD.STATE'
    _MAIN_LANGUAGES_ID = 'MAIN_LANGUAGES'
    _MAIN_LANGUAGES_TABLE_ID = 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES'
    _CREATIVEX_LINK_ID = 'FERRERO.FIELD.CREATIVEX LINK'  # CreativeX Hyperlink

    # Value forced into the STATE field in both modes.
    _FORCED_STATE = 'Local'

    # Default-able field IDs that use the tabular structure even though
    # their ID does not contain 'TABULAR'.
    _TABULAR_DEFAULT_IDS = ('FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG')

    # (field ID, parsed-filename key) pairs used in folder-only mode, in the
    # order the fields are emitted. Codes are used as-is for now; lookup
    # tables (brand_code -> brand_name, ...) can be added later.
    _FILENAME_FIELD_SOURCES = (
        ('ARTESIA.FIELD.ASSET DESCRIPTION', 'subject_title'),
        ('FERRERO.FIELD.BRAND', 'brand_code'),
        ('FERRERO.FIELD.COUNTRY', 'country_code'),
        ('FERRERO.FIELD.LANGUAGES', 'language_code'),
        ('FERRERO.FIELD.ASSET TYPE', 'asset_type'),
    )

    _TRACKING_MODES = ('full', 'folder_only')

    def __init__(self, field_mappings):
        """
        Initialize with field mappings from config

        Args:
            field_mappings: dict from field_mappings.yaml. ``mvp_fields`` is
                required; ``filename_updates``, ``forced_values`` and
                ``defaults`` are optional.

        Raises:
            KeyError: if ``mvp_fields`` is missing.
        """
        self.mvp_field_ids = field_mappings['mvp_fields']
        # ``or {}`` so that an empty YAML section (parsed as None) behaves
        # like a missing one instead of crashing later on ``.items()``.
        self.filename_updates = field_mappings.get('filename_updates') or {}
        self.forced_values = field_mappings.get('forced_values') or {}
        self.defaults = field_mappings.get('defaults') or {}

    def extract_mvp_fields(self, master_metadata):
        """
        Extract only MVP fields from full master metadata

        Looks in ``master_metadata['metadata']['metadata_element_list']``.
        Each entry is either a category (it has its own
        ``metadata_element_list`` of fields) or a direct field.

        Args:
            master_metadata: Complete DAM asset metadata

        Returns:
            List of MVP field objects (the same dict objects that live
            inside ``master_metadata``; they are not copied here). Empty
            when the expected structure is absent.
        """
        metadata_list = []

        if isinstance(master_metadata, dict):
            metadata = master_metadata.get('metadata')
            if isinstance(metadata, dict) and 'metadata_element_list' in metadata:
                metadata_list = metadata['metadata_element_list'] or []
                logger.info("Using master_metadata['metadata']['metadata_element_list']")

        logger.info("Searching through {} categories for MVP fields".format(len(metadata_list)))

        wanted = set(self.mvp_field_ids)
        extracted_fields = []
        found_field_ids = set()

        for item in metadata_list:
            if not isinstance(item, dict):
                continue

            if 'metadata_element_list' in item:
                # Category with nested fields
                for field in item['metadata_element_list'] or []:
                    if not isinstance(field, dict):
                        continue
                    field_id = field.get('id')
                    if field_id in wanted:
                        extracted_fields.append(field)
                        found_field_ids.add(field_id)
                        logger.debug("Found MVP field: {}".format(field_id))
            elif item.get('id') in wanted:
                # Direct field
                extracted_fields.append(item)
                found_field_ids.add(item['id'])
                logger.debug("Found direct MVP field: {}".format(item['id']))

        # Log results. The found count is per distinct ID so it can never
        # exceed the total, even if the master repeats a field.
        missing = [f for f in self.mvp_field_ids if f not in found_field_ids]
        logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids)))

        if missing:
            # Only the first five are listed to keep the log line short.
            logger.info("Missing fields: {}".format(', '.join(str(f) for f in missing[:5])))

        return extracted_fields

    def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename,
                                       box_metadata=None, tracking_mode='full'):
        """
        Build asset representation with MVP fields + updates from filename

        Args:
            master_metadata: Full master asset metadata
            clean_filename: Clean filename (stripped)
            parsed_filename: Parsed V2 filename dict (may be None)
            box_metadata: Optional Box metadata
            tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder)

        Returns:
            Asset representation dict ready for upload

        Raises:
            ValueError: if ``tracking_mode`` is not one of the two modes.
        """
        # Validate up front: an unknown mode used to fall through both
        # branches and crash later with an UnboundLocalError.
        if tracking_mode not in self._TRACKING_MODES:
            raise ValueError(
                "Unknown tracking_mode: {!r} (expected 'full' or 'folder_only')".format(tracking_mode)
            )

        if tracking_mode == 'full':
            # FULL INHERITANCE MODE - Standard behavior
            logger.info("Full inheritance mode - using master metadata")

            # Deep-copy so the updates below do not modify the caller's
            # master_metadata in place.
            mvp_fields = copy.deepcopy(self.extract_mvp_fields(master_metadata))

            # Update fields from filename and forced values
            mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename)
        else:
            # FOLDER ONLY MODE - New asset, only use upload folder
            logger.info("Folder-only mode (-N suffix) - building metadata from filename only")
            logger.warning("Note: Upload folder comes from master, all other metadata from filename")

            mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename)

        # Add missing MVP fields with defaults (both modes)
        mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename)

        # Update CreativeX fields from Box metadata if provided
        if box_metadata:
            mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata)

        asset_rep = {
            'asset_resource': {
                'asset': {
                    'metadata': {
                        'metadata_element_list': mvp_fields
                    },
                    'metadata_model_id': 'ECOMMERCE',
                    'security_policy_list': [
                        {'id': 1594}
                    ]
                }
            }
        }

        logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields)))

        return asset_rep

    def _update_fields(self, mvp_fields, clean_filename, parsed_filename):
        """Update specific fields from filename and forced values

        ASSET NAME is set to ``clean_filename``, ASSET DESCRIPTION to the
        parsed ``subject_title`` (when there is one) and STATE is forced to
        'Local'. Fields that are not present in ``mvp_fields`` are left
        alone; nothing is added here.

        Args:
            mvp_fields: list of field dicts; updated in place.
            clean_filename: value for ASSET NAME.
            parsed_filename: parsed filename dict or None.

        Returns:
            The same ``mvp_fields`` list.
        """
        subject_title = parsed_filename.get('subject_title') if parsed_filename else None

        # (field ID, new value, log message on success), applied in order.
        updates = [
            (self._ASSET_NAME_ID, clean_filename,
             "Updated ASSET NAME: {}".format(clean_filename)),
        ]
        if subject_title:
            updates.append(
                (self._ASSET_DESCRIPTION_ID, subject_title,
                 "Updated DESCRIPTION: {}".format(subject_title))
            )
        updates.append((self._STATE_ID, self._FORCED_STATE, "Set STATE to Local"))

        for target_id, new_value, success_message in updates:
            for field in mvp_fields:
                if field.get('id') != target_id:
                    continue
                if self._set_field_value(field, new_value):
                    logger.info(success_message)
                else:
                    logger.warning(
                        "Could not update {}: unrecognized value structure".format(target_id)
                    )

        return mvp_fields

    def _add_missing_fields(self, mvp_fields, parsed_filename):
        """Add missing MVP fields from filename or defaults

        Adds MAIN_LANGUAGES from the parsed ``language_code`` (upper-cased)
        when absent, then every configured default whose field ID is not
        yet present.

        Args:
            mvp_fields: list of field dicts; new fields are appended to it.
            parsed_filename: parsed filename dict or None.

        Returns:
            The same ``mvp_fields`` list.
        """
        present_ids = {f.get('id') for f in mvp_fields}

        # Add MAIN_LANGUAGES if missing
        language_code = parsed_filename.get('language_code') if parsed_filename else None
        if self._MAIN_LANGUAGES_ID not in present_ids and language_code:
            language = language_code.upper()
            logger.info("Adding MAIN_LANGUAGES: {}".format(language))
            mvp_fields.append(
                self._domain_table_field(
                    self._MAIN_LANGUAGES_ID, self._MAIN_LANGUAGES_TABLE_ID, language
                )
            )
            present_ids.add(self._MAIN_LANGUAGES_ID)

        # Add other missing fields with defaults
        for field_id, default_value in self.defaults.items():
            if field_id in present_ids:
                continue

            logger.info("Adding {} with default: {}".format(field_id, default_value))

            # A field is treated as tabular when its own ID contains
            # 'TABULAR' or it is one of the known tabular IDs.
            is_tabular = 'TABULAR' in field_id or field_id in self._TABULAR_DEFAULT_IDS

            if is_tabular:
                # Parent table ID is derived from the last dotted segment
                # of the field ID.
                parent_table_id = 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1]
                mvp_fields.append(
                    self._domain_table_field(field_id, parent_table_id, default_value)
                )
            else:
                mvp_fields.append(self._domain_scalar_field(field_id, default_value))

        return mvp_fields

    @staticmethod
    def _domain_table_field(field_id, parent_table_id, value):
        """Return a single-row MetadataTableField dict holding ``value``."""
        return {
            'id': field_id,
            'parent_table_id': parent_table_id,
            'type': 'com.artesia.metadata.MetadataTableField',
            'values': [
                {
                    'cascading_domain_value': False,
                    'domain_value': True,
                    'value': {
                        'field_value': {
                            'type': 'string',
                            'value': value
                        },
                        'type': 'com.artesia.metadata.DomainValue'
                    }
                }
            ]
        }

    @staticmethod
    def _domain_scalar_field(field_id, value):
        """Return a scalar MetadataField dict holding ``value``."""
        return {
            'id': field_id,
            'type': 'com.artesia.metadata.MetadataField',
            'value': {
                'cascading_domain_value': False,
                'domain_value': True,
                'value': {
                    'type': 'string',
                    'value': value
                }
            }
        }

    @staticmethod
    def _filename_field(field_id, value):
        """Return the minimal field dict used in folder-only mode."""
        return {'id': field_id, 'value': {'value': {'value': value}}}

    def _build_fields_from_filename(self, parsed_filename, clean_filename):
        """
        Build ALL metadata fields from parsed filename

        Used in folder-only mode (tracking ID with -N suffix)

        Note: Uses codes directly for now. Can add lookup tables later
        for brand_code->brand_name, country_code->country_name, etc.

        Args:
            parsed_filename: parsed filename dict; None is treated as empty.
            clean_filename: value for ASSET NAME.

        Returns:
            New list of field dicts: ASSET NAME first, then one field per
            non-empty filename component, then STATE forced to 'Local'.
        """
        # The other methods accept parsed_filename=None; do the same here.
        parsed = parsed_filename or {}

        fields = [self._filename_field(self._ASSET_NAME_ID, clean_filename)]

        for field_id, source_key in self._FILENAME_FIELD_SOURCES:
            component = parsed.get(source_key)
            if component:
                fields.append(self._filename_field(field_id, component))

        # STATE (force to Local)
        fields.append(self._filename_field(self._STATE_ID, self._FORCED_STATE))

        logger.info("Built {} fields from filename (folder-only mode)".format(len(fields)))

        return fields

    def _set_field_value(self, field, value):
        """Set field value handling different structures

        Two shapes are recognized:

        * ``field['value']['value']['value']``
        * ``field['value']['value']['field_value']['value']``

        Tabular fields (a ``values`` list) are not handled.

        Args:
            field: field dict; modified in place.
            value: new value.

        Returns:
            True if the value was written, False if the field did not match
            a recognized shape (in which case it is left untouched).
        """
        outer = field.get('value')
        inner = outer.get('value') if isinstance(outer, dict) else None
        if not isinstance(inner, dict):
            return False

        if 'value' in inner:
            inner['value'] = value
            return True

        field_value = inner.get('field_value')
        if isinstance(field_value, dict):
            field_value['value'] = value
            return True

        return False

    def _update_creativex_fields(self, mvp_fields, box_metadata):
        """
        Update CreativeX fields from Box metadata template

        Only the URL is applied, and only when a CREATIVEX LINK field is
        already present in ``mvp_fields``. The score is logged but not
        written.

        Args:
            mvp_fields: List of MVP fields
            box_metadata: dict with 'score' and 'url' from Box template

        Returns:
            Updated mvp_fields list
        """
        score = box_metadata.get('score')
        if score:
            # TODO: the score is meant for 'FERRERO.TAB.FIELD.CREATIVEX'
            # (Platform > Rating (%)); the exact field ID still needs
            # confirming and the tabular structure needs special handling.
            logger.info(
                "CreativeX Score received from Box but not applied "
                "(tabular field update not implemented): {}".format(score)
            )

        url = box_metadata.get('url')
        if url:
            logger.info("Updating CreativeX URL from Box: {}".format(url))
            for field in mvp_fields:
                if field.get('id') != self._CREATIVEX_LINK_ID:
                    continue
                if self._set_field_value(field, url):
                    logger.info("Set CREATIVEX LINK to: {}".format(url))
                else:
                    logger.warning("Could not set CREATIVEX LINK: unrecognized value structure")
                break
            else:
                logger.warning("CREATIVEX LINK field not present; CreativeX URL not applied")

        return mvp_fields