ferrero-opentext/Python-Version/scripts/shared/metadata_extractor_mvp.py
nickviljoen 2ec22c62a5 Fix: Folder-only mode metadata format for PROD DAM compatibility
Folder-only mode (-N suffix files) was sending simplified metadata that
PROD DAM rejected with "unmarshalling parameter" error. Updated to use
DomainValue format for domained fields, correct asset type field ID
(FERRERO.FIELD.MKTG.ASSET TYPE), asset type code mapping (e.g. SND→sound),
validity dates, and forced values from config.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 12:24:44 +02:00

1030 lines
45 KiB
Python

"""
Metadata Extractor MVP - Extract MVP fields from master metadata
Ported from PHP MetadataExtractorMVP.php
Compatible with Python 3.6+
"""
import logging
from datetime import datetime, timedelta
import os
from shared.config_loader import load_country_code_mappings
logger = logging.getLogger('MetadataExtractorMVP')
class MetadataExtractorMVP:
    """Build the reduced ("MVP") metadata payload used when uploading an asset.

    The extractor either inherits a configured subset of fields from a master
    asset ('full' mode) or builds every field from the parsed filename
    ('folder_only' mode), then layers forced values, defaults, asset type
    overrides, CreativeX data and master asset IDs on top.
    """

    # Type identifiers used in the metadata payloads built by this class.
    _FIELD_TYPE = 'com.artesia.metadata.MetadataField'
    _TABLE_TYPE = 'com.artesia.metadata.MetadataTable'
    _TABLE_FIELD_TYPE = 'com.artesia.metadata.MetadataTableField'
    _DOMAIN_VALUE_TYPE = 'com.artesia.metadata.DomainValue'

    _TRACKING_MODES = ('full', 'folder_only')

    # Default-only fields that must be emitted as tabular fields even though
    # their ID does not contain 'TABULAR'.
    _TABULAR_DEFAULT_IDS = frozenset([
        'FERRERO.FIELD.ASSETCOMPLIANCE',
        'MARKETING_TAG',
    ])

    # Default-only fields that need the DomainValue wrapper format.
    _DOMAINED_DEFAULT_IDS = frozenset([
        'FERRERO.FIELD.FISCAL YEAR',
        'FERRERO.MARKETING.FIELD.AGENCY NAME',
        'FERRERO.MARKET.PROD_COMPANY',
    ])

    def __init__(self, field_mappings):
        """
        Initialize with field mappings from config

        Args:
            field_mappings: dict from field_mappings.yaml. 'mvp_fields' is
                required; 'filename_updates', 'forced_values', 'defaults' and
                'asset_type_overrides' are optional.
        """
        self.mvp_field_ids = field_mappings['mvp_fields']
        # A section that is present but empty in the config arrives as None,
        # so every optional section is normalised to a dict.
        self.filename_updates = field_mappings.get('filename_updates') or {}
        self.forced_values = field_mappings.get('forced_values') or {}
        self.defaults = field_mappings.get('defaults') or {}
        self.asset_type_overrides = field_mappings.get('asset_type_overrides') or {}

        # Load country code mappings (ISO -> DAM codes). Normalised to a dict
        # so _map_country_code can always use the `in` operator on it.
        self.country_mappings = load_country_code_mappings() or {}
        if self.country_mappings:
            logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings)))

        # Load asset type mappings (3-letter codes -> DAM codes)
        self.asset_type_mappings = self._load_asset_type_mappings()
        if self.asset_type_mappings:
            logger.info("Loaded {} asset type mappings (3-letter->DAM)".format(len(self.asset_type_mappings)))

    # ------------------------------------------------------------------
    # Small payload helpers shared by several methods
    # ------------------------------------------------------------------
    @classmethod
    def _plain_field(cls, field_id, value):
        """Return a non-domained string MetadataField dict."""
        return {
            'id': field_id,
            'type': cls._FIELD_TYPE,
            'value': {'value': {'type': 'string', 'value': value}}
        }

    @classmethod
    def _domained_field(cls, field_id, value):
        """Return a MetadataField dict whose value uses the DomainValue wrapper."""
        return {
            'id': field_id,
            'type': cls._FIELD_TYPE,
            'value': {
                'cascading_domain_value': False,
                'domain_value': True,
                'value': {
                    'field_value': {'type': 'string', 'value': value},
                    'type': cls._DOMAIN_VALUE_TYPE
                }
            }
        }

    @classmethod
    def _domain_value_row(cls, value, with_lock=True, with_expired=False):
        """Return one DomainValue entry for a tabular field's 'values' list.

        The optional keys are only emitted where the original payloads had
        them, so the generated structures stay unchanged.
        """
        inner = {}
        if with_expired:
            inner['expired_value'] = False
        inner['field_value'] = {'type': 'string', 'value': value}
        inner['type'] = cls._DOMAIN_VALUE_TYPE

        row = {'cascading_domain_value': False, 'domain_value': True}
        if with_lock:
            row['is_locked'] = False
        row['value'] = inner
        return row

    @classmethod
    def _is_tabular_field(cls, field):
        """Return True when the field stores its data in a 'values' list."""
        return field.get('type') == cls._TABLE_FIELD_TYPE or 'values' in field

    @staticmethod
    def _validity_date_strings():
        """Return (start, end) validity dates as MM/DD/YYYY strings.

        Start is today; end is today + 365 days (not a calendar year, so the
        end date shifts by one day when the span crosses a leap day).
        """
        today = datetime.now()
        one_year_later = today + timedelta(days=365)
        return today.strftime('%m/%d/%Y'), one_year_later.strftime('%m/%d/%Y')

    @staticmethod
    def _dump_for_log(payload):
        """Render a field value for logging without raising on odd types."""
        import json
        if not payload:
            return 'None'
        # default=str keeps a purely diagnostic dump from raising on values
        # json cannot serialise natively.
        return json.dumps(payload, indent=2, default=str)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def extract_mvp_fields(self, master_metadata):
        """
        Extract only MVP fields from full master metadata

        Looks under master_metadata['metadata']['metadata_element_list'] and
        collects fields whose id is in self.mvp_field_ids: fields nested in a
        category, fields nested inside a MetadataTable, and direct fields.

        Args:
            master_metadata: Complete DAM asset metadata

        Returns:
            List of MVP field objects. They are deep copies, so later updates
            never modify the caller's master metadata.
        """
        import copy

        extracted_fields = []
        found_field_ids = []

        def collect(candidate):
            extracted_fields.append(copy.deepcopy(candidate))
            found_field_ids.append(candidate.get('id'))

        # Navigate to metadata structure: metadata.metadata_element_list
        metadata_list = []
        if isinstance(master_metadata, dict):
            metadata = master_metadata.get('metadata')
            if isinstance(metadata, dict) and 'metadata_element_list' in metadata:
                metadata_list = metadata['metadata_element_list'] or []
                logger.info("Using master_metadata['metadata']['metadata_element_list']")

        logger.info("Searching through {} categories for MVP fields".format(len(metadata_list)))

        for item in metadata_list:
            if not isinstance(item, dict):
                continue

            if 'metadata_element_list' in item:
                # Category with nested fields
                for field in item['metadata_element_list'] or []:
                    if not isinstance(field, dict):
                        continue
                    field_id = field.get('id')
                    if field_id in self.mvp_field_ids:
                        collect(field)
                        logger.debug("Found MVP field: {}".format(field_id))

                    # A MetadataTable may itself contain MVP fields
                    if field.get('type') == self._TABLE_TYPE and 'metadata_element_list' in field:
                        logger.debug("Found MetadataTable: {}, searching inside...".format(field_id))
                        for nested_field in field['metadata_element_list'] or []:
                            if not isinstance(nested_field, dict):
                                continue
                            nested_field_id = nested_field.get('id')
                            if nested_field_id in self.mvp_field_ids:
                                collect(nested_field)
                                logger.info("Found MVP field inside MetadataTable: {}".format(nested_field_id))
            elif 'id' in item and item['id'] in self.mvp_field_ids:
                # Direct field
                collect(item)
                logger.debug("Found direct MVP field: {}".format(item['id']))

        # Log results (only the first five missing IDs are listed)
        missing = [f for f in self.mvp_field_ids if f not in found_field_ids]
        logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids)))
        if missing:
            logger.info("Missing fields: {}".format(', '.join(missing[:5])))
        return extracted_fields

    def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full', master_opentext_id=None, master_opentext_ids=None):
        """
        Build asset representation with MVP fields + updates from filename

        Args:
            master_metadata: Full master asset metadata
            clean_filename: Clean filename (stripped)
            parsed_filename: Parsed V2 filename dict
            box_metadata: Optional Box metadata
            tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder)
            master_opentext_id: Optional DAM Asset ID of master asset (for derivative tracking)
            master_opentext_ids: Optional list of master DAM Asset IDs. When
                non-empty it takes priority over master_opentext_id.

        Returns:
            Asset representation dict ready for upload

        Raises:
            ValueError: if tracking_mode is not 'full' or 'folder_only'.
        """
        if tracking_mode not in self._TRACKING_MODES:
            # Previously an unknown mode surfaced later as an UnboundLocalError.
            raise ValueError(
                "Unknown tracking_mode: {!r} (expected 'full' or 'folder_only')".format(tracking_mode)
            )

        if tracking_mode == 'full':
            # FULL INHERITANCE MODE - Standard behavior
            logger.info("Full inheritance mode - using master metadata")
            mvp_fields = self.extract_mvp_fields(master_metadata)
            # Update fields from filename and forced values
            mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename)
        else:
            # FOLDER ONLY MODE - New asset, only use upload folder
            logger.info("Folder-only mode (-N suffix) - building metadata from filename only")
            logger.warning("Note: Upload folder comes from master, all other metadata from filename")
            mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename)
            # Apply forced values from config (e.g., AGENCY NAME)
            # STATE is already handled in _build_fields_from_filename
            mvp_fields = self._apply_forced_values(mvp_fields)

        # Add missing MVP fields with defaults (both modes)
        mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename)

        # Asset type overrides (e.g., ELO) take final precedence over forced values/defaults
        mvp_fields = self._apply_asset_type_overrides(mvp_fields, parsed_filename)

        # Update CreativeX fields from Box metadata if provided
        if box_metadata:
            mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata)

        # Master IDs: the list form wins, the single ID is the fallback
        if master_opentext_ids and len(master_opentext_ids) > 0:
            mvp_fields = self._add_master_asset_ids_field(mvp_fields, master_opentext_ids)
            if len(master_opentext_ids) > 1:
                logger.info("PPR - Added MASTERASSETIDS field with {} master IDs".format(len(master_opentext_ids)))
            else:
                logger.info("Added MASTERASSETIDS field with 1 master ID")
        elif master_opentext_id:
            mvp_fields = self._add_master_asset_id_field(mvp_fields, master_opentext_id)
            logger.info("Added Master Asset ID field: {}".format(master_opentext_id))

        # Build asset representation
        asset_rep = {
            'asset_resource': {
                'asset': {
                    'metadata': {
                        'metadata_element_list': mvp_fields
                    },
                    'metadata_model_id': 'ECOMMERCE',
                    'security_policy_list': [
                        {'id': 1594}
                    ]
                }
            }
        }
        logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields)))
        return asset_rep

    # ------------------------------------------------------------------
    # Field update steps
    # ------------------------------------------------------------------
    def _update_fields(self, mvp_fields, clean_filename, parsed_filename):
        """Update specific fields from filename and forced values

        Applied in order: configured filename updates, country code mapping,
        forced values, defaults for empty existing fields, validity dates.
        Only the validity date fields are added when absent; every other step
        touches existing fields only.

        Returns:
            The same mvp_fields list, updated in place.
        """
        # Process filename_updates from configuration
        for field_id, config in self.filename_updates.items():
            source = config.get('source')
            transform = config.get('transform', '')

            # Get value from appropriate source
            if source == 'clean_filename':
                value = clean_filename
            elif source and parsed_filename:
                value = parsed_filename.get(source)
            else:
                continue
            if not value:
                continue

            # Apply transform if specified
            if transform == 'uppercase':
                value = value.upper()
            elif transform == 'lowercase':
                value = value.lower()

            # Apply asset type mapping if this is the asset type field
            if field_id == 'FERRERO.FIELD.MKTG.ASSET TYPE' and source == 'asset_type':
                value = self._map_asset_type(value)

            # Update the first field with a matching ID
            for field in mvp_fields:
                if field.get('id') != field_id:
                    continue
                if self._is_tabular_field(field):
                    # The DAM reads tabular fields (like MAIN_LANGUAGES) from
                    # 'values' (plural), not 'value' (singular)
                    field['values'] = [self._domain_value_row(value, with_expired=True)]
                    logger.info("Updated tabular field {} values array from filename: {}".format(field_id, value))
                elif self._set_field_value(field, value):
                    logger.info("Updated {} from filename: {}".format(field_id, value))
                break

        # Apply country code mapping (ISO -> DAM codes)
        for field in mvp_fields:
            if field.get('id') != 'FERRERO.FIELD.COUNTRY':
                continue
            current_value = self._get_field_value(field)
            if current_value:
                mapped_value = self._map_country_code(current_value)
                if mapped_value != current_value:
                    self._set_field_value(field, mapped_value)
                    logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value))

        # Apply forced values from configuration (existing fields only)
        for field_id, forced_value in self.forced_values.items():
            for field in mvp_fields:
                if field.get('id') == field_id:
                    if self._set_field_value(field, forced_value):
                        logger.info("Set {} to {}".format(field_id, forced_value))
                    break

        # Apply defaults to empty existing fields
        for field in mvp_fields:
            field_id = field.get('id')
            if field_id in self.defaults and not self._get_field_value(field):
                default_value = self.defaults[field_id]
                if self._set_field_value(field, default_value):
                    logger.info("Applied default to empty field {}: {}".format(field_id, default_value))

        # Set Asset Validity Dates (Start = upload date, End = Start + 365 days)
        # in US date format (MM/DD/YYYY).
        try:
            start_date_str, end_date_str = self._validity_date_strings()
            date_fields = {
                'FERRERO.FIELD.ASSET VALIDITY START PERIOD': start_date_str,
                'FERRERO.FIELD.ASSET VALIDITY END PERIOD': end_date_str
            }
            for field_id, value in date_fields.items():
                for field in mvp_fields:
                    if field.get('id') == field_id:
                        # Use specialized method for date fields
                        self._set_date_field_value(field, value)
                        logger.info("Set {} to {} (Upload Date Logic)".format(field_id, value))
                        break
                else:
                    # Field absent: add it with the plain string structure
                    mvp_fields.append(self._plain_field(field_id, value))
                    logger.info("Added {} with value {} (Upload Date Logic)".format(field_id, value))
        except Exception as e:
            # Best effort: a date problem must not abort the whole upload
            logger.error("Failed to set validity dates: {}".format(str(e)))
        return mvp_fields

    def _apply_asset_type_overrides(self, mvp_fields, parsed_filename):
        """
        Apply asset type overrides when a matching asset type (e.g., ELO) is detected in the filename.
        These overrides take final precedence over forced values and defaults.

        The asset type is looked up as given and then upper-cased, matching
        the case-insensitive lookup done by _map_asset_type.

        Args:
            mvp_fields: List of MVP field objects
            parsed_filename: Parsed filename dict (must contain 'asset_type' key)

        Returns:
            Updated mvp_fields list
        """
        if not parsed_filename:
            return mvp_fields
        asset_type = parsed_filename.get('asset_type')
        if not asset_type:
            return mvp_fields

        overrides = self.asset_type_overrides.get(asset_type)
        if not overrides and isinstance(asset_type, str):
            overrides = self.asset_type_overrides.get(asset_type.upper())
        if not overrides:
            return mvp_fields

        logger.info("Applying {} asset type overrides for '{}'".format(len(overrides), asset_type))
        for field_id, override_value in overrides.items():
            for field in mvp_fields:
                if field.get('id') != field_id:
                    continue
                if self._is_tabular_field(field):
                    # Tabular fields (like MAIN_LANGUAGES) are read from 'values'
                    field['values'] = [self._domain_value_row(override_value, with_expired=True)]
                    logger.info("Asset type override: {} = {} (tabular)".format(field_id, override_value))
                elif self._set_field_value(field, override_value):
                    logger.info("Asset type override: {} = {}".format(field_id, override_value))
                break
            else:
                logger.warning("Asset type override field '{}' not found in MVP fields - skipping".format(field_id))
        return mvp_fields

    def _add_missing_fields(self, mvp_fields, parsed_filename):
        """Add missing MVP fields from filename or defaults

        Adds MAIN_LANGUAGES from the parsed language code when absent, then
        every configured default whose field ID is not present yet.

        Returns:
            The same mvp_fields list with the new fields appended.
        """
        field_ids = [f.get('id') for f in mvp_fields]

        # Add MAIN_LANGUAGES if missing
        if 'MAIN_LANGUAGES' not in field_ids and parsed_filename:
            if parsed_filename.get('language_code'):
                language = parsed_filename['language_code'].upper()
                logger.info("Adding MAIN_LANGUAGES: {}".format(language))
                mvp_fields.append({
                    'id': 'MAIN_LANGUAGES',
                    'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES',
                    'type': self._TABLE_FIELD_TYPE,
                    'values': [self._domain_value_row(language, with_lock=False)]
                })

        # Add other missing fields with defaults
        present_ids = set(f.get('id') for f in mvp_fields)
        for field_id, default_value in self.defaults.items():
            if field_id in present_ids:
                continue
            logger.info("Adding {} with default: {}".format(field_id, default_value))

            is_tabular = 'TABULAR' in field_id or field_id in self._TABULAR_DEFAULT_IDS
            if is_tabular:
                mvp_fields.append({
                    'id': field_id,
                    # Parent table ID is derived from the last ID segment
                    'parent_table_id': 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1],
                    'type': self._TABLE_FIELD_TYPE,
                    'values': [self._domain_value_row(default_value, with_lock=False)]
                })
            elif field_id in self._DOMAINED_DEFAULT_IDS:
                # Domained fields need DomainValue wrapper format
                mvp_fields.append(self._domained_field(field_id, default_value))
            else:
                mvp_fields.append(self._plain_field(field_id, default_value))
        return mvp_fields

    def _apply_forced_values(self, mvp_fields):
        """
        Apply forced values from config to existing fields.
        For fields not yet present, adds them with DomainValue format.
        Used in folder-only mode where _update_fields is not called.

        Returns:
            The same mvp_fields list, updated in place.
        """
        for field_id, forced_value in self.forced_values.items():
            for field in mvp_fields:
                if field.get('id') == field_id:
                    # Field exists - update via _set_field_value
                    if self._set_field_value(field, forced_value):
                        logger.info("Forced value applied: {} = {}".format(field_id, forced_value))
                    break
            else:
                # Field not present - add with DomainValue format
                mvp_fields.append(self._domained_field(field_id, forced_value))
                logger.info("Forced value added: {} = {}".format(field_id, forced_value))
        return mvp_fields

    # ------------------------------------------------------------------
    # Code mappings
    # ------------------------------------------------------------------
    def _map_country_code(self, iso_code):
        """
        Map ISO country code to DAM country code

        Args:
            iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE')

        Returns:
            str: DAM country code when a mapping exists, otherwise the
                upper-cased input. Falsy input is returned unchanged.
        """
        if not iso_code:
            return iso_code
        iso_upper = iso_code.upper()

        if iso_upper in self.country_mappings:
            dam_code = self.country_mappings[iso_upper]
            if dam_code != iso_upper:
                logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code))
            return dam_code

        # No mapping found, use ISO code as-is
        logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper))
        return iso_upper

    def _load_asset_type_mappings(self):
        """
        Load asset type mappings: 3-letter codes -> DAM codes

        Reads 'config/asset_type_mappings.yaml', resolved against the current
        working directory. Any failure is logged and yields an empty mapping.

        Returns:
            dict: 3-letter code -> DAM code mapping
        """
        import yaml
        mapping_path = 'config/asset_type_mappings.yaml'
        try:
            # Explicit encoding so the result does not depend on the locale
            with open(mapping_path, 'r', encoding='utf-8') as f:
                mappings = yaml.safe_load(f)
        except Exception as e:
            logger.warning("Could not load asset type mappings: {}".format(str(e)))
            return {}

        if not mappings:
            return {}
        if not isinstance(mappings, dict):
            # Lookups index by code, so anything but a mapping is unusable
            logger.warning("Could not load asset type mappings: {}".format(
                "expected a mapping, got {}".format(type(mappings).__name__)))
            return {}
        return mappings

    def _map_asset_type(self, three_letter_code):
        """
        Map 3-letter asset type code to DAM code

        Args:
            three_letter_code: 3-letter code (e.g., 'EHI', 'IMG', 'TVC')

        Returns:
            DAM code (e.g., 'heroimage', 'keyvisual', 'tvc'); the input is
            returned unchanged when it is falsy or has no mapping.
        """
        if not three_letter_code:
            return three_letter_code
        code_upper = three_letter_code.upper()

        if code_upper in self.asset_type_mappings:
            dam_code = self.asset_type_mappings[code_upper]
            logger.info("Asset type mapping: {} -> {}".format(code_upper, dam_code))
            return dam_code

        # No mapping - return as-is
        logger.warning("No mapping for asset type: {} - using as-is (may fail DAM validation)".format(code_upper))
        return three_letter_code

    def _build_fields_from_filename(self, parsed_filename, clean_filename):
        """
        Build ALL metadata fields from parsed filename
        Used in folder-only mode (tracking ID with -N suffix)

        Uses DomainValue format for domained fields to match PROD DAM API requirements.
        Maps asset type codes and country codes via lookup tables.

        Args:
            parsed_filename: Parsed filename dict; None is treated as empty.
            clean_filename: Value for the asset name field.

        Returns:
            New list of field dicts.
        """
        parsed_filename = parsed_filename or {}
        fields = []

        # ASSET NAME (non-domained, plain string)
        fields.append(self._plain_field('ARTESIA.FIELD.ASSET NAME', clean_filename))

        # DESCRIPTION (non-domained, plain string)
        if parsed_filename.get('subject_title'):
            fields.append(self._plain_field('ARTESIA.FIELD.ASSET DESCRIPTION', parsed_filename['subject_title']))

        # BRAND (domained)
        if parsed_filename.get('brand_code'):
            fields.append(self._domained_field('FERRERO.FIELD.BRAND', parsed_filename['brand_code']))

        # COUNTRY (domained, map ISO code to DAM code)
        if parsed_filename.get('country_code'):
            dam_country_code = self._map_country_code(parsed_filename['country_code'])
            fields.append(self._domained_field('FERRERO.FIELD.COUNTRY', dam_country_code))

        # LANGUAGE (non-domained, plain string)
        if parsed_filename.get('language_code'):
            fields.append(self._plain_field('FERRERO.FIELD.LANGUAGES', parsed_filename['language_code']))

        # ASSET TYPE (domained, use config field ID and map code via lookup)
        if parsed_filename.get('asset_type'):
            # Prefer the field ID configured with source 'asset_type'
            # (FERRERO.FIELD.MKTG.ASSET TYPE on PROD)
            asset_type_field_id = 'FERRERO.FIELD.ASSET TYPE'
            for field_id, config in self.filename_updates.items():
                if config.get('source') == 'asset_type':
                    asset_type_field_id = field_id
                    break
            mapped_asset_type = self._map_asset_type(parsed_filename['asset_type'])
            fields.append(self._domained_field(asset_type_field_id, mapped_asset_type))

        # STATE (domained, forced to Local)
        fields.append(self._domained_field('FERRERO.FIELD.STATE', 'Local'))

        # VALIDITY DATES (Start = Today, End = Today + 365 days)
        try:
            start_date_str, end_date_str = self._validity_date_strings()
            fields.append(self._plain_field('FERRERO.FIELD.ASSET VALIDITY START PERIOD', start_date_str))
            fields.append(self._plain_field('FERRERO.FIELD.ASSET VALIDITY END PERIOD', end_date_str))
        except Exception as e:
            # Best effort: a date problem must not abort the whole upload
            logger.error("Failed to set validity dates in folder-only mode: {}".format(str(e)))

        logger.info("Built {} fields from filename (folder-only mode)".format(len(fields)))
        return fields

    # ------------------------------------------------------------------
    # Low-level field accessors
    # ------------------------------------------------------------------
    def _get_field_value(self, field):
        """Get field value handling different structures

        Reads field['value']['value']['value'] (plain structure) or
        field['value']['value']['field_value']['value'] (DomainValue
        structure). Returns None for any other shape, including tabular
        fields that only carry 'values'.
        """
        outer = field.get('value')
        if not isinstance(outer, dict):
            return None
        inner = outer.get('value')
        if not isinstance(inner, dict):
            return None
        if 'value' in inner:
            return inner['value']
        field_value = inner.get('field_value')
        if isinstance(field_value, dict):
            return field_value.get('value')
        return None

    def _set_field_value(self, field, value):
        """Set field value handling different structures

        Works on fields that carry a dict under 'value'. Fields without one
        (for example tabular fields that only have 'values') are left
        untouched and a warning is logged.

        Returns:
            bool: True when the value was written, False when it was not.
        """
        field_id = field.get('id', 'UNKNOWN')
        logger.info("_set_field_value called for: {} with value: {}".format(field_id, value))
        logger.info("Current field['value']: {}".format(self._dump_for_log(field.get('value'))))

        outer = field.get('value')
        if not isinstance(outer, dict):
            # Deliberately no structure is created here: the field may be
            # tabular or domained, and a plain 'value' could be the wrong shape.
            logger.warning("Field {} has no 'value' structure - value not set".format(field_id))
            return False

        inner = outer.get('value')
        if isinstance(inner, dict):
            # Try nested structure first (most common)
            if 'value' in inner:
                inner['value'] = value
                # Ensure type is set for CreativeX URL field
                if field_id == 'FERRERO.FIELD.CREATIVEX LINK' and 'type' not in inner:
                    inner['type'] = 'string'
                logger.info("Set via field['value']['value']['value']")
            elif 'field_value' in inner:
                # DomainValue structure - update field_value
                if isinstance(inner['field_value'], dict):
                    inner['field_value']['value'] = value
                else:
                    inner['field_value'] = {'type': 'string', 'value': value}
                # Also update display_value to match
                if 'display_value' in inner:
                    inner['display_value'] = value
                logger.info("Set via field['value']['value']['field_value']['value'] (DomainValue)")
            else:
                # If nested dict is empty, create the value structure with type
                outer['value'] = {'type': 'string', 'value': value}
                logger.info("Created field['value']['value'] = {{'type': 'string', 'value': {}}}".format(value))
        else:
            # If value dict is empty or doesn't have nested value, create it with type
            field['value'] = {'value': {'type': 'string', 'value': value}}
            logger.info("Created field['value'] = {{'value': {{'type': 'string', 'value': {}}}}}".format(value))

        logger.info("After setting, field['value']: {}".format(self._dump_for_log(field.get('value'))))
        return True

    def _set_date_field_value(self, field, date_string):
        """
        Set date field value with proper type for DAM API

        The value is always stored as a plain 'string' typed value; a missing
        or non-dict 'value' entry is replaced by a fresh structure.

        Args:
            field: Field dict to update
            date_string: Date string to store; callers in this class pass
                MM/DD/YYYY
        """
        field_id = field.get('id', 'UNKNOWN')
        logger.info("_set_date_field_value called for: {} with value: {}".format(
            field_id, date_string
        ))

        outer = field.get('value')
        if isinstance(outer, dict) and isinstance(outer.get('value'), dict):
            # Update existing nested structure
            outer['value']['type'] = 'string'
            outer['value']['value'] = date_string
            logger.info("Set via field['value']['value'] with type 'string'")
            return

        field['value'] = {
            'value': {
                'type': 'string',
                'value': date_string
            }
        }
        if isinstance(outer, dict):
            logger.info("Created field['value'] with type 'string'")
        else:
            logger.info("Created field['value'] from scratch with type 'string'")

    # ------------------------------------------------------------------
    # CreativeX and master asset ID fields
    # ------------------------------------------------------------------
    def _update_creativex_fields(self, mvp_fields, box_metadata):
        """
        Update CreativeX fields from Box metadata template

        'score' (with 'platforms') feeds FERRERO.TAB.FIELD.CREATIVEX
        (Platform > Rating (%)); 'url' feeds FERRERO.FIELD.CREATIVEX LINK
        (CreativeX Hyperlink). Missing fields are appended.

        Args:
            mvp_fields: List of MVP fields
            box_metadata: dict with 'score', 'url' and optionally 'platforms'

        Returns:
            Updated mvp_fields list
        """
        # NOTE(review): the truthiness test also skips a numeric score of 0 -
        # confirm whether a 0 rating should be uploaded.
        if box_metadata.get('score'):
            # New structure: Platform^Score (e.g., "Google Ads^100")
            score_val = box_metadata['score']
            platforms = box_metadata.get('platforms', [])

            if not platforms:
                # "Unknown" is not a valid platform in the DAM's cascading domain
                logger.warning("No Platforms mapped for CreativeX score - skipping CREATIVEX field (not a required field)")
            else:
                # Construct one value object per platform
                value_objects = []
                for platform in platforms:
                    combined_value = "{}^{}".format(platform, score_val)
                    value_objects.append({
                        "cascading_domain_value": True,
                        "domain_value": False,
                        "is_locked": False,
                        "value": {
                            "field_value": {
                                "type": "string",
                                "value": combined_value
                            },
                            "type": "com.artesia.metadata.CascadingDomainValue"
                        }
                    })
                    logger.info("Constructed CreativeX value: {}".format(combined_value))

                for field in mvp_fields:
                    if field.get('id') == 'FERRERO.TAB.FIELD.CREATIVEX':
                        # Replace values list with new list of objects
                        field['values'] = value_objects
                        logger.info("Set CREATIVEX field with {} values".format(len(value_objects)))
                        break
                else:
                    logger.warning("CREATIVEX Score field not found in master metadata - adding it now")
                    # Create the field structure (tabular field)
                    mvp_fields.append({
                        "type": "com.artesia.metadata.MetadataTableField",
                        "id": "FERRERO.TAB.FIELD.CREATIVEX",
                        "parent_table_id": "FERRERO.TABULAR.FIELD.CREATIVEX",
                        "values": value_objects
                    })
                    logger.info("Added CREATIVEX Score field with {} values".format(len(value_objects)))

        if box_metadata.get('url'):
            url = box_metadata['url']
            logger.info("Updating CreativeX URL from database: {}".format(url))
            for field in mvp_fields:
                if field.get('id') != 'FERRERO.FIELD.CREATIVEX LINK':
                    continue
                try:
                    # Log field structure before setting
                    logger.info("CREATIVEX URL field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict'))
                    if self._set_field_value(field, url):
                        logger.info("Set CREATIVEX LINK to: {}".format(url))
                except Exception as e:
                    # Best effort: a CreativeX problem must not abort the upload
                    import traceback
                    logger.error("Failed to set CreativeX URL: {}".format(str(e)))
                    logger.error(traceback.format_exc())
                break
            else:
                logger.warning("CREATIVEX URL field not found in master metadata - adding it now")
                # Create the field structure (text field)
                mvp_fields.append({
                    'id': 'FERRERO.FIELD.CREATIVEX LINK',
                    'name': 'CreativeX Hyperlink',
                    'type': 'com.artesia.metadata.MetadataField',
                    'value': {
                        'value': {
                            'type': 'string',
                            'value': url
                        }
                    },
                    'data_type': 'CHAR',
                    'required': False
                })
                logger.info("Added CREATIVEX URL field with value: {}".format(url))
        return mvp_fields

    def _add_master_asset_id_field(self, mvp_fields, master_opentext_id):
        """
        Add Master Asset ID field (configurable via MASTER_ASSET_ID_FIELD in .env)

        When the configured ID contains 'TABULAR' a tabular field is produced;
        for FERRERO.TABULAR.FIELD.MASTERASSETIDS its column ID is
        FERRERO.MASTERASSETIDS. An existing field (matched by either the
        configured ID or the column ID) is updated instead of duplicated.

        Args:
            mvp_fields: List of MVP fields
            master_opentext_id: DAM Asset ID of the master asset

        Returns:
            Updated mvp_fields list
        """
        # Read configured field ID from environment, default to legacy ARTESIA field
        master_field_id = os.environ.get('MASTER_ASSET_ID_FIELD', 'ARTESIA.FIELD.ASSET_ID')
        logger.info("Using Master Asset ID field: {} (Value: {})".format(master_field_id, master_opentext_id))

        is_tabular = 'TABULAR' in master_field_id
        if is_tabular and master_field_id == 'FERRERO.TABULAR.FIELD.MASTERASSETIDS':
            # Parent: FERRERO.TABULAR.FIELD.MASTERASSETIDS, Child Column: FERRERO.MASTERASSETIDS
            child_column_id = 'FERRERO.MASTERASSETIDS'
        else:
            # Fallback for other potential tabular fields / plain fields
            child_column_id = master_field_id

        # Update scenario: the field is already present
        for field in mvp_fields:
            if self._get_field_id(field) not in (master_field_id, child_column_id):
                continue
            if self._is_tabular_field(field):
                field['values'] = [self._domain_value_row(master_opentext_id)]
                updated = True
            else:
                updated = self._set_field_value(field, master_opentext_id)
            if updated:
                logger.info("Updated existing Master Asset ID field: {}".format(master_opentext_id))
            return mvp_fields

        # Field doesn't exist - add new field
        if is_tabular:
            # SIMPLE tabular structure (no MetadataTableFieldRow wrapper)
            mvp_fields.append({
                'id': child_column_id,
                'parent_table_id': master_field_id,
                'type': self._TABLE_FIELD_TYPE,
                'values': [self._domain_value_row(master_opentext_id)]
            })
            logger.info("Added new TABULAR Master Asset ID field: {} (Parent: {})".format(child_column_id, master_field_id))
        else:
            # Standard Text Field
            mvp_fields.append(self._plain_field(master_field_id, master_opentext_id))
            logger.info("Added new Master Asset ID field: {}".format(master_field_id))
        return mvp_fields

    def _add_master_asset_ids_field(self, mvp_fields, master_opentext_ids):
        """
        Add FERRERO.MASTERASSETIDS tabular field with multiple master asset IDs
        Supports Many-to-Many relationship between derivatives and masters (PPR ONLY)

        Nothing is added when the list is empty or the field already exists.

        Args:
            mvp_fields: List of MVP fields
            master_opentext_ids: List of DAM Asset IDs of master assets

        Returns:
            Updated mvp_fields list with FERRERO.MASTERASSETIDS
        """
        if not master_opentext_ids or len(master_opentext_ids) == 0:
            logger.info("No master_opentext_ids provided - skipping FERRERO.MASTERASSETIDS field")
            return mvp_fields

        # Check if field already exists
        for field in mvp_fields:
            if self._get_field_id(field) == 'FERRERO.MASTERASSETIDS':
                logger.info("FERRERO.MASTERASSETIDS already present - skipping")
                return mvp_fields

        # One plain (non-domain) string value per master asset ID
        values = [
            {
                'cascading_domain_value': False,
                'domain_value': False,
                'is_locked': False,
                'value': {
                    'type': 'string',
                    'value': master_id
                }
            }
            for master_id in master_opentext_ids
        ]

        # Create tabular field
        mvp_fields.append({
            'id': 'FERRERO.MASTERASSETIDS',
            'parent_table_id': 'FERRERO.TABULAR.FIELD.MASTERASSETIDS',
            'type': self._TABLE_FIELD_TYPE,
            'values': values
        })

        # str() so non-string IDs cannot make the log line raise after the
        # field has already been added
        preview = ', '.join(str(master_id) for master_id in master_opentext_ids[:3])
        logger.info("Added FERRERO.MASTERASSETIDS field with {} master asset ID(s): {}".format(
            len(values), preview + ('...' if len(master_opentext_ids) > 3 else '')))
        return mvp_fields

    def _get_field_id(self, field):
        """Extract field ID from field dict ('' when absent or not a dict)"""
        if isinstance(field, dict):
            return field.get('id', '')
        return ''