""" Metadata Extractor MVP - Extract MVP fields from master metadata Ported from PHP MetadataExtractorMVP.php Compatible with Python 3.6+ """ import logging import json import copy from datetime import datetime, timedelta import os from shared.config_loader import load_country_code_mappings logger = logging.getLogger('MetadataExtractorMVP') # Editor field name -> DAM metadata field ID. # Mirrors the canonical mapping in the naming tool's public-v2/Database.php # so that pre-upload overrides saved via the metadata editor are applied to # the matching DAM fields on upload. OVERRIDE_FIELD_MAP = { 'validity_start': 'FERRERO.FIELD.ASSET VALIDITY START PERIOD', 'validity_end': 'FERRERO.FIELD.ASSET VALIDITY END PERIOD', 'marketing_tag': 'MARKETING_TAG', 'agency_name': 'FERRERO.MARKETING.FIELD.AGENCY NAME', 'spot_version': 'FERRERO.MARKETING.FIELD.SPOT_VERSION', 'director_name': 'FERRERO.MARKETING.FIELD.DIRECTOR_NAME', 'video_post_prod_company': 'FERRERO.MARKETING.FIELD.VIDEO_POST_PROD_COMPANY', 'video_post_prod_contact': 'FERRERO.MARKETING.FIELD.VID_POST_PROD_CONTACT', 'audio_post_prod_company': 'FERRERO.MARKETING.FIELD.AUDIO_POST_PROD_COMPANY', 'audio_post_prod_contact': 'FERRERO.MARKETING.FIELD.AUDIO_POST_PROD_CONTACT', 'video_type': 'FERRERO.MARKET.FIELD.TYPE_VID', 'ip_rights': 'FERRERO.MARKET.FIELD.IPRIGHT', 'production_company': 'FERRERO.MARKET.PROD_COMPANY', 'licensing': 'FERRERO.MARKET.FIELD.LICENSIN', 'buyout': 'FERRERO.MARKET.FIELD.BUYOUT', 'ferrero_property': 'FERRERO.MARKET.FIELD.FERRERO PROPERTY', 'video_status': 'FERRERO.MARKET.VID_N_STAT', 'license': 'FERRERO.MARKET.FIELD.LICENSE', 'creativex_score': 'FERRERO.TAB.FIELD.CREATIVEX', 'creativex_link': 'FERRERO.FIELD.CREATIVEX LINK', } DATE_OVERRIDE_FIELDS = {'validity_start', 'validity_end'} class MetadataExtractorMVP: def __init__(self, field_mappings): """ Initialize with field mappings from config Args: field_mappings: dict from field_mappings.yaml """ self.mvp_field_ids = field_mappings['mvp_fields'] self.filename_updates = field_mappings.get('filename_updates', {}) self.forced_values = field_mappings.get('forced_values', {}) self.defaults = field_mappings.get('defaults', {}) self.asset_type_overrides = field_mappings.get('asset_type_overrides', {}) # Load country code mappings (ISO -> DAM codes) self.country_mappings = load_country_code_mappings() if self.country_mappings: logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings))) # Load asset type mappings (3-letter codes -> DAM codes) self.asset_type_mappings = self._load_asset_type_mappings() if self.asset_type_mappings: logger.info("Loaded {} asset type mappings (3-letter->DAM)".format(len(self.asset_type_mappings))) # Load asset representation template for folder-only mode self.template_fields = self._load_asset_representation_template() if self.template_fields: logger.info("Loaded asset representation template with {} fields".format(len(self.template_fields))) def _load_asset_representation_template(self): """Load the asset representation template JSON for folder-only mode""" template_path = 'config/asset_representation_template.json' try: with open(template_path, 'r') as f: data = json.load(f) return data['asset_resource']['asset']['metadata']['metadata_element_list'] except Exception as e: logger.warning("Could not load asset representation template: {}".format(str(e))) return [] def extract_mvp_fields(self, master_metadata): """ Extract only MVP fields from full master metadata Args: master_metadata: Complete DAM asset metadata Returns: List of MVP field objects """ extracted_fields = [] found_field_ids = [] # Navigate to metadata structure # master_metadata is the full asset, need to go to: metadata.metadata_element_list metadata_list = [] if isinstance(master_metadata, dict): if 'metadata' in master_metadata and 'metadata_element_list' in master_metadata['metadata']: metadata_list = master_metadata['metadata']['metadata_element_list'] logger.info("Using master_metadata['metadata']['metadata_element_list']") logger.info("Searching through {} categories for MVP fields".format(len(metadata_list))) # Search through categories for MVP fields for item in metadata_list: if 'metadata_element_list' in item: # Category with nested fields for field in item['metadata_element_list']: field_id = field.get('id') if field_id in self.mvp_field_ids: extracted_fields.append(field) found_field_ids.append(field_id) logger.debug("Found MVP field: {}".format(field_id)) # Check if this is a MetadataTable containing nested fields if field.get('type') == 'com.artesia.metadata.MetadataTable' and 'metadata_element_list' in field: logger.debug("Found MetadataTable: {}, searching inside...".format(field_id)) # Search inside the table for MVP fields for nested_field in field['metadata_element_list']: nested_field_id = nested_field.get('id') if nested_field_id in self.mvp_field_ids: extracted_fields.append(nested_field) found_field_ids.append(nested_field_id) logger.info("Found MVP field inside MetadataTable: {}".format(nested_field_id)) elif 'id' in item and item['id'] in self.mvp_field_ids: # Direct field extracted_fields.append(item) found_field_ids.append(item['id']) logger.debug("Found direct MVP field: {}".format(item['id'])) # Log results missing = [f for f in self.mvp_field_ids if f not in found_field_ids] logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids))) if missing: logger.info("Missing fields: {}".format(', '.join(missing[:5]))) return extracted_fields def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full', master_opentext_id=None, master_opentext_ids=None, override_fields=None): """ Build asset representation with MVP fields + updates from filename Args: master_metadata: Full master asset metadata clean_filename: Clean filename (stripped) parsed_filename: Parsed V2 filename dict box_metadata: Optional Box metadata tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder) master_opentext_id: Optional DAM Asset ID of master asset (for derivative tracking) override_fields: Optional dict of pre-upload metadata overrides keyed by editor field name (e.g. {'validity_end': '...', 'ip_rights': 'Yes'}). Applied after master/filename/forced values but before asset-type overrides so EOL/LTD compliance still wins. Empty values are skipped. Returns: Asset representation dict ready for upload """ if tracking_mode == 'full': # FULL INHERITANCE MODE - Standard behavior logger.info("Full inheritance mode - using master metadata") # Extract MVP fields from master mvp_fields = self.extract_mvp_fields(master_metadata) # Update fields from filename and forced values mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename) elif tracking_mode == 'folder_only': # FOLDER ONLY MODE - New asset, only use upload folder logger.info("Folder-only mode (-N suffix) - building metadata from filename only") logger.warning("Note: Upload folder comes from master, all other metadata from filename") # Start with empty fields, build from filename mvp_fields = [] mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename) # Apply forced values from config (e.g., AGENCY NAME) # STATE is already handled in _build_fields_from_filename mvp_fields = self._apply_forced_values(mvp_fields) # Add missing MVP fields with defaults (both modes) mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename) # Add empty required fields that DAM expects (even if empty) - folder-only mode needs these mvp_fields = self._add_empty_required_fields(mvp_fields) # Update CreativeX fields from Box metadata if provided if box_metadata: mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata) # Apply pre-upload metadata overrides from the naming tool's editor. # Runs after master/filename/forced/default/CreativeX values so it wins # over them, but before asset_type_overrides so EOL/LTD compliance rules # still take final precedence. if override_fields: mvp_fields = self._apply_override_fields(mvp_fields, override_fields) # Apply asset type overrides (e.g., EOL, LTD) - takes final precedence over # forced values, defaults, and CreativeX (LTD removes CreativeX entirely). mvp_fields = self._apply_asset_type_overrides(mvp_fields, parsed_filename) # Add MASTERASSETIDS field with all master IDs # Priority: Use master_opentext_ids if provided (multiple IDs), otherwise fall back to single master_opentext_id if master_opentext_ids and len(master_opentext_ids) > 0: mvp_fields = self._add_master_asset_ids_field(mvp_fields, master_opentext_ids) if len(master_opentext_ids) > 1: logger.info("PPR - Added MASTERASSETIDS field with {} master IDs".format(len(master_opentext_ids))) else: logger.info("Added MASTERASSETIDS field with 1 master ID") elif master_opentext_id: # Fallback to single master ID if master_opentext_ids not provided mvp_fields = self._add_master_asset_id_field(mvp_fields, master_opentext_id) logger.info("Added Master Asset ID field: {}".format(master_opentext_id)) # Build asset representation asset_rep = { 'asset_resource': { 'asset': { 'metadata': { 'metadata_element_list': mvp_fields }, 'metadata_model_id': 'ECOMMERCE', 'security_policy_list': [ {'id': 1594} ] } } } logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields))) return asset_rep def _update_fields(self, mvp_fields, clean_filename, parsed_filename): """Update specific fields from filename and forced values""" # Process filename_updates from configuration for field_id, config in self.filename_updates.items(): source = config.get('source') transform = config.get('transform', '') # Get value from appropriate source if source == 'clean_filename': value = clean_filename elif source and parsed_filename: value = parsed_filename.get(source) else: continue if not value: continue # Apply transform if specified if transform == 'uppercase': value = value.upper() elif transform == 'lowercase': value = value.lower() # Apply asset type mapping if this is the asset type field if field_id == 'FERRERO.FIELD.MKTG.ASSET TYPE' and source == 'asset_type': value = self._map_asset_type(value) # Update the field for field in mvp_fields: if field.get('id') == field_id: # For tabular fields (like MAIN_LANGUAGES), update the 'values' array # The DAM reads from 'values' (plural), not 'value' (singular) if field.get('type') == 'com.artesia.metadata.MetadataTableField' or 'values' in field: field['values'] = [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': { 'type': 'string', 'value': value }, 'type': 'com.artesia.metadata.DomainValue' } } ] logger.info("Updated tabular field {} values array from filename: {}".format(field_id, value)) else: self._set_field_value(field, value) logger.info("Updated {} from filename: {}".format(field_id, value)) break # Apply country code mapping (ISO -> DAM codes) for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.COUNTRY': current_value = self._get_field_value(field) if current_value: mapped_value = self._map_country_code(current_value) if mapped_value != current_value: self._set_field_value(field, mapped_value) logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value)) # Apply forced values from configuration for field_id, forced_value in self.forced_values.items(): for field in mvp_fields: if field.get('id') == field_id: self._set_field_value(field, forced_value) logger.info("Set {} to {}".format(field_id, forced_value)) break # Apply defaults to empty existing fields for field in mvp_fields: field_id = field.get('id') if field_id in self.defaults: current_value = self._get_field_value(field) if not current_value: # Field exists but is empty/None default_value = self.defaults[field_id] self._set_field_value(field, default_value) logger.info("Applied default to empty field {}: {}".format(field_id, default_value)) # Set Asset Validity Dates (Start = Today, End = Today + 1 Year) # Field 4: Date the asset was uploaded # Field 5: Add 1 year from date provided above try: today = datetime.now() one_year_later = today + timedelta(days=365) # Convert to US Date Format (MM/DD/YYYY) # This is the format the DAM expects for date fields start_date_str = today.strftime('%m/%d/%Y') end_date_str = one_year_later.strftime('%m/%d/%Y') date_fields = { 'FERRERO.FIELD.ASSET VALIDITY START PERIOD': start_date_str, 'FERRERO.FIELD.ASSET VALIDITY END PERIOD': end_date_str } for field_id, value in date_fields.items(): field_found = False for field in mvp_fields: if field.get('id') == field_id: # Use specialized method for date fields self._set_date_field_value(field, value) logger.info("Set {} to {} ms (Upload Date Logic)".format(field_id, value)) field_found = True break if not field_found: # Add new date field with proper structure mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': value } } }) logger.info("Added {} with value {} (Upload Date Logic)".format(field_id, value)) except Exception as e: logger.error("Failed to set validity dates: {}".format(str(e))) return mvp_fields def _apply_asset_type_overrides(self, mvp_fields, parsed_filename): """ Apply asset type overrides when a matching asset type (e.g., EOL) is detected in the filename. These overrides take final precedence over forced values and defaults. Args: mvp_fields: List of MVP field objects parsed_filename: Parsed filename dict (must contain 'asset_type' key) Returns: Updated mvp_fields list """ if not parsed_filename: return mvp_fields asset_type = parsed_filename.get('asset_type') if not asset_type: return mvp_fields overrides = self.asset_type_overrides.get(asset_type) if not overrides: return mvp_fields logger.info("Applying {} asset type overrides for '{}'".format(len(overrides), asset_type)) for field_id, override_value in overrides.items(): # Empty string means remove the field entirely if override_value == '': before_count = len(mvp_fields) mvp_fields = [f for f in mvp_fields if f.get('id') != field_id] if len(mvp_fields) < before_count: logger.info("Asset type override: removed field {}".format(field_id)) else: logger.debug("Asset type override: field {} not present (nothing to remove)".format(field_id)) continue field_found = False for field in mvp_fields: if field.get('id') == field_id: field_found = True # For tabular fields (like MAIN_LANGUAGES), update both 'value' and 'values' if field.get('type') == 'com.artesia.metadata.MetadataTableField' or 'values' in field: domain_value_obj = { 'type': 'com.artesia.metadata.DomainValue', 'field_value': {'type': 'string', 'value': override_value}, 'display_value': override_value, 'expired_value': False, 'active_to': '', 'active_from': '' } field['value'] = { 'value': domain_value_obj, 'is_locked': False, 'domain_value': True, 'cascading_domain_value': False } field['values'] = [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': { 'type': 'string', 'value': override_value }, 'type': 'com.artesia.metadata.DomainValue' } } ] logger.info("Asset type override: {} = {} (tabular)".format(field_id, override_value)) else: self._set_field_value(field, override_value) logger.info("Asset type override: {} = {}".format(field_id, override_value)) break if not field_found: # Field not present yet (e.g. description has no subject_title from filename). # Append as a simple string field so the override still takes effect. Tabular # / domained overrides aren't supported here — they should already be in # mvp_fields via _add_missing_fields. mvp_fields.append({ 'id': field_id, 'value': {'value': {'type': 'string', 'value': override_value}} }) logger.info("Asset type override: {} = {} (added missing field)".format(field_id, override_value)) return mvp_fields def _add_missing_fields(self, mvp_fields, parsed_filename): """Add missing MVP fields from filename or defaults""" field_ids = [f.get('id') for f in mvp_fields] # Add MAIN_LANGUAGES if missing if 'MAIN_LANGUAGES' not in field_ids and parsed_filename: if parsed_filename.get('language_code'): language = parsed_filename['language_code'].upper() logger.info("Adding MAIN_LANGUAGES: {}".format(language)) domain_value_obj = { 'type': 'com.artesia.metadata.DomainValue', 'field_value': {'type': 'string', 'value': language}, 'display_value': language, 'expired_value': False, 'active_to': '', 'active_from': '' } mvp_fields.append({ 'id': 'MAIN_LANGUAGES', 'name': 'MAIN LANGUAGES', 'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES', 'type': 'com.artesia.metadata.MetadataTableField', 'value': { 'value': domain_value_obj, 'is_locked': False, 'domain_value': True, 'cascading_domain_value': False }, 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': {'type': 'string', 'value': language}, 'type': 'com.artesia.metadata.DomainValue' } } ], 'tabular': True, 'domained': True, 'required': True, 'domain_id': 'FERRERO.DOMAIN.MAIN LAGUAGES_LU' }) # Add other missing fields with defaults field_ids = [f.get('id') for f in mvp_fields] for field_id, default_value in self.defaults.items(): if field_id in field_ids: # Field exists (e.g. from template) - check if value is empty and set default for field in mvp_fields: if field.get('id') == field_id: # Tabular fields use 'values' array - skip if already populated if field.get('type') == 'com.artesia.metadata.MetadataTableField': if field.get('values'): break # Already has values # Empty tabular - fall through to add as new below break # Regular field - check if it has an actual value set val = field.get('value', {}) has_value = 'value' in val and isinstance(val.get('value'), dict) and 'value' in val['value'] if not has_value: # Use DomainValue format for domained fields if field.get('domained', False): field['value'] = { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'active_from': '', 'active_to': '', 'display_value': default_value, 'expired_value': False, 'field_value': {'type': 'string', 'value': default_value}, 'type': 'com.artesia.metadata.DomainValue' } } else: field['value'] = {'value': {'type': 'string', 'value': default_value}} logger.info("Set default on template field {} = {}".format(field_id, default_value)) break continue if field_id not in field_ids: logger.info("Adding {} with default: {}".format(field_id, default_value)) # Check if it's a tabular field (contains .TABULAR. in parent table ID) is_tabular = 'TABULAR' in field_id or field_id in [ 'FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG' ] if is_tabular: # Map field IDs to correct parent table IDs parent_table_map = { 'FERRERO.FIELD.ASSETCOMPLIANCE': 'FERRERO.TABULAR.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG': 'FERRERO.TABULAR.FIELD.MARKETING.TAG', } parent_table_id = parent_table_map.get(field_id, 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1]) domain_value_obj = { 'type': 'com.artesia.metadata.DomainValue', 'field_value': {'type': 'string', 'value': default_value}, 'display_value': default_value, 'expired_value': False, 'active_to': '', 'active_from': '' } mvp_fields.append({ 'id': field_id, 'parent_table_id': parent_table_id, 'type': 'com.artesia.metadata.MetadataTableField', 'value': { 'value': domain_value_obj, 'is_locked': False, 'domain_value': True, 'cascading_domain_value': False }, 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': { 'type': 'string', 'value': default_value }, 'type': 'com.artesia.metadata.DomainValue' } } ], 'tabular': True, 'domained': True }) else: # Non-domain fields use simple value structure non_domain_fields = [ 'FERRERO.MARKETING.FIELD.VIDEO_POST_PROD_COMPANY', 'FERRERO.MARKETING.FIELD.AUDIO_POST_PROD_COMPANY', ] if field_id in non_domain_fields: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': default_value } } }) else: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': default_value }, 'type': 'com.artesia.metadata.DomainValue' } } }) return mvp_fields def _apply_forced_values(self, mvp_fields): """ Apply forced values from config to existing fields. For fields not yet present, adds them with DomainValue format. Used in folder-only mode where _update_fields is not called. """ field_ids = [f.get('id') for f in mvp_fields] for field_id, forced_value in self.forced_values.items(): if field_id in field_ids: # Field exists - set value with proper format based on field type for field in mvp_fields: if field.get('id') == field_id: if field.get('domained', False): field['value'] = { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'active_from': '', 'active_to': '', 'display_value': forced_value, 'expired_value': False, 'field_value': {'type': 'string', 'value': forced_value}, 'type': 'com.artesia.metadata.DomainValue' } } else: self._set_field_value(field, forced_value) logger.info("Forced value applied: {} = {}".format(field_id, forced_value)) break else: # Field not present - add with DomainValue format mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': {'type': 'string', 'value': forced_value}, 'type': 'com.artesia.metadata.DomainValue' } } }) logger.info("Forced value added: {} = {}".format(field_id, forced_value)) return mvp_fields def _add_empty_required_fields(self, mvp_fields): """ Add fields that the DAM expects to be present even if empty. In full-inheritance mode these come from the master asset. In folder-only mode they must be explicitly added. Only adds fields not already present. """ field_ids = [f.get('id') for f in mvp_fields] # Empty value structure for domained fields with no value set empty_domained_value = { 'is_locked': False, 'domain_value': False, 'cascading_domain_value': False } # Fields with empty domained values empty_domained_fields = [ 'FERRERO.FIELD.MARKETING.FLAVOUR', 'FERRERO.FIELD.MARKETING.SIZE', 'FERRERO.FIELD.SUB BRAND', 'FERRERO.MARKET.FIELD.BUYOUT', 'FERRERO.MARKET.FIELD.FERRERO PROPERTY', 'FERRERO.MARKET.VID_N_STAT', 'FERRERO.MARKETING.FIELD.SPOT_VERSION', ] for field_id in empty_domained_fields: if field_id not in field_ids: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': dict(empty_domained_value) }) # Fields with empty non-domained values empty_plain_fields = [ 'FERRERO.MARKETING.FIELD.DIRECTOR_NAME', 'FERRERO.MARKETING.FIELD.VID_POST_PROD_CONTACT', 'FERRERO.MARKETING.FIELD.AUDIO_POST_PROD_CONTACT', 'FERRERO.MARKET.FIELD.LICENSE', ] for field_id in empty_plain_fields: if field_id not in field_ids: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'is_locked': False, 'domain_value': False, 'cascading_domain_value': False } }) # Domained fields with default "No" value no_value_fields = [ 'FERRERO.MARKET.FIELD.IPRIGHT', 'FERRERO.MARKET.FIELD.LICENSIN', ] for field_id in no_value_fields: if field_id not in field_ids: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'com.artesia.metadata.DomainValue', 'field_value': {'type': 'string', 'value': 'No'}, 'display_value': 'No', 'expired_value': False, 'active_to': '', 'active_from': '' }, 'is_locked': False, 'domain_value': True, 'cascading_domain_value': False } }) # Empty tabular field: Type of Video & Static Right if 'FERRERO.MARKET.FIELD.TYPE_VID' not in field_ids: mvp_fields.append({ 'id': 'FERRERO.MARKET.FIELD.TYPE_VID', 'parent_table_id': 'FERRERO.TABULAR.VID_STAT_TYPE', 'type': 'com.artesia.metadata.MetadataTableField', 'values': [], 'tabular': True, 'domained': True }) added_count = len(mvp_fields) - len(field_ids) if added_count > 0: logger.info("Added {} empty required fields for DAM compatibility".format(added_count)) return mvp_fields def _map_country_code(self, iso_code): """ Map ISO country code to DAM country code Args: iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE') Returns: str: DAM country code (e.g., 'BG' for BD, 'DE' for DE) """ if not iso_code: return iso_code iso_upper = iso_code.upper() # Check if we have a mapping if iso_upper in self.country_mappings: dam_code = self.country_mappings[iso_upper] if dam_code != iso_upper: logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code)) return dam_code else: # No mapping found, use ISO code as-is logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper)) return iso_upper def _load_asset_type_mappings(self): """ Load asset type mappings: 3-letter codes -> DAM codes Returns: dict: 3-letter code -> DAM code mapping """ import yaml mapping_path = 'config/asset_type_mappings.yaml' try: with open(mapping_path, 'r') as f: mappings = yaml.safe_load(f) return mappings if mappings else {} except Exception as e: logger.warning("Could not load asset type mappings: {}".format(str(e))) return {} def _map_asset_type(self, three_letter_code): """ Map 3-letter asset type code to DAM code Args: three_letter_code: 3-letter code (e.g., 'EHI', 'IMG', 'TVC') Returns: DAM code (e.g., 'heroimage', 'keyvisual', 'tvc') """ if not three_letter_code: return three_letter_code code_upper = three_letter_code.upper() # Check if we have a mapping if code_upper in self.asset_type_mappings: dam_code = self.asset_type_mappings[code_upper] logger.info("Asset type mapping: {} -> {}".format(code_upper, dam_code)) return dam_code # No mapping - return as-is logger.warning("No mapping for asset type: {} - using as-is (may fail DAM validation)".format(code_upper)) return three_letter_code def _build_fields_from_filename(self, parsed_filename, clean_filename): """ Build ALL metadata fields from parsed filename using the reference template. Used in folder-only mode (tracking ID with -N suffix). Deep copies the asset representation template and populates values from the parsed filename. This ensures all fields have the full metadata structure (column_name, data_type, etc.) that the DAM API requires. """ if not self.template_fields: logger.error("No asset representation template loaded - folder-only mode cannot proceed") return [] # Deep copy the template so we don't modify the original fields = copy.deepcopy(self.template_fields) # Build lookup for quick access fields_by_id = {f['id']: f for f in fields} # Helper to set a domained field value with DomainValue structure def set_domained_value(field, value): field['value'] = { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'active_from': '', 'active_to': '', 'display_value': value, 'expired_value': False, 'field_value': {'type': 'string', 'value': value}, 'type': 'com.artesia.metadata.DomainValue' } } # Helper to set a plain string field value def set_string_value(field, value): field['value'] = {'value': {'type': 'string', 'value': value}} # --- Populate fields from filename --- # ASSET NAME if 'ARTESIA.FIELD.ASSET NAME' in fields_by_id: set_string_value(fields_by_id['ARTESIA.FIELD.ASSET NAME'], clean_filename) # DESCRIPTION if parsed_filename.get('subject_title') and 'ARTESIA.FIELD.ASSET DESCRIPTION' in fields_by_id: set_string_value(fields_by_id['ARTESIA.FIELD.ASSET DESCRIPTION'], parsed_filename['subject_title']) # Note: BRAND and COUNTRY are NOT set in the metadata payload. # They are inherited from the DAM folder structure. # ASSET TYPE (use config field ID, map code via lookup) if parsed_filename.get('asset_type'): asset_type_field_id = 'FERRERO.FIELD.ASSET TYPE' for field_id, config in self.filename_updates.items(): if config.get('source') == 'asset_type': asset_type_field_id = field_id break mapped_asset_type = self._map_asset_type(parsed_filename['asset_type']) if asset_type_field_id in fields_by_id: set_domained_value(fields_by_id[asset_type_field_id], mapped_asset_type) # STATE (forced to Local) if 'FERRERO.FIELD.STATE' in fields_by_id: set_domained_value(fields_by_id['FERRERO.FIELD.STATE'], 'Local') # MAIN_LANGUAGES (tabular field — populate values array from language_code) if parsed_filename.get('language_code') and 'MAIN_LANGUAGES' in fields_by_id: language = parsed_filename['language_code'].upper() fields_by_id['MAIN_LANGUAGES']['values'] = [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': {'type': 'string', 'value': language}, 'type': 'com.artesia.metadata.DomainValue' } } ] logger.info("Set MAIN_LANGUAGES (folder-only mode): {}".format(language)) # VALIDITY DATES (Start = Today, End = Today + 1 Year) try: today = datetime.now() one_year_later = today + timedelta(days=365) start_date_str = today.strftime('%m/%d/%Y') end_date_str = one_year_later.strftime('%m/%d/%Y') if 'FERRERO.FIELD.ASSET VALIDITY START PERIOD' in fields_by_id: set_string_value(fields_by_id['FERRERO.FIELD.ASSET VALIDITY START PERIOD'], start_date_str) if 'FERRERO.FIELD.ASSET VALIDITY END PERIOD' in fields_by_id: set_string_value(fields_by_id['FERRERO.FIELD.ASSET VALIDITY END PERIOD'], end_date_str) except Exception as e: logger.error("Failed to set validity dates in folder-only mode: {}".format(str(e))) logger.info("Built {} fields from template (folder-only mode)".format(len(fields))) return fields def _get_field_value(self, field): """Get field value handling different structures""" if 'value' in field: if isinstance(field['value'], dict): if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: return field['value']['value']['value'] elif 'field_value' in field['value']['value']: return field['value']['value']['field_value'].get('value') return None def _apply_override_fields(self, mvp_fields, override_fields): """ Apply pre-upload metadata overrides from the naming tool. For each non-empty entry in override_fields, map the editor field name to its DAM field ID via OVERRIDE_FIELD_MAP and write the value into the matching field in mvp_fields. Empty strings are skipped (treat as "user didn't set this, leave inherited value alone"). Validity dates from the editor arrive as ISO 8601 strings and are normalised to the MM/DD/YYYY format DAM expects. """ if not override_fields: return mvp_fields applied = 0 for editor_field, raw_value in override_fields.items(): if raw_value is None or raw_value == '': continue dam_field_id = OVERRIDE_FIELD_MAP.get(editor_field) if not dam_field_id: logger.debug("Override: no DAM mapping for editor field '{}' - skipping".format(editor_field)) continue value = raw_value if editor_field in DATE_OVERRIDE_FIELDS: value = self._normalize_iso_date(raw_value) if not value: continue target = None for field in mvp_fields: if field.get('id') == dam_field_id: target = field break if target is None: logger.warning("Override: field {} (DAM id {}) not present in mvp_fields - skipping".format( editor_field, dam_field_id )) continue if editor_field in DATE_OVERRIDE_FIELDS: self._set_date_field_value(target, value) else: self._set_field_value(target, value) logger.info("Override applied: {} ({}) = {}".format(editor_field, dam_field_id, value)) applied += 1 if applied: logger.info("Applied {} pre-upload override field(s) from naming tool".format(applied)) return mvp_fields def _normalize_iso_date(self, iso_str): """Convert an ISO 8601 date string (with or without time/timezone) to MM/DD/YYYY.""" if not iso_str: return None try: date_part = iso_str.split('T')[0] dt = datetime.strptime(date_part, '%Y-%m-%d') return dt.strftime('%m/%d/%Y') except Exception as e: logger.warning("Could not normalize override date '{}': {}".format(iso_str, str(e))) return None def _set_field_value(self, field, value): """Set field value handling different structures""" import json field_id = field.get('id', 'UNKNOWN') logger.info("_set_field_value called for: {} with value: {}".format(field_id, value)) logger.info("Current field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) if 'value' in field: if isinstance(field['value'], dict): # Try nested structure first (most common) if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: field['value']['value']['value'] = value # Ensure type is set for CreativeX URL field if field_id == 'FERRERO.FIELD.CREATIVEX LINK' and 'type' not in field['value']['value']: field['value']['value']['type'] = 'string' logger.info("Set via field['value']['value']['value']") elif 'field_value' in field['value']['value']: # DomainValue structure - update field_value field['value']['value']['field_value']['value'] = value # Also update display_value to match if 'display_value' in field['value']['value']: field['value']['value']['display_value'] = value logger.info("Set via field['value']['value']['field_value']['value'] (DomainValue)") else: # If nested dict is empty, create the value structure with type field['value']['value'] = {'type': 'string', 'value': value} logger.info("Created field['value']['value'] = {{'type': 'string', 'value': {}}}".format(value)) else: # If value dict is empty or doesn't have nested value, create it with type field['value'] = {'value': {'type': 'string', 'value': value}} logger.info("Created field['value'] = {{'value': {{'type': 'string', 'value': {}}}}}".format(value)) logger.info("After setting, field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) def _set_date_field_value(self, field, date_string): """ Set date field value with proper type for DAM API Args: field: Field dict to update date_string: Date as ISO string (YYYY-MM-DDTHH:mm:ss) """ field_id = field.get('id', 'UNKNOWN') logger.info("_set_date_field_value called for: {} with value: {}".format( field_id, date_string )) if 'value' in field: if isinstance(field['value'], dict): if 'value' in field['value'] and isinstance(field['value']['value'], dict): # Update existing nested structure field['value']['value']['type'] = 'string' field['value']['value']['value'] = date_string logger.info("Set via field['value']['value'] with type 'string'") else: # Create nested structure field['value'] = { 'value': { 'type': 'string', 'value': date_string } } logger.info("Created field['value'] with type 'string'") else: # Create value structure from scratch field['value'] = { 'value': { 'type': 'string', 'value': date_string } } logger.info("Created field['value'] from scratch with type 'string'") def _update_creativex_fields(self, mvp_fields, box_metadata): """ Update CreativeX fields from Box metadata template Args: mvp_fields: List of MVP fields box_metadata: dict with 'score' and 'url' from Box template Returns: Updated mvp_fields list """ # Map Box metadata to DAM field IDs creativex_mapping = { 'score': 'FERRERO.TAB.FIELD.CREATIVEX', # Platform > Rating (%) 'url': 'FERRERO.FIELD.CREATIVEX LINK' # CreativeX Hyperlink } if box_metadata.get('score'): # Update CreativeX Score field (tabular field structure) # New structure: Platform^Score (e.g., "Google Ads^100") score_val = box_metadata['score'] platforms = box_metadata.get('platforms', []) # If no platforms, skip adding the CREATIVEX field # "Unknown" is not a valid platform in the DAM's cascading domain if not platforms: logger.warning("No Platforms mapped for CreativeX score - skipping CREATIVEX field (not a required field)") else: # Construct value objects for each platform value_objects = [] for platform in platforms: combined_value = "{}^{}".format(platform, score_val) value_obj = { "cascading_domain_value": True, "domain_value": False, "is_locked": False, "value": { "field_value": { "type": "string", "value": combined_value }, "type": "com.artesia.metadata.CascadingDomainValue" } } value_objects.append(value_obj) logger.info("Constructed CreativeX value: {}".format(combined_value)) score_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.TAB.FIELD.CREATIVEX': score_field_found = True try: # Replace values list with new list of objects field['values'] = value_objects logger.info("Set CREATIVEX field with {} values".format(len(value_objects))) except Exception as e: logger.error("Failed to set CreativeX Score: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not score_field_found: logger.warning("CREATIVEX Score field not found in master metadata - adding it now") # Create the field structure (tabular field) creativex_score_field = { "type": "com.artesia.metadata.MetadataTableField", "id": "FERRERO.TAB.FIELD.CREATIVEX", "parent_table_id": "FERRERO.TABULAR.FIELD.CREATIVEX", "values": value_objects } mvp_fields.append(creativex_score_field) logger.info("Added CREATIVEX Score field with {} values".format(len(value_objects))) if box_metadata.get('url'): # Update CreativeX URL field logger.info("Updating CreativeX URL from database: {}".format(box_metadata['url'])) url_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.CREATIVEX LINK': url_field_found = True try: # Log field structure before setting logger.info("CREATIVEX URL field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict')) self._set_field_value(field, box_metadata['url']) logger.info("Set CREATIVEX LINK to: {}".format(box_metadata['url'])) except Exception as e: logger.error("Failed to set CreativeX URL: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not url_field_found: logger.warning("CREATIVEX URL field not found in master metadata - adding it now") # Create the field structure (text field) creativex_url_field = { 'id': 'FERRERO.FIELD.CREATIVEX LINK', 'name': 'CreativeX Hyperlink', 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': box_metadata['url'] } }, 'data_type': 'CHAR', 'required': False } mvp_fields.append(creativex_url_field) logger.info("Added CREATIVEX URL field with value: {}".format(box_metadata['url'])) return mvp_fields def _add_master_asset_id_field(self, mvp_fields, master_opentext_id): """ Add Master Asset ID field (configurable via MASTER_ASSET_ID_FIELD in .env) Args: mvp_fields: List of MVP fields master_opentext_id: DAM Asset ID of the master asset Returns: Updated mvp_fields list """ # Read configured field ID from environment, default to legacy ARTESIA field master_field_id = os.environ.get('MASTER_ASSET_ID_FIELD', 'ARTESIA.FIELD.ASSET_ID') logger.info("Using Master Asset ID field: {} (Value: {})".format(master_field_id, master_opentext_id)) # Check if field already exists in MVP fields (update scenario) for field in mvp_fields: field_id = self._get_field_id(field) if field_id == master_field_id: # Update existing field value # If tabular, we need special handling, but _add_missing_fields usually won't add this # so we assume if it exists, we just update the value if 'TABULAR' in master_field_id: # Tabular field update logic would go here if needed # For now, assuming we are creating it new mostly pass self._set_field_value(field, master_opentext_id) logger.info("Updated existing Master Asset ID field: {}".format(master_opentext_id)) return mvp_fields # Field doesn't exist - add new field # Check if it's a tabular field if 'TABULAR' in master_field_id: # Construct tabular field structure using SIMPLE structure (no MetadataTableFieldRow wrapper) # Logic updated based on Staging Definition: # Parent: FERRERO.TABULAR.FIELD.MASTERASSETIDS # Child Column: FERRERO.MASTERASSETIDS # Determine child column ID if master_field_id == 'FERRERO.TABULAR.FIELD.MASTERASSETIDS': child_column_id = 'FERRERO.MASTERASSETIDS' else: # Fallback for other potential tabular fields child_column_id = master_field_id new_field = { 'id': child_column_id, 'parent_table_id': master_field_id, 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'field_value': { 'type': 'string', 'value': master_opentext_id }, 'type': 'com.artesia.metadata.DomainValue' } } ] } mvp_fields.append(new_field) logger.info("Added new TABULAR Master Asset ID field: {} (Parent: {})".format(child_column_id, master_field_id)) else: # Standard Text Field mvp_fields.append({ 'id': master_field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': master_opentext_id } } }) logger.info("Added new Master Asset ID field: {}".format(master_field_id)) return mvp_fields def _add_master_asset_ids_field(self, mvp_fields, master_opentext_ids): """ Add FERRERO.MASTERASSETIDS tabular field with multiple master asset IDs Supports Many-to-Many relationship between derivatives and masters Args: mvp_fields: List of MVP fields master_opentext_ids: List of DAM Asset IDs of master assets Returns: Updated mvp_fields list with FERRERO.MASTERASSETIDS """ if not master_opentext_ids or len(master_opentext_ids) == 0: logger.info("No master_opentext_ids provided - skipping FERRERO.MASTERASSETIDS field") return mvp_fields # Check if field already exists for field in mvp_fields: if self._get_field_id(field) == 'FERRERO.MASTERASSETIDS': logger.info("FERRERO.MASTERASSETIDS already present - skipping") return mvp_fields # Build values array with all master asset IDs values = [] for master_id in master_opentext_ids: values.append({ 'cascading_domain_value': False, 'domain_value': False, 'is_locked': False, 'value': { 'type': 'string', 'value': master_id } }) # Create tabular field new_field = { 'id': 'FERRERO.MASTERASSETIDS', 'parent_table_id': 'FERRERO.TABULAR.FIELD.MASTERASSETIDS', 'type': 'com.artesia.metadata.MetadataTableField', 'values': values } mvp_fields.append(new_field) logger.info("Added FERRERO.MASTERASSETIDS field with {} master asset ID(s): {}".format( len(values), ', '.join(master_opentext_ids[:3]) + ('...' if len(master_opentext_ids) > 3 else ''))) return mvp_fields def _get_field_id(self, field): """Extract field ID from field dict""" if isinstance(field, dict): return field.get('id', '') return ''