""" Metadata Extractor MVP - Extract MVP fields from master metadata Ported from PHP MetadataExtractorMVP.php Compatible with Python 3.6+ *********************************************************************** *** PRODUCTION VERSION (metadata_extractor_mvp_PROD.py) *** *** *** *** This version uses SIMPLER tabular field structure *** *** (without MetadataTableFieldRow wrapper, like CreativeX field) *** *** *** *** Use this for PROD environment (dam.ferrero.com) *** *** *** *** For PPR environment, use metadata_extractor_mvp.py *** *********************************************************************** """ import logging from datetime import datetime, timedelta import os from shared.config_loader import load_country_code_mappings logger = logging.getLogger('MetadataExtractorMVP') class MetadataExtractorMVP: def __init__(self, field_mappings, config=None): """ Initialize with field mappings from config Args: field_mappings: dict from field_mappings.yaml config: Optional config dict with DAM base_url for environment detection """ self.mvp_field_ids = field_mappings['mvp_fields'] self.filename_updates = field_mappings.get('filename_updates', {}) self.forced_values = field_mappings.get('forced_values', {}) self.defaults = field_mappings.get('defaults', {}) # Store DAM base URL for environment detection self.dam_base_url = None if config and 'dam' in config and 'base_url' in config['dam']: self.dam_base_url = config['dam']['base_url'] logger.info("Environment detection: DAM URL = {}".format(self.dam_base_url)) # Load country code mappings (ISO -> DAM codes) self.country_mappings = load_country_code_mappings() if self.country_mappings: logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings))) # Load asset type mappings (3-letter codes -> DAM codes) self.asset_type_mappings = self._load_asset_type_mappings() if self.asset_type_mappings: logger.info("Loaded {} asset type mappings (3-letter->DAM)".format(len(self.asset_type_mappings))) def extract_mvp_fields(self, master_metadata): """ Extract only MVP fields from full master metadata Args: master_metadata: Complete DAM asset metadata Returns: List of MVP field objects """ extracted_fields = [] found_field_ids = [] # Navigate to metadata structure # master_metadata is the full asset, need to go to: metadata.metadata_element_list metadata_list = [] if isinstance(master_metadata, dict): if 'metadata' in master_metadata and 'metadata_element_list' in master_metadata['metadata']: metadata_list = master_metadata['metadata']['metadata_element_list'] logger.info("Using master_metadata['metadata']['metadata_element_list']") logger.info("Searching through {} categories for MVP fields".format(len(metadata_list))) # Search through categories for MVP fields for item in metadata_list: if 'metadata_element_list' in item: # Category with nested fields for field in item['metadata_element_list']: field_id = field.get('id') if field_id in self.mvp_field_ids: extracted_fields.append(field) found_field_ids.append(field_id) logger.debug("Found MVP field: {}".format(field_id)) elif 'id' in item and item['id'] in self.mvp_field_ids: # Direct field extracted_fields.append(item) found_field_ids.append(item['id']) logger.debug("Found direct MVP field: {}".format(item['id'])) # Log results missing = [f for f in self.mvp_field_ids if f not in found_field_ids] logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids))) if missing: logger.info("Missing fields: {}".format(', '.join(missing[:5]))) return extracted_fields def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full', master_opentext_id=None): """ Build asset representation with MVP fields + updates from filename Args: master_metadata: Full master asset metadata clean_filename: Clean filename (stripped) parsed_filename: Parsed V2 filename dict box_metadata: Optional Box metadata tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder) master_opentext_id: Optional DAM Asset ID of master asset (for derivative tracking) Returns: Asset representation dict ready for upload """ if tracking_mode == 'full': # FULL INHERITANCE MODE - Standard behavior logger.info("Full inheritance mode - using master metadata") # Extract MVP fields from master mvp_fields = self.extract_mvp_fields(master_metadata) # Update fields from filename and forced values mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename) elif tracking_mode == 'folder_only': # FOLDER ONLY MODE - New asset, only use upload folder logger.info("Folder-only mode (-N suffix) - building metadata from filename only") logger.warning("Note: Upload folder comes from master, all other metadata from filename") # Start with empty fields, build from filename mvp_fields = [] mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename) # Add missing MVP fields with defaults (both modes) mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename) # Update CreativeX fields from Box metadata if provided if box_metadata: mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata) # Add Master Asset ID field if provided (derivative tracking) if master_opentext_id: mvp_fields = self._add_master_asset_id_field(mvp_fields, master_opentext_id) logger.info("Added Master Asset ID field: {}".format(master_opentext_id)) # Add FERRERO.MASTERASSETIDS if not present (Issue #1 from comparison) mvp_fields = self._ensure_master_asset_ids_field(mvp_fields, master_opentext_id) # Clean metadata structure to match client reference (Issue #2 and #3) mvp_fields = self._clean_metadata_structure(mvp_fields) # Build asset representation asset_rep = { 'asset_resource': { 'asset': { 'metadata': { 'metadata_element_list': mvp_fields }, 'metadata_model_id': 'ECOMMERCE', 'security_policy_list': [ {'id': 1594} ] } } } logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields))) return asset_rep def _update_fields(self, mvp_fields, clean_filename, parsed_filename): """Update specific fields from filename and forced values""" # Process filename_updates from configuration for field_id, config in self.filename_updates.items(): source = config.get('source') transform = config.get('transform', '') # Get value from appropriate source if source == 'clean_filename': value = clean_filename elif source and parsed_filename: value = parsed_filename.get(source) else: continue if not value: continue # Apply transform if specified if transform == 'uppercase': value = value.upper() elif transform == 'lowercase': value = value.lower() # Apply asset type mapping if this is the asset type field if field_id == 'FERRERO.FIELD.MKTG.ASSET TYPE' and source == 'asset_type': value = self._map_asset_type(value) # Update the field for field in mvp_fields: if field.get('id') == field_id: # For tabular fields (like MAIN_LANGUAGES), update the 'values' array # The DAM reads from 'values' (plural), not 'value' (singular) if field.get('type') == 'com.artesia.metadata.MetadataTableField' or 'values' in field: field['values'] = [ { 'cascading_domain_value': False, 'domain_value': True, 'is_locked': False, 'value': { 'expired_value': False, 'field_value': { 'type': 'string', 'value': value }, 'type': 'com.artesia.metadata.DomainValue' } } ] logger.info("Updated tabular field {} values array from filename: {}".format(field_id, value)) else: self._set_field_value(field, value) logger.info("Updated {} from filename: {}".format(field_id, value)) break # Apply country code mapping (ISO -> DAM codes) for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.COUNTRY': current_value = self._get_field_value(field) if current_value: mapped_value = self._map_country_code(current_value) if mapped_value != current_value: self._set_field_value(field, mapped_value) logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value)) # Apply forced values from configuration for field_id, forced_value in self.forced_values.items(): for field in mvp_fields: if field.get('id') == field_id: self._set_field_value(field, forced_value) logger.info("Set {} to {}".format(field_id, forced_value)) break # Set Asset Validity Dates (Start = Today, End = Today + 1 Year) # Field 4: Date the asset was uploaded # Field 5: Add 1 year from date provided above try: today = datetime.now() one_year_later = today + timedelta(days=365) # Convert to US Date Format (MM/DD/YYYY) # This is the format the DAM expects for date fields start_date_str = today.strftime('%m/%d/%Y') end_date_str = one_year_later.strftime('%m/%d/%Y') date_fields = { 'FERRERO.FIELD.ASSET VALIDITY START PERIOD': start_date_str, 'FERRERO.FIELD.ASSET VALIDITY END PERIOD': end_date_str } for field_id, value in date_fields.items(): field_found = False for field in mvp_fields: if field.get('id') == field_id: # Use specialized method for date fields self._set_date_field_value(field, value) logger.info("Set {} to {} (Upload Date Logic)".format(field_id, value)) field_found = True break if not field_found: # Add new date field with proper structure (string type per client's asset_representation.json) mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': value } } }) logger.info("Added {} with value {} (Upload Date Logic)".format(field_id, value)) except Exception as e: logger.error("Failed to set validity dates: {}".format(str(e))) return mvp_fields def _add_missing_fields(self, mvp_fields, parsed_filename): """Add missing MVP fields from filename or defaults""" field_ids = [f.get('id') for f in mvp_fields] # Handle MAIN_LANGUAGES field if parsed_filename and parsed_filename.get('language_code'): language = parsed_filename['language_code'].upper() # Check if MAIN_LANGUAGES already exists (possibly with null value from Box webhook) existing_main_lang_idx = None for idx, field in enumerate(mvp_fields): if field.get('id') == 'MAIN_LANGUAGES': existing_main_lang_idx = idx break # Create MAIN_LANGUAGES structure - PROD version (simpler structure like CreativeX) main_languages_field = { 'id': 'MAIN_LANGUAGES', 'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES', 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': language }, 'type': 'com.artesia.metadata.DomainValue' } } ] } if existing_main_lang_idx is not None: # Replace existing MAIN_LANGUAGES (likely has null value from Box webhook) logger.info("Replacing existing MAIN_LANGUAGES with: {}".format(language)) mvp_fields[existing_main_lang_idx] = main_languages_field else: # Add new MAIN_LANGUAGES logger.info("Adding MAIN_LANGUAGES: {}".format(language)) mvp_fields.append(main_languages_field) elif 'MAIN_LANGUAGES' in field_ids: # MAIN_LANGUAGES exists but we have no language_code - remove it to avoid null error logger.warning("Removing MAIN_LANGUAGES field - no language_code available from filename") mvp_fields[:] = [f for f in mvp_fields if f.get('id') != 'MAIN_LANGUAGES'] # Add other missing fields with defaults field_ids = [f.get('id') for f in mvp_fields] for field_id, default_value in self.defaults.items(): if field_id not in field_ids: logger.info("Adding {} with default: {}".format(field_id, default_value)) # Check if it's a tabular field (contains .TABULAR. in parent table ID) is_tabular = 'TABULAR' in field_id or field_id in [ 'FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG' ] if is_tabular: # Use simpler structure for tabular fields - PROD version (like CreativeX) mvp_fields.append({ 'id': field_id, 'parent_table_id': 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1], 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': default_value }, 'type': 'com.artesia.metadata.DomainValue' } } ] }) else: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'type': 'string', 'value': default_value } } }) return mvp_fields def _map_country_code(self, iso_code): """ Map ISO country code to DAM country code Args: iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE') Returns: str: DAM country code (e.g., 'BG' for BD, 'DE' for DE) """ if not iso_code: return iso_code iso_upper = iso_code.upper() # Check if we have a mapping if iso_upper in self.country_mappings: dam_code = self.country_mappings[iso_upper] if dam_code != iso_upper: logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code)) return dam_code else: # No mapping found, use ISO code as-is logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper)) return iso_upper def _load_asset_type_mappings(self): """ Load asset type mappings: 3-letter codes -> DAM codes Returns: dict: 3-letter code -> DAM code mapping """ import yaml mapping_path = 'config/asset_type_mappings.yaml' try: with open(mapping_path, 'r') as f: mappings = yaml.safe_load(f) return mappings if mappings else {} except Exception as e: logger.warning("Could not load asset type mappings: {}".format(str(e))) return {} def _map_asset_type(self, three_letter_code): """ Map 3-letter asset type code to DAM code Args: three_letter_code: 3-letter code (e.g., 'EHI', 'IMG', 'TVC') Returns: DAM code (e.g., 'heroimage', 'keyvisual', 'tvc') """ if not three_letter_code: return three_letter_code code_upper = three_letter_code.upper() # Check if we have a mapping if code_upper in self.asset_type_mappings: dam_code = self.asset_type_mappings[code_upper] logger.info("Asset type mapping: {} -> {}".format(code_upper, dam_code)) return dam_code # No mapping - return as-is logger.warning("No mapping for asset type: {} - using as-is (may fail DAM validation)".format(code_upper)) return three_letter_code def _build_fields_from_filename(self, parsed_filename, clean_filename): """ Build ALL metadata fields from parsed filename Used in folder-only mode (tracking ID with -N suffix) Note: Uses codes directly for now. Can add lookup tables later for brand_code->brand_name, country_code->country_name, etc. """ fields = [] # ASSET NAME fields.append({ 'id': 'ARTESIA.FIELD.ASSET NAME', 'value': {'value': {'type': 'string', 'value': clean_filename}} }) # DESCRIPTION (from subject_title) if parsed_filename.get('subject_title'): fields.append({ 'id': 'ARTESIA.FIELD.ASSET DESCRIPTION', 'value': {'value': {'type': 'string', 'value': parsed_filename['subject_title']}} }) # BRAND (use code for now, could add lookup later) if parsed_filename.get('brand_code'): fields.append({ 'id': 'FERRERO.FIELD.BRAND', 'value': {'value': {'type': 'string', 'value': parsed_filename['brand_code']}} }) # COUNTRY (map ISO code to DAM code) if parsed_filename.get('country_code'): dam_country_code = self._map_country_code(parsed_filename['country_code']) fields.append({ 'id': 'FERRERO.FIELD.COUNTRY', 'value': {'value': {'value': dam_country_code}} }) # LANGUAGE (use code for now) if parsed_filename.get('language_code'): fields.append({ 'id': 'FERRERO.FIELD.LANGUAGES', 'value': {'value': {'value': parsed_filename['language_code']}} }) # ASSET TYPE (use code for now) if parsed_filename.get('asset_type'): fields.append({ 'id': 'FERRERO.FIELD.ASSET TYPE', 'value': {'value': {'value': parsed_filename['asset_type']}} }) # STATE (force to Local) fields.append({ 'id': 'FERRERO.FIELD.STATE', 'value': {'value': {'value': 'Local'}} }) logger.info("Built {} fields from filename (folder-only mode)".format(len(fields))) return fields def _get_field_value(self, field): """Get field value handling different structures""" if 'value' in field: if isinstance(field['value'], dict): if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: return field['value']['value']['value'] elif 'field_value' in field['value']['value']: return field['value']['value']['field_value'].get('value') return None def _set_field_value(self, field, value): """Set field value handling different structures""" import json field_id = field.get('id', 'UNKNOWN') logger.info("_set_field_value called for: {} with value: {}".format(field_id, value)) logger.info("Current field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) # Check if this is a domain field is_domain_field = field.get('domained', False) or field.get('domain_id') if 'value' in field: if isinstance(field['value'], dict): # Try nested structure first (most common) if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: field['value']['value']['value'] = value # Ensure type is set for CreativeX URL field if field_id == 'FERRERO.FIELD.CREATIVEX LINK' and 'type' not in field['value']['value']: field['value']['value']['type'] = 'string' logger.info("Set via field['value']['value']['value']") elif 'field_value' in field['value']['value']: # DomainValue structure - update field_value field['value']['value']['field_value']['value'] = value # Also update display_value to match if 'display_value' in field['value']['value']: field['value']['value']['display_value'] = value logger.info("Set via field['value']['value']['field_value']['value'] (DomainValue)") else: # If nested dict is empty, check if it's a domain field if is_domain_field: # Create DomainValue structure for domain fields field['value']['value'] = { 'type': 'com.artesia.metadata.DomainValue', 'active_to': '', 'active_from': '', 'field_value': { 'type': 'string', 'value': value }, 'display_value': value, 'expired_value': False } logger.info("Created DomainValue structure for domain field") else: # Create simple string structure for non-domain fields field['value']['value'] = {'type': 'string', 'value': value} logger.info("Created simple string structure for non-domain field") else: # If value dict is empty or doesn't have nested value, create it if is_domain_field: # Create full DomainValue structure for domain fields field['value'] = { 'value': { 'type': 'com.artesia.metadata.DomainValue', 'active_to': '', 'active_from': '', 'field_value': { 'type': 'string', 'value': value }, 'display_value': value, 'expired_value': False }, 'is_locked': False, 'domain_value': True, 'cascading_domain_value': False } logger.info("Created full DomainValue structure from scratch") else: # Create simple structure for non-domain fields field['value'] = {'value': {'type': 'string', 'value': value}} logger.info("Created simple string structure from scratch") logger.info("After setting, field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) def _set_date_field_value(self, field, date_string): """ Set date field value with proper type for DAM API Args: field: Field dict to update date_string: Date as ISO string (YYYY-MM-DDTHH:mm:ss) """ field_id = field.get('id', 'UNKNOWN') logger.info("_set_date_field_value called for: {} with value: {}".format( field_id, date_string )) if 'value' in field: if isinstance(field['value'], dict): if 'value' in field['value'] and isinstance(field['value']['value'], dict): # Update existing nested structure field['value']['value']['type'] = 'string' field['value']['value']['value'] = date_string logger.info("Set via field['value']['value'] with type 'string'") else: # Create nested structure field['value'] = { 'value': { 'type': 'string', 'value': date_string } } logger.info("Created field['value'] with type 'string'") else: # Create value structure from scratch field['value'] = { 'value': { 'type': 'string', 'value': date_string } } logger.info("Created field['value'] from scratch with type 'string'") def _update_creativex_fields(self, mvp_fields, box_metadata): """ Update CreativeX fields from Box metadata template Args: mvp_fields: List of MVP fields box_metadata: dict with 'score' and 'url' from Box template Returns: Updated mvp_fields list """ # Map Box metadata to DAM field IDs creativex_mapping = { 'score': 'FERRERO.TAB.FIELD.CREATIVEX', # Platform > Rating (%) 'url': 'FERRERO.FIELD.CREATIVEX LINK' # CreativeX Hyperlink } # Only process CreativeX if we have valid score and platforms score_val = box_metadata.get('score') platforms = box_metadata.get('platforms', []) # Skip if score is 0, '0', None, or no platforms (avoid invalid "Unknown" default) if score_val and str(score_val) != '0' and platforms: # Update CreativeX Score field (tabular field structure) # New structure: Platform^Score (e.g., "Google Ads^100") logger.info("Processing CreativeX score: {} with platforms: {}".format(score_val, platforms)) # Construct value objects for each platform value_objects = [] for platform in platforms: combined_value = "{}^{}".format(platform, score_val) value_obj = { "cascading_domain_value": True, "domain_value": False, "is_locked": False, "value": { "field_value": { "type": "string", "value": combined_value }, "type": "com.artesia.metadata.CascadingDomainValue" } } value_objects.append(value_obj) logger.info("Constructed CreativeX value: {}".format(combined_value)) score_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.TAB.FIELD.CREATIVEX': score_field_found = True try: # Replace values list with new list of objects field['values'] = value_objects logger.info("Set CREATIVEX field with {} values".format(len(value_objects))) except Exception as e: logger.error("Failed to set CreativeX Score: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not score_field_found: logger.warning("CREATIVEX Score field not found in master metadata - adding it now") # Create the field structure (tabular field) creativex_score_field = { "type": "com.artesia.metadata.MetadataTableField", "id": "FERRERO.TAB.FIELD.CREATIVEX", "parent_table_id": "FERRERO.TABULAR.FIELD.CREATIVEX", "values": value_objects } mvp_fields.append(creativex_score_field) logger.info("Added CREATIVEX Score field with {} values".format(len(value_objects))) else: logger.info("Skipping CreativeX score field - Score: {}, Platforms: {} (will not add invalid 'Unknown' value)".format( score_val if score_val else 'None', platforms if platforms else 'None' )) if box_metadata.get('url'): # Update CreativeX URL field logger.info("Updating CreativeX URL from database: {}".format(box_metadata['url'])) url_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.CREATIVEX LINK': url_field_found = True try: # Log field structure before setting logger.info("CREATIVEX URL field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict')) self._set_field_value(field, box_metadata['url']) logger.info("Set CREATIVEX LINK to: {}".format(box_metadata['url'])) except Exception as e: logger.error("Failed to set CreativeX URL: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not url_field_found: logger.warning("CREATIVEX URL field not found in master metadata - adding it now") # Create the field structure (text field) creativex_url_field = { 'id': 'FERRERO.FIELD.CREATIVEX LINK', 'name': 'CreativeX Hyperlink', 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'type': 'string', 'value': box_metadata['url'] } }, 'data_type': 'CHAR', 'required': False } mvp_fields.append(creativex_url_field) logger.info("Added CREATIVEX URL field with value: {}".format(box_metadata['url'])) return mvp_fields def _add_master_asset_id_field(self, mvp_fields, master_opentext_id): """ Add Master Asset ID field (configurable via MASTER_ASSET_ID_FIELD in .env) Args: mvp_fields: List of MVP fields master_opentext_id: DAM Asset ID of the master asset Returns: Updated mvp_fields list """ # Read configured field ID from environment, default to legacy ARTESIA field master_field_id = os.environ.get('MASTER_ASSET_ID_FIELD', 'ARTESIA.FIELD.ASSET_ID') logger.info("Using Master Asset ID field: {} (Value: {})".format(master_field_id, master_opentext_id)) # Check if field already exists in MVP fields (update scenario) for field in mvp_fields: field_id = self._get_field_id(field) if field_id == master_field_id: # Update existing field value # If tabular, we need special handling, but _add_missing_fields usually won't add this # so we assume if it exists, we just update the value if 'TABULAR' in master_field_id: # Tabular field update logic would go here if needed # For now, assuming we are creating it new mostly pass self._set_field_value(field, master_opentext_id) logger.info("Updated existing Master Asset ID field: {}".format(master_opentext_id)) return mvp_fields # Field doesn't exist - add new field # Check if it's a tabular field if 'TABULAR' in master_field_id: # Construct tabular field structure # Logic updated based on Staging Definition: # Parent: FERRERO.TABULAR.FIELD.MASTERASSETIDS # Child Column: FERRERO.MASTERASSETIDS # Determine child column ID if master_field_id == 'FERRERO.TABULAR.FIELD.MASTERASSETIDS': child_column_id = 'FERRERO.MASTERASSETIDS' else: # Fallback for other potential tabular fields child_column_id = master_field_id # Use simpler structure for tabular fields - PROD version (like CreativeX) new_field = { 'id': child_column_id, 'parent_table_id': master_field_id, 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': master_opentext_id }, 'type': 'com.artesia.metadata.DomainValue' } } ] } mvp_fields.append(new_field) logger.info("Added new TABULAR Master Asset ID field: {} (Column: {})".format(master_field_id, child_column_id)) else: # Standard Text Field (system field structure per client's asset_representation.json) mvp_fields.append({ 'id': master_field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'cascading_domain_value': False, 'domain_value': False, 'is_locked': False, 'value': { 'type': 'string', 'value': master_opentext_id } } }) logger.info("Added new Master Asset ID field: {}".format(master_field_id)) return mvp_fields def _get_field_id(self, field): """Extract field ID from field dict""" if isinstance(field, dict): return field.get('id', '') return '' def _is_prod_environment(self): """ Detect if we're running in PROD vs PPR environment Returns: bool: True if PROD, False if PPR or unknown """ if not self.dam_base_url: # Can't detect - assume PROD for safety logger.warning("Cannot detect environment (no DAM URL) - assuming PROD") return True # PPR uses ppr.dam.ferrero.com, PROD uses dam.ferrero.com is_ppr = 'ppr.dam.ferrero.com' in self.dam_base_url.lower() is_prod = not is_ppr logger.info("Environment detected: {} (URL: {})".format( 'PROD' if is_prod else 'PPR', self.dam_base_url )) return is_prod def _ensure_master_asset_ids_field(self, mvp_fields, master_opentext_id): """ Ensure FERRERO.MASTERASSETIDS field is present when there's a value to track Only adds field if master_opentext_id is provided AND we're in PPR environment (PROD doesn't have this field configured in metadata schema) Args: mvp_fields: List of MVP fields master_opentext_id: DAM Asset ID of the master asset (optional) Returns: Updated mvp_fields list with FERRERO.MASTERASSETIDS if needed """ # Skip if no value provided if not master_opentext_id: logger.info("No master_opentext_id provided - skipping FERRERO.MASTERASSETIDS field") return mvp_fields # Skip if PROD environment (field not configured in PROD DAM schema) if self._is_prod_environment(): logger.info("PROD environment detected - skipping FERRERO.MASTERASSETIDS field (not configured in PROD)") return mvp_fields # Check if field already exists for field in mvp_fields: if self._get_field_id(field) == 'FERRERO.MASTERASSETIDS': logger.info("FERRERO.MASTERASSETIDS already present") return mvp_fields # Field doesn't exist - add it with simpler structure - PROD version (like CreativeX) new_field = { 'id': 'FERRERO.MASTERASSETIDS', 'parent_table_id': 'FERRERO.TABULAR.FIELD.MASTERASSETIDS', 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': master_opentext_id }, 'type': 'com.artesia.metadata.DomainValue' } } ] } mvp_fields.append(new_field) logger.info("Added FERRERO.MASTERASSETIDS field with value: {}".format(master_opentext_id)) return mvp_fields def _clean_metadata_structure(self, mvp_fields): """ Clean metadata structure to match client reference file (Issues #2 and #3) Fixes: 1. Remove 'description' property from DomainValue objects (Issue #2) 2. Remove overpopulated values from optional domain fields (Issue #3) Args: mvp_fields: List of MVP fields Returns: Cleaned mvp_fields list matching reference structure """ for field in mvp_fields: if not isinstance(field, dict): continue field_id = self._get_field_id(field) # Skip tabular fields (they have different structure) if field.get('type') == 'com.artesia.metadata.MetadataTableField': # Clean DomainValue objects in tabular field values if 'values' in field and isinstance(field['values'], list): for row in field['values']: if isinstance(row, dict) and 'value' in row: self._clean_domain_value(row['value']) continue # Regular MetadataField if 'value' in field and isinstance(field['value'], dict): # Check if this is a domain field with a value if 'value' in field['value'] and isinstance(field['value']['value'], dict): value_obj = field['value']['value'] # Clean DomainValue structure (remove 'description') self._clean_domain_value(value_obj) # Check if this is an optional domain field with no actual value # (like FERRERO.FIELD.MARKETING.SIZE in reference - lines 155-186) if self._is_empty_domain_value(value_obj): # Remove the nested value object, keep only flags logger.info("Cleaning optional domain field (no value): {}".format(field_id)) field['value'] = { 'cascading_domain_value': field['value'].get('cascading_domain_value', False), 'domain_value': field['value'].get('domain_value', False), 'is_locked': field['value'].get('is_locked', False) } return mvp_fields def _clean_domain_value(self, value_obj): """ Remove 'description' property from DomainValue object (Issue #2) Args: value_obj: DomainValue dictionary """ if isinstance(value_obj, dict): # Remove 'description' if present if 'description' in value_obj: logger.info("Removing 'description' from DomainValue") del value_obj['description'] def _is_empty_domain_value(self, value_obj): """ Check if a DomainValue object is empty/unpopulated Empty means: no field_value.value or field_value.value is empty string Args: value_obj: DomainValue dictionary Returns: True if empty, False if has value """ if not isinstance(value_obj, dict): return True # Check if it's a DomainValue type if value_obj.get('type') not in ['com.artesia.metadata.DomainValue', 'com.artesia.metadata.CascadingDomainValue']: return False # Check field_value field_value = value_obj.get('field_value', {}) if isinstance(field_value, dict): actual_value = field_value.get('value', '') # Empty if no value or empty string return not actual_value return True