""" Metadata Extractor MVP - Extract MVP fields from master metadata Ported from PHP MetadataExtractorMVP.php Compatible with Python 3.6+ """ import logging from shared.config_loader import load_country_code_mappings logger = logging.getLogger('MetadataExtractorMVP') class MetadataExtractorMVP: def __init__(self, field_mappings): """ Initialize with field mappings from config Args: field_mappings: dict from field_mappings.yaml """ self.mvp_field_ids = field_mappings['mvp_fields'] self.filename_updates = field_mappings.get('filename_updates', {}) self.forced_values = field_mappings.get('forced_values', {}) self.defaults = field_mappings.get('defaults', {}) # Load country code mappings (ISO -> DAM codes) self.country_mappings = load_country_code_mappings() if self.country_mappings: logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings))) def extract_mvp_fields(self, master_metadata): """ Extract only MVP fields from full master metadata Args: master_metadata: Complete DAM asset metadata Returns: List of MVP field objects """ extracted_fields = [] found_field_ids = [] # Navigate to metadata structure # master_metadata is the full asset, need to go to: metadata.metadata_element_list metadata_list = [] if isinstance(master_metadata, dict): if 'metadata' in master_metadata and 'metadata_element_list' in master_metadata['metadata']: metadata_list = master_metadata['metadata']['metadata_element_list'] logger.info("Using master_metadata['metadata']['metadata_element_list']") logger.info("Searching through {} categories for MVP fields".format(len(metadata_list))) # Search through categories for MVP fields for item in metadata_list: if 'metadata_element_list' in item: # Category with nested fields for field in item['metadata_element_list']: field_id = field.get('id') if field_id in self.mvp_field_ids: extracted_fields.append(field) found_field_ids.append(field_id) logger.debug("Found MVP field: {}".format(field_id)) elif 'id' in item and item['id'] in self.mvp_field_ids: # Direct field extracted_fields.append(item) found_field_ids.append(item['id']) logger.debug("Found direct MVP field: {}".format(item['id'])) # Log results missing = [f for f in self.mvp_field_ids if f not in found_field_ids] logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids))) if missing: logger.info("Missing fields: {}".format(', '.join(missing[:5]))) return extracted_fields def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full'): """ Build asset representation with MVP fields + updates from filename Args: master_metadata: Full master asset metadata clean_filename: Clean filename (stripped) parsed_filename: Parsed V2 filename dict box_metadata: Optional Box metadata tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder) Returns: Asset representation dict ready for upload """ if tracking_mode == 'full': # FULL INHERITANCE MODE - Standard behavior logger.info("Full inheritance mode - using master metadata") # Extract MVP fields from master mvp_fields = self.extract_mvp_fields(master_metadata) # Update fields from filename and forced values mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename) elif tracking_mode == 'folder_only': # FOLDER ONLY MODE - New asset, only use upload folder logger.info("Folder-only mode (-N suffix) - building metadata from filename only") logger.warning("Note: Upload folder comes from master, all other metadata from filename") # Start with empty fields, build from filename mvp_fields = [] mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename) # Add missing MVP fields with defaults (both modes) mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename) # Update CreativeX fields from Box metadata if provided if box_metadata: mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata) # Build asset representation asset_rep = { 'asset_resource': { 'asset': { 'metadata': { 'metadata_element_list': mvp_fields }, 'metadata_model_id': 'ECOMMERCE', 'security_policy_list': [ {'id': 1594} ] } } } logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields))) return asset_rep def _update_fields(self, mvp_fields, clean_filename, parsed_filename): """Update specific fields from filename and forced values""" # Update ASSET NAME for field in mvp_fields: if field.get('id') == 'ARTESIA.FIELD.ASSET NAME': self._set_field_value(field, clean_filename) logger.info("Updated ASSET NAME: {}".format(clean_filename)) # Update DESCRIPTION from subject_title if parsed_filename and parsed_filename.get('subject_title'): for field in mvp_fields: if field.get('id') == 'ARTESIA.FIELD.ASSET DESCRIPTION': self._set_field_value(field, parsed_filename['subject_title']) logger.info("Updated DESCRIPTION: {}".format(parsed_filename['subject_title'])) # Apply country code mapping (ISO -> DAM codes) for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.COUNTRY': current_value = self._get_field_value(field) if current_value: mapped_value = self._map_country_code(current_value) if mapped_value != current_value: self._set_field_value(field, mapped_value) logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value)) # Force STATE to Local for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.STATE': self._set_field_value(field, 'Local') logger.info("Set STATE to Local") return mvp_fields def _add_missing_fields(self, mvp_fields, parsed_filename): """Add missing MVP fields from filename or defaults""" field_ids = [f.get('id') for f in mvp_fields] # Add MAIN_LANGUAGES if missing if 'MAIN_LANGUAGES' not in field_ids and parsed_filename: if parsed_filename.get('language_code'): language = parsed_filename['language_code'].upper() logger.info("Adding MAIN_LANGUAGES: {}".format(language)) mvp_fields.append({ 'id': 'MAIN_LANGUAGES', 'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES', 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': language }, 'type': 'com.artesia.metadata.DomainValue' } } ] }) # Add other missing fields with defaults field_ids = [f.get('id') for f in mvp_fields] for field_id, default_value in self.defaults.items(): if field_id not in field_ids: logger.info("Adding {} with default: {}".format(field_id, default_value)) # Check if it's a tabular field (contains .TABULAR. in parent table ID) is_tabular = 'TABULAR' in field_id or field_id in [ 'FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG' ] if is_tabular: mvp_fields.append({ 'id': field_id, 'parent_table_id': 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1], 'type': 'com.artesia.metadata.MetadataTableField', 'values': [ { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'field_value': { 'type': 'string', 'value': default_value }, 'type': 'com.artesia.metadata.DomainValue' } } ] }) else: mvp_fields.append({ 'id': field_id, 'type': 'com.artesia.metadata.MetadataField', 'value': { 'cascading_domain_value': False, 'domain_value': True, 'value': { 'type': 'string', 'value': default_value } } }) return mvp_fields def _map_country_code(self, iso_code): """ Map ISO country code to DAM country code Args: iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE') Returns: str: DAM country code (e.g., 'BG' for BD, 'DE' for DE) """ if not iso_code: return iso_code iso_upper = iso_code.upper() # Check if we have a mapping if iso_upper in self.country_mappings: dam_code = self.country_mappings[iso_upper] if dam_code != iso_upper: logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code)) return dam_code else: # No mapping found, use ISO code as-is logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper)) return iso_upper def _build_fields_from_filename(self, parsed_filename, clean_filename): """ Build ALL metadata fields from parsed filename Used in folder-only mode (tracking ID with -N suffix) Note: Uses codes directly for now. Can add lookup tables later for brand_code->brand_name, country_code->country_name, etc. """ fields = [] # ASSET NAME fields.append({ 'id': 'ARTESIA.FIELD.ASSET NAME', 'value': {'value': {'value': clean_filename}} }) # DESCRIPTION (from subject_title) if parsed_filename.get('subject_title'): fields.append({ 'id': 'ARTESIA.FIELD.ASSET DESCRIPTION', 'value': {'value': {'value': parsed_filename['subject_title']}} }) # BRAND (use code for now, could add lookup later) if parsed_filename.get('brand_code'): fields.append({ 'id': 'FERRERO.FIELD.BRAND', 'value': {'value': {'value': parsed_filename['brand_code']}} }) # COUNTRY (map ISO code to DAM code) if parsed_filename.get('country_code'): dam_country_code = self._map_country_code(parsed_filename['country_code']) fields.append({ 'id': 'FERRERO.FIELD.COUNTRY', 'value': {'value': {'value': dam_country_code}} }) # LANGUAGE (use code for now) if parsed_filename.get('language_code'): fields.append({ 'id': 'FERRERO.FIELD.LANGUAGES', 'value': {'value': {'value': parsed_filename['language_code']}} }) # ASSET TYPE (use code for now) if parsed_filename.get('asset_type'): fields.append({ 'id': 'FERRERO.FIELD.ASSET TYPE', 'value': {'value': {'value': parsed_filename['asset_type']}} }) # STATE (force to Local) fields.append({ 'id': 'FERRERO.FIELD.STATE', 'value': {'value': {'value': 'Local'}} }) logger.info("Built {} fields from filename (folder-only mode)".format(len(fields))) return fields def _get_field_value(self, field): """Get field value handling different structures""" if 'value' in field: if isinstance(field['value'], dict): if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: return field['value']['value']['value'] elif 'field_value' in field['value']['value']: return field['value']['value']['field_value'].get('value') return None def _set_field_value(self, field, value): """Set field value handling different structures""" import json field_id = field.get('id', 'UNKNOWN') logger.info("_set_field_value called for: {} with value: {}".format(field_id, value)) logger.info("Current field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) if 'value' in field: if isinstance(field['value'], dict): # Try nested structure first (most common) if 'value' in field['value'] and isinstance(field['value']['value'], dict): if 'value' in field['value']['value']: field['value']['value']['value'] = value logger.info("Set via field['value']['value']['value']") elif 'field_value' in field['value']['value']: field['value']['value']['field_value']['value'] = value logger.info("Set via field['value']['value']['field_value']['value']") else: # If nested dict is empty, create the value structure field['value']['value'] = {'value': value} logger.info("Created field['value']['value'] = {{'value': {}}}".format(value)) else: # If value dict is empty or doesn't have nested value, create it field['value'] = {'value': {'value': value}} logger.info("Created field['value'] = {{'value': {{'value': {}}}}}".format(value)) logger.info("After setting, field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None')) def _update_creativex_fields(self, mvp_fields, box_metadata): """ Update CreativeX fields from Box metadata template Args: mvp_fields: List of MVP fields box_metadata: dict with 'score' and 'url' from Box template Returns: Updated mvp_fields list """ # Map Box metadata to DAM field IDs creativex_mapping = { 'score': 'FERRERO.TAB.FIELD.CREATIVEX', # Platform > Rating (%) 'url': 'FERRERO.FIELD.CREATIVEX LINK' # CreativeX Hyperlink } if box_metadata.get('score'): # Update CreativeX Score field (tabular field structure) logger.info("Updating CreativeX Score from database: {}".format(box_metadata['score'])) score_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.TAB.FIELD.CREATIVEX': score_field_found = True try: # Log field structure before setting logger.info("CREATIVEX Score field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict')) self._set_field_value(field, box_metadata['score']) logger.info("Set CREATIVEX Score to: {}".format(box_metadata['score'])) except Exception as e: logger.error("Failed to set CreativeX Score: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not score_field_found: logger.warning("CREATIVEX Score field not found in master metadata - adding it now") # Create the field structure (tabular field) creativex_score_field = { 'id': 'FERRERO.TAB.FIELD.CREATIVEX', 'name': 'Rating (%)', 'type': 'com.artesia.metadata.MetadataTableField', 'parent_table_id': 'FERRERO.TABULAR.FIELD.PLATFORMRATING', 'value': { 'value': { 'value': box_metadata['score'] } }, 'data_type': 'INTEGER', 'required': False } mvp_fields.append(creativex_score_field) logger.info("Added CREATIVEX Score field with value: {}".format(box_metadata['score'])) if box_metadata.get('url'): # Update CreativeX URL field logger.info("Updating CreativeX URL from database: {}".format(box_metadata['url'])) url_field_found = False for field in mvp_fields: if field.get('id') == 'FERRERO.FIELD.CREATIVEX LINK': url_field_found = True try: # Log field structure before setting logger.info("CREATIVEX URL field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict')) self._set_field_value(field, box_metadata['url']) logger.info("Set CREATIVEX LINK to: {}".format(box_metadata['url'])) except Exception as e: logger.error("Failed to set CreativeX URL: {}".format(str(e))) import traceback logger.error(traceback.format_exc()) break if not url_field_found: logger.warning("CREATIVEX URL field not found in master metadata - adding it now") # Create the field structure (text field) creativex_url_field = { 'id': 'FERRERO.FIELD.CREATIVEX LINK', 'name': 'CreativeX Hyperlink', 'type': 'com.artesia.metadata.MetadataField', 'value': { 'value': { 'value': box_metadata['url'] } }, 'data_type': 'CHAR', 'required': False } mvp_fields.append(creativex_url_field) logger.info("Added CREATIVEX URL field with value: {}".format(box_metadata['url'])) return mvp_fields