The filename_updates logic was only updating field['value'] (singular) but for tabular fields like MAIN_LANGUAGES, the DAM reads from field['values'] (plural array). This caused the master's original language (e.g. "Global") to persist instead of the correct language from the filename (e.g. "PL"). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1028 lines
44 KiB
Python
1028 lines
44 KiB
Python
"""
|
|
Metadata Extractor MVP - Extract MVP fields from master metadata
|
|
Ported from PHP MetadataExtractorMVP.php
|
|
Compatible with Python 3.6+
|
|
|
|
***********************************************************************
|
|
*** PRODUCTION VERSION (metadata_extractor_mvp_PROD.py) ***
|
|
*** ***
|
|
*** This version uses SIMPLER tabular field structure ***
|
|
*** (without MetadataTableFieldRow wrapper, like CreativeX field) ***
|
|
*** ***
|
|
*** Use this for PROD environment (dam.ferrero.com) ***
|
|
*** ***
|
|
*** For PPR environment, use metadata_extractor_mvp.py ***
|
|
***********************************************************************
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
import os
|
|
from shared.config_loader import load_country_code_mappings
|
|
|
|
logger = logging.getLogger('MetadataExtractorMVP')
|
|
|
|
class MetadataExtractorMVP:
    """Build the reduced ("MVP") metadata payload used to upload an asset.

    PROD variant: tabular fields are emitted with the simpler row structure
    (no MetadataTableFieldRow wrapper).

    The public entry points are ``extract_mvp_fields`` and
    ``build_mvp_asset_representation``; everything else is a private helper.
    """

    # Type identifiers that appear in the metadata payloads built below.
    _FIELD_TYPE = 'com.artesia.metadata.MetadataField'
    _TABLE_FIELD_TYPE = 'com.artesia.metadata.MetadataTableField'
    _DOMAIN_VALUE_TYPE = 'com.artesia.metadata.DomainValue'
    _CASCADING_DOMAIN_VALUE_TYPE = 'com.artesia.metadata.CascadingDomainValue'

    def __init__(self, field_mappings, config=None):
        """
        Initialize with field mappings from config

        Args:
            field_mappings: dict from field_mappings.yaml; 'mvp_fields' is
                required, 'filename_updates', 'forced_values' and 'defaults'
                are optional.
            config: Optional config dict with DAM base_url for environment detection
        """
        self.mvp_field_ids = field_mappings['mvp_fields']
        # "or {}" also covers sections that are present but null in the YAML.
        self.filename_updates = field_mappings.get('filename_updates') or {}
        self.forced_values = field_mappings.get('forced_values') or {}
        self.defaults = field_mappings.get('defaults') or {}

        # Store DAM base URL for environment detection
        self.dam_base_url = None
        dam_config = (config or {}).get('dam') or {}
        if 'base_url' in dam_config:
            self.dam_base_url = dam_config['base_url']
            logger.info("Environment detection: DAM URL = {}".format(self.dam_base_url))

        # Load country code mappings (ISO -> DAM codes); a loader that
        # returns None must not break later "in" lookups.
        self.country_mappings = load_country_code_mappings() or {}
        if self.country_mappings:
            logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings)))

        # Load asset type mappings (3-letter codes -> DAM codes)
        self.asset_type_mappings = self._load_asset_type_mappings()
        if self.asset_type_mappings:
            logger.info("Loaded {} asset type mappings (3-letter->DAM)".format(len(self.asset_type_mappings)))

    # ------------------------------------------------------------------
    # Small payload builders shared by several methods
    # ------------------------------------------------------------------

    @classmethod
    def _is_table_field(cls, field):
        """Return True when *field* is tabular (typed as such or has 'values')."""
        return field.get('type') == cls._TABLE_FIELD_TYPE or 'values' in field

    @staticmethod
    def _string_value(value):
        """Return the innermost ``{'type': 'string', 'value': ...}`` dict."""
        return {'type': 'string', 'value': value}

    @classmethod
    def _domain_value(cls, value):
        """Return a full DomainValue dict (with display/active/expired keys)."""
        return {
            'type': cls._DOMAIN_VALUE_TYPE,
            'active_to': '',
            'active_from': '',
            'field_value': cls._string_value(value),
            'display_value': value,
            'expired_value': False,
        }

    @classmethod
    def _domain_row(cls, value, detailed=False):
        """Return one row for a tabular field's 'values' list.

        Args:
            value: Value stored in the row.
            detailed: When True the row additionally carries 'is_locked' and
                'expired_value' (the shape written by filename updates).
        """
        inner = {
            'field_value': cls._string_value(value),
            'type': cls._DOMAIN_VALUE_TYPE,
        }
        row = {
            'cascading_domain_value': False,
            'domain_value': True,
            'value': inner,
        }
        if detailed:
            row['is_locked'] = False
            inner['expired_value'] = False
        return row

    @classmethod
    def _table_field(cls, field_id, parent_table_id, value):
        """Return a single-row tabular field in the simple PROD structure."""
        return {
            'id': field_id,
            'parent_table_id': parent_table_id,
            'type': cls._TABLE_FIELD_TYPE,
            'values': [cls._domain_row(value)],
        }

    @staticmethod
    def _one_year_after(moment):
        """Return *moment* shifted by one calendar year.

        Feb 29 has no counterpart in the following year, so it maps to Feb 28.
        """
        try:
            return moment.replace(year=moment.year + 1)
        except ValueError:
            return moment.replace(year=moment.year + 1, day=28)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def extract_mvp_fields(self, master_metadata):
        """
        Extract only MVP fields from full master metadata

        Args:
            master_metadata: Complete DAM asset metadata; fields are read from
                master_metadata['metadata']['metadata_element_list'], either
                nested one level inside a category or directly in that list.

        Returns:
            List of MVP field objects. These are the same dict objects that
            live inside master_metadata (no copy is made here).
        """
        extracted_fields = []
        found_field_ids = []
        metadata_list = []

        if isinstance(master_metadata, dict):
            metadata = master_metadata.get('metadata')
            if isinstance(metadata, dict) and 'metadata_element_list' in metadata:
                metadata_list = metadata['metadata_element_list'] or []
                logger.info("Using master_metadata['metadata']['metadata_element_list']")

        logger.info("Searching through {} categories for MVP fields".format(len(metadata_list)))

        for item in metadata_list:
            if not isinstance(item, dict):
                continue
            if 'metadata_element_list' in item:
                # Category with nested fields
                for field in item['metadata_element_list'] or []:
                    if not isinstance(field, dict):
                        continue
                    field_id = field.get('id')
                    if field_id in self.mvp_field_ids:
                        extracted_fields.append(field)
                        found_field_ids.append(field_id)
                        logger.debug("Found MVP field: {}".format(field_id))
            elif 'id' in item and item['id'] in self.mvp_field_ids:
                # Direct field
                extracted_fields.append(item)
                found_field_ids.append(item['id'])
                logger.debug("Found direct MVP field: {}".format(item['id']))

        missing = [f for f in self.mvp_field_ids if f not in found_field_ids]
        logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids)))
        if missing:
            # Only the first five are listed to keep the log line short.
            logger.info("Missing fields: {}".format(', '.join(str(m) for m in missing[:5])))

        return extracted_fields

    def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full', master_opentext_id=None):
        """
        Build asset representation with MVP fields + updates from filename

        Args:
            master_metadata: Full master asset metadata (not modified)
            clean_filename: Clean filename (stripped)
            parsed_filename: Parsed V2 filename dict (may be None)
            box_metadata: Optional Box metadata
            tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder)
            master_opentext_id: Optional DAM Asset ID of master asset (for derivative tracking)

        Returns:
            Asset representation dict ready for upload

        Raises:
            ValueError: If tracking_mode is neither 'full' nor 'folder_only'.
        """
        import copy

        if tracking_mode not in ('full', 'folder_only'):
            # Previously this fell through and failed with UnboundLocalError.
            raise ValueError(
                "Unsupported tracking_mode: {!r} (expected 'full' or 'folder_only')".format(tracking_mode)
            )

        if tracking_mode == 'full':
            # FULL INHERITANCE MODE - Standard behavior
            logger.info("Full inheritance mode - using master metadata")
            # Work on a copy: the steps below edit fields in place and must
            # not leak filename-specific values back into the master metadata.
            mvp_fields = copy.deepcopy(self.extract_mvp_fields(master_metadata))
            mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename)
        else:
            # FOLDER ONLY MODE - New asset, only use upload folder
            logger.info("Folder-only mode (-N suffix) - building metadata from filename only")
            logger.warning("Note: Upload folder comes from master, all other metadata from filename")
            mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename)

        # Add missing MVP fields with defaults (both modes)
        mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename)

        # Update CreativeX fields from Box metadata if provided
        if box_metadata:
            mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata)

        # Add Master Asset ID field if provided (derivative tracking)
        if master_opentext_id:
            mvp_fields = self._add_master_asset_id_field(mvp_fields, master_opentext_id)
            logger.info("Added Master Asset ID field: {}".format(master_opentext_id))

        # Add FERRERO.MASTERASSETIDS if not present (Issue #1 from comparison)
        mvp_fields = self._ensure_master_asset_ids_field(mvp_fields, master_opentext_id)

        # Clean metadata structure to match client reference (Issue #2 and #3)
        mvp_fields = self._clean_metadata_structure(mvp_fields)

        asset_rep = {
            'asset_resource': {
                'asset': {
                    'metadata': {
                        'metadata_element_list': mvp_fields
                    },
                    'metadata_model_id': 'ECOMMERCE',
                    'security_policy_list': [
                        {'id': 1594}
                    ]
                }
            }
        }

        logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields)))
        return asset_rep

    # ------------------------------------------------------------------
    # Full-inheritance updates
    # ------------------------------------------------------------------

    def _update_fields(self, mvp_fields, clean_filename, parsed_filename):
        """Update specific fields from filename and forced values.

        Runs, in order: configured filename updates, country code mapping,
        forced values, and the asset validity dates. ``mvp_fields`` is edited
        in place and also returned.
        """
        self._apply_filename_updates(mvp_fields, clean_filename, parsed_filename)
        self._apply_country_mapping(mvp_fields)
        self._apply_forced_values(mvp_fields)
        self._apply_validity_dates(mvp_fields)
        return mvp_fields

    def _apply_filename_updates(self, mvp_fields, clean_filename, parsed_filename):
        """Apply each 'filename_updates' entry to the matching field."""
        for field_id, update_config in self.filename_updates.items():
            source = update_config.get('source')
            transform = update_config.get('transform', '')

            # Get value from appropriate source
            if source == 'clean_filename':
                value = clean_filename
            elif source and parsed_filename:
                value = parsed_filename.get(source)
            else:
                continue

            if not value:
                continue

            # Apply transform if specified (only meaningful for strings)
            if isinstance(value, str):
                if transform == 'uppercase':
                    value = value.upper()
                elif transform == 'lowercase':
                    value = value.lower()

            # Apply asset type mapping if this is the asset type field
            if field_id == 'FERRERO.FIELD.MKTG.ASSET TYPE' and source == 'asset_type':
                value = self._map_asset_type(value)

            for field in mvp_fields:
                if field.get('id') != field_id:
                    continue
                if self._is_table_field(field):
                    # For tabular fields (like MAIN_LANGUAGES), update the 'values' array
                    # The DAM reads from 'values' (plural), not 'value' (singular)
                    field['values'] = [self._domain_row(value, detailed=True)]
                    logger.info("Updated tabular field {} values array from filename: {}".format(field_id, value))
                else:
                    self._set_field_value(field, value)
                    logger.info("Updated {} from filename: {}".format(field_id, value))
                break

    def _apply_country_mapping(self, mvp_fields):
        """Translate the FERRERO.FIELD.COUNTRY value from ISO to DAM code."""
        for field in mvp_fields:
            if field.get('id') != 'FERRERO.FIELD.COUNTRY':
                continue
            current_value = self._get_field_value(field)
            if not current_value:
                continue
            mapped_value = self._map_country_code(current_value)
            if mapped_value != current_value:
                self._set_field_value(field, mapped_value)
                logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value))

    def _apply_forced_values(self, mvp_fields):
        """Overwrite fields listed in 'forced_values' (first match per id)."""
        for field_id, forced_value in self.forced_values.items():
            for field in mvp_fields:
                if field.get('id') == field_id:
                    # Only claim success when the value was really written.
                    if self._set_field_value(field, forced_value):
                        logger.info("Set {} to {}".format(field_id, forced_value))
                    break

    def _apply_validity_dates(self, mvp_fields):
        """Set Asset Validity Dates (Start = today, End = today + 1 year).

        Best effort: any failure is logged and the fields are left as they are.
        """
        try:
            today = datetime.now()
            # US date format (MM/DD/YYYY) - the format the DAM expects for date fields
            date_fields = (
                ('FERRERO.FIELD.ASSET VALIDITY START PERIOD', today.strftime('%m/%d/%Y')),
                ('FERRERO.FIELD.ASSET VALIDITY END PERIOD', self._one_year_after(today).strftime('%m/%d/%Y')),
            )

            for field_id, value in date_fields:
                for field in mvp_fields:
                    if field.get('id') == field_id:
                        self._set_date_field_value(field, value)
                        logger.info("Set {} to {} (Upload Date Logic)".format(field_id, value))
                        break
                else:
                    # Add new date field (string type per client's asset_representation.json)
                    mvp_fields.append({
                        'id': field_id,
                        'type': self._FIELD_TYPE,
                        'value': {
                            'value': self._string_value(value)
                        }
                    })
                    logger.info("Added {} with value {} (Upload Date Logic)".format(field_id, value))
        except Exception as e:
            logger.error("Failed to set validity dates: {}".format(str(e)))

    def _add_missing_fields(self, mvp_fields, parsed_filename):
        """Add missing MVP fields from filename or defaults.

        MAIN_LANGUAGES is (re)built from parsed_filename['language_code'] when
        available and removed when it is not; afterwards every id in
        ``self.defaults`` that is still absent is appended with its default.
        """
        if parsed_filename and parsed_filename.get('language_code'):
            language = parsed_filename['language_code'].upper()
            main_languages_field = self._table_field(
                'MAIN_LANGUAGES', 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES', language
            )

            # MAIN_LANGUAGES may already exist (possibly with null value from Box webhook)
            existing_idx = next(
                (idx for idx, f in enumerate(mvp_fields) if f.get('id') == 'MAIN_LANGUAGES'),
                None,
            )
            if existing_idx is not None:
                logger.info("Replacing existing MAIN_LANGUAGES with: {}".format(language))
                mvp_fields[existing_idx] = main_languages_field
            else:
                logger.info("Adding MAIN_LANGUAGES: {}".format(language))
                mvp_fields.append(main_languages_field)
        elif any(f.get('id') == 'MAIN_LANGUAGES' for f in mvp_fields):
            # MAIN_LANGUAGES exists but we have no language_code - remove it to avoid null error
            logger.warning("Removing MAIN_LANGUAGES field - no language_code available from filename")
            mvp_fields[:] = [f for f in mvp_fields if f.get('id') != 'MAIN_LANGUAGES']

        # Add other missing fields with defaults
        field_ids = [f.get('id') for f in mvp_fields]

        for field_id, default_value in self.defaults.items():
            if field_id in field_ids:
                continue
            logger.info("Adding {} with default: {}".format(field_id, default_value))

            is_tabular = 'TABULAR' in field_id or field_id in (
                'FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG'
            )
            if is_tabular:
                # Parent table id is derived from the last dotted segment of the id.
                mvp_fields.append(self._table_field(
                    field_id,
                    'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1],
                    default_value,
                ))
            else:
                mvp_fields.append({
                    'id': field_id,
                    'type': self._FIELD_TYPE,
                    'value': {
                        'cascading_domain_value': False,
                        'domain_value': True,
                        'value': self._string_value(default_value)
                    }
                })

        return mvp_fields

    # ------------------------------------------------------------------
    # Code mappings
    # ------------------------------------------------------------------

    def _map_country_code(self, iso_code):
        """
        Map ISO country code to DAM country code

        Args:
            iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE')

        Returns:
            str: DAM country code when a mapping exists, otherwise the
            upper-cased input. Empty or non-string input is returned unchanged.
        """
        if not iso_code or not isinstance(iso_code, str):
            return iso_code

        iso_upper = iso_code.upper()
        mappings = self.country_mappings or {}

        if iso_upper in mappings:
            dam_code = mappings[iso_upper]
            if dam_code != iso_upper:
                logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code))
            return dam_code

        # No mapping found, use ISO code as-is
        logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper))
        return iso_upper

    def _load_asset_type_mappings(self):
        """
        Load asset type mappings: 3-letter codes -> DAM codes

        Reads 'config/asset_type_mappings.yaml' relative to the current
        working directory. Best effort: any failure is logged as a warning.

        Returns:
            dict: 3-letter code -> DAM code mapping ({} when unavailable)
        """
        import yaml
        mapping_path = 'config/asset_type_mappings.yaml'

        try:
            with open(mapping_path, 'r', encoding='utf-8') as f:
                mappings = yaml.safe_load(f)
        except Exception as e:
            logger.warning("Could not load asset type mappings: {}".format(str(e)))
            return {}

        if not mappings:
            return {}
        if not isinstance(mappings, dict):
            # A list or scalar at the top level cannot be used for code lookups.
            logger.warning("Could not load asset type mappings: {}".format(
                "expected a mapping, got {}".format(type(mappings).__name__)))
            return {}
        return mappings

    def _map_asset_type(self, three_letter_code):
        """
        Map 3-letter asset type code to DAM code

        Args:
            three_letter_code: 3-letter code (e.g., 'EHI', 'IMG', 'TVC')

        Returns:
            The mapped DAM code, or the input unchanged when no mapping exists.
        """
        if not three_letter_code:
            return three_letter_code

        code_upper = three_letter_code.upper()

        if code_upper in self.asset_type_mappings:
            dam_code = self.asset_type_mappings[code_upper]
            logger.info("Asset type mapping: {} -> {}".format(code_upper, dam_code))
            return dam_code

        logger.warning("No mapping for asset type: {} - using as-is (may fail DAM validation)".format(code_upper))
        return three_letter_code

    # ------------------------------------------------------------------
    # Folder-only mode
    # ------------------------------------------------------------------

    def _build_fields_from_filename(self, parsed_filename, clean_filename):
        """
        Build ALL metadata fields from parsed filename
        Used in folder-only mode (tracking ID with -N suffix)

        Note: Uses codes directly for now. Can add lookup tables later
        for brand_code->brand_name, country_code->country_name, etc.

        NOTE(review): the asset type is written to 'FERRERO.FIELD.ASSET TYPE'
        without code mapping, whereas the filename-update path maps it for
        'FERRERO.FIELD.MKTG.ASSET TYPE' - confirm this difference is intended.
        """
        parsed = parsed_filename or {}  # tolerate a missing parse result

        def typed(field_id, value):
            return {'id': field_id, 'value': {'value': {'type': 'string', 'value': value}}}

        def untyped(field_id, value):
            return {'id': field_id, 'value': {'value': {'value': value}}}

        # ASSET NAME
        fields = [typed('ARTESIA.FIELD.ASSET NAME', clean_filename)]

        # DESCRIPTION (from subject_title)
        if parsed.get('subject_title'):
            fields.append(typed('ARTESIA.FIELD.ASSET DESCRIPTION', parsed['subject_title']))

        # BRAND (use code for now, could add lookup later)
        if parsed.get('brand_code'):
            fields.append(typed('FERRERO.FIELD.BRAND', parsed['brand_code']))

        # COUNTRY (map ISO code to DAM code)
        if parsed.get('country_code'):
            fields.append(untyped('FERRERO.FIELD.COUNTRY', self._map_country_code(parsed['country_code'])))

        # LANGUAGE (use code for now)
        if parsed.get('language_code'):
            fields.append(untyped('FERRERO.FIELD.LANGUAGES', parsed['language_code']))

        # ASSET TYPE (use code for now)
        if parsed.get('asset_type'):
            fields.append(untyped('FERRERO.FIELD.ASSET TYPE', parsed['asset_type']))

        # STATE (force to Local)
        fields.append(untyped('FERRERO.FIELD.STATE', 'Local'))

        logger.info("Built {} fields from filename (folder-only mode)".format(len(fields)))
        return fields

    # ------------------------------------------------------------------
    # Field value access
    # ------------------------------------------------------------------

    def _get_field_value(self, field):
        """Get field value handling different structures.

        Reads field['value']['value']['value'] or, for DomainValue objects,
        field['value']['value']['field_value']['value']. Returns None for any
        other shape (including tabular fields).
        """
        container = field.get('value')
        if not isinstance(container, dict):
            return None
        inner = container.get('value')
        if not isinstance(inner, dict):
            return None
        if 'value' in inner:
            return inner['value']
        if 'field_value' in inner:
            field_value = inner['field_value']
            return field_value.get('value') if isinstance(field_value, dict) else None
        return None

    def _set_field_value(self, field, value):
        """Set field value handling different structures.

        An existing nested structure is updated in place. When the field has
        no usable 'value' dict (key missing or null) the structure is created:
        a DomainValue for domain fields ('domained' / 'domain_id' set), a
        plain string value otherwise. Tabular fields without a scalar 'value'
        are left unchanged.

        Returns:
            bool: True when the value was written, False when the field was
            skipped (tabular field).
        """
        import json
        field_id = field.get('id', 'UNKNOWN')

        logger.info("_set_field_value called for: {} with value: {}".format(field_id, value))
        logger.info("Current field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None'))

        container = field.get('value')
        if not isinstance(container, dict) and self._is_table_field(field):
            # Adding a scalar 'value' to a tabular field would not be read by
            # the DAM; report it instead of pretending the update happened.
            logger.warning("Field {} is tabular and has no scalar 'value' - left unchanged".format(field_id))
            return False

        # Check if this is a domain field
        is_domain_field = field.get('domained', False) or field.get('domain_id')
        inner = container.get('value') if isinstance(container, dict) else None

        if isinstance(inner, dict):
            if 'value' in inner:
                inner['value'] = value
                # Ensure type is set for CreativeX URL field
                if field_id == 'FERRERO.FIELD.CREATIVEX LINK' and 'type' not in inner:
                    inner['type'] = 'string'
                logger.info("Set via field['value']['value']['value']")
            elif 'field_value' in inner:
                # DomainValue structure - update field_value
                if isinstance(inner['field_value'], dict):
                    inner['field_value']['value'] = value
                else:
                    inner['field_value'] = self._string_value(value)
                # Also update display_value to match
                if 'display_value' in inner:
                    inner['display_value'] = value
                logger.info("Set via field['value']['value']['field_value']['value'] (DomainValue)")
            elif is_domain_field:
                # Nested dict is empty: create DomainValue structure for domain fields
                container['value'] = self._domain_value(value)
                logger.info("Created DomainValue structure for domain field")
            else:
                container['value'] = self._string_value(value)
                logger.info("Created simple string structure for non-domain field")
        elif is_domain_field:
            # No nested value at all: create full DomainValue structure
            field['value'] = {
                'value': self._domain_value(value),
                'is_locked': False,
                'domain_value': True,
                'cascading_domain_value': False
            }
            logger.info("Created full DomainValue structure from scratch")
        else:
            field['value'] = {'value': self._string_value(value)}
            logger.info("Created simple string structure from scratch")

        logger.info("After setting, field['value']: {}".format(json.dumps(field.get('value'), indent=2) if field.get('value') else 'None'))
        return True

    def _set_date_field_value(self, field, date_string):
        """
        Set date field value with proper type for DAM API

        The value is always stored with type 'string'. An existing nested
        value dict is updated in place; any other shape is replaced.

        Args:
            field: Field dict to update
            date_string: Date text to store; the caller in this class passes
                MM/DD/YYYY.
        """
        field_id = field.get('id', 'UNKNOWN')
        logger.info("_set_date_field_value called for: {} with value: {}".format(
            field_id, date_string
        ))

        container = field.get('value')
        if isinstance(container, dict) and isinstance(container.get('value'), dict):
            # Update existing nested structure
            container['value']['type'] = 'string'
            container['value']['value'] = date_string
            logger.info("Set via field['value']['value'] with type 'string'")
            return

        field['value'] = {'value': self._string_value(date_string)}
        if isinstance(container, dict):
            logger.info("Created field['value'] with type 'string'")
        else:
            logger.info("Created field['value'] from scratch with type 'string'")

    # ------------------------------------------------------------------
    # CreativeX
    # ------------------------------------------------------------------

    def _update_creativex_fields(self, mvp_fields, box_metadata):
        """
        Update CreativeX fields from Box metadata template

        Args:
            mvp_fields: List of MVP fields
            box_metadata: dict; reads 'score', 'platforms' and 'url'

        Returns:
            Updated mvp_fields list
        """
        score_val = box_metadata.get('score')
        platforms = box_metadata.get('platforms', [])

        # Skip if score is 0, '0', None, or no platforms (avoid invalid "Unknown" default)
        if score_val and str(score_val) != '0' and platforms:
            # Tabular field, one row per platform: Platform^Score (e.g., "Google Ads^100")
            logger.info("Processing CreativeX score: {} with platforms: {}".format(score_val, platforms))

            value_objects = []
            for platform in platforms:
                combined_value = "{}^{}".format(platform, score_val)
                value_objects.append({
                    "cascading_domain_value": True,
                    "domain_value": False,
                    "is_locked": False,
                    "value": {
                        "field_value": self._string_value(combined_value),
                        "type": self._CASCADING_DOMAIN_VALUE_TYPE
                    }
                })
                logger.info("Constructed CreativeX value: {}".format(combined_value))

            for field in mvp_fields:
                if field.get('id') == 'FERRERO.TAB.FIELD.CREATIVEX':
                    # Replace values list with new list of objects
                    field['values'] = value_objects
                    logger.info("Set CREATIVEX field with {} values".format(len(value_objects)))
                    break
            else:
                logger.warning("CREATIVEX Score field not found in master metadata - adding it now")
                mvp_fields.append({
                    "type": self._TABLE_FIELD_TYPE,
                    "id": "FERRERO.TAB.FIELD.CREATIVEX",
                    "parent_table_id": "FERRERO.TABULAR.FIELD.CREATIVEX",
                    "values": value_objects
                })
                logger.info("Added CREATIVEX Score field with {} values".format(len(value_objects)))
        else:
            logger.info("Skipping CreativeX score field - Score: {}, Platforms: {} (will not add invalid 'Unknown' value)".format(
                score_val if score_val else 'None', platforms if platforms else 'None'
            ))

        url = box_metadata.get('url')
        if url:
            logger.info("Updating CreativeX URL from database: {}".format(url))
            for field in mvp_fields:
                if field.get('id') != 'FERRERO.FIELD.CREATIVEX LINK':
                    continue
                try:
                    logger.info("CREATIVEX URL field structure: {}".format(field.get('value', {}).keys() if isinstance(field.get('value'), dict) else 'not a dict'))
                    self._set_field_value(field, url)
                    logger.info("Set CREATIVEX LINK to: {}".format(url))
                except Exception as e:
                    # Best effort: keep building the payload, but record the traceback.
                    logger.exception("Failed to set CreativeX URL: {}".format(str(e)))
                break
            else:
                logger.warning("CREATIVEX URL field not found in master metadata - adding it now")
                mvp_fields.append({
                    'id': 'FERRERO.FIELD.CREATIVEX LINK',
                    'name': 'CreativeX Hyperlink',
                    'type': self._FIELD_TYPE,
                    'value': {
                        'value': self._string_value(url)
                    },
                    'data_type': 'CHAR',
                    'required': False
                })
                logger.info("Added CREATIVEX URL field with value: {}".format(url))

        return mvp_fields

    # ------------------------------------------------------------------
    # Master asset tracking
    # ------------------------------------------------------------------

    def _add_master_asset_id_field(self, mvp_fields, master_opentext_id):
        """
        Add Master Asset ID field (configurable via MASTER_ASSET_ID_FIELD in .env)

        When the configured id contains 'TABULAR' the value is written as a
        tabular field whose parent is the configured id; for
        'FERRERO.TABULAR.FIELD.MASTERASSETIDS' the column id is
        'FERRERO.MASTERASSETIDS'. An already present field is updated rather
        than duplicated.

        Args:
            mvp_fields: List of MVP fields
            master_opentext_id: DAM Asset ID of the master asset

        Returns:
            Updated mvp_fields list
        """
        # Read configured field ID from environment, default to legacy ARTESIA field
        master_field_id = os.environ.get('MASTER_ASSET_ID_FIELD') or 'ARTESIA.FIELD.ASSET_ID'
        logger.info("Using Master Asset ID field: {} (Value: {})".format(master_field_id, master_opentext_id))

        is_tabular = 'TABULAR' in master_field_id
        if master_field_id == 'FERRERO.TABULAR.FIELD.MASTERASSETIDS':
            # Parent: FERRERO.TABULAR.FIELD.MASTERASSETIDS / Child column: FERRERO.MASTERASSETIDS
            target_field_id = 'FERRERO.MASTERASSETIDS'
        else:
            target_field_id = master_field_id

        # Update scenario: look for the id the field is actually stored under
        # (the child column for tabular fields), otherwise a duplicate is added.
        for field in mvp_fields:
            if self._get_field_id(field) != target_field_id:
                continue
            if is_tabular:
                field['values'] = [self._domain_row(master_opentext_id)]
                updated = True
            else:
                updated = self._set_field_value(field, master_opentext_id)
            if updated:
                logger.info("Updated existing Master Asset ID field: {}".format(master_opentext_id))
            return mvp_fields

        if is_tabular:
            mvp_fields.append(self._table_field(target_field_id, master_field_id, master_opentext_id))
            logger.info("Added new TABULAR Master Asset ID field: {} (Column: {})".format(master_field_id, target_field_id))
        else:
            # Standard Text Field (system field structure per client's asset_representation.json)
            mvp_fields.append({
                'id': master_field_id,
                'type': self._FIELD_TYPE,
                'value': {
                    'cascading_domain_value': False,
                    'domain_value': False,
                    'is_locked': False,
                    'value': self._string_value(master_opentext_id)
                }
            })
            logger.info("Added new Master Asset ID field: {}".format(master_field_id))

        return mvp_fields

    def _get_field_id(self, field):
        """Extract field ID from field dict ('' when absent or not a dict)."""
        if isinstance(field, dict):
            return field.get('id', '')
        return ''

    def _is_prod_environment(self):
        """
        Detect if we're running in PROD vs PPR environment

        Returns:
            bool: False only when the DAM URL contains 'ppr.dam.ferrero.com';
            True otherwise, including when no DAM URL is known.
        """
        if not self.dam_base_url:
            # Can't detect - assume PROD for safety
            logger.warning("Cannot detect environment (no DAM URL) - assuming PROD")
            return True

        # PPR uses ppr.dam.ferrero.com, PROD uses dam.ferrero.com
        is_prod = 'ppr.dam.ferrero.com' not in self.dam_base_url.lower()

        logger.info("Environment detected: {} (URL: {})".format(
            'PROD' if is_prod else 'PPR',
            self.dam_base_url
        ))
        return is_prod

    def _ensure_master_asset_ids_field(self, mvp_fields, master_opentext_id):
        """
        Ensure FERRERO.MASTERASSETIDS field is present when there's a value to track
        Only adds field if master_opentext_id is provided AND we're in PPR environment
        (PROD doesn't have this field configured in metadata schema)

        Args:
            mvp_fields: List of MVP fields
            master_opentext_id: DAM Asset ID of the master asset (optional)

        Returns:
            Updated mvp_fields list with FERRERO.MASTERASSETIDS if needed
        """
        if not master_opentext_id:
            logger.info("No master_opentext_id provided - skipping FERRERO.MASTERASSETIDS field")
            return mvp_fields

        if self._is_prod_environment():
            logger.info("PROD environment detected - skipping FERRERO.MASTERASSETIDS field (not configured in PROD)")
            return mvp_fields

        if any(self._get_field_id(f) == 'FERRERO.MASTERASSETIDS' for f in mvp_fields):
            logger.info("FERRERO.MASTERASSETIDS already present")
            return mvp_fields

        mvp_fields.append(self._table_field(
            'FERRERO.MASTERASSETIDS',
            'FERRERO.TABULAR.FIELD.MASTERASSETIDS',
            master_opentext_id,
        ))
        logger.info("Added FERRERO.MASTERASSETIDS field with value: {}".format(master_opentext_id))
        return mvp_fields

    # ------------------------------------------------------------------
    # Payload clean-up
    # ------------------------------------------------------------------

    def _clean_metadata_structure(self, mvp_fields):
        """
        Clean metadata structure to match client reference file (Issues #2 and #3)

        Fixes:
        1. Remove 'description' property from DomainValue objects (Issue #2)
        2. Remove overpopulated values from optional domain fields (Issue #3)

        Args:
            mvp_fields: List of MVP fields (edited in place)

        Returns:
            Cleaned mvp_fields list matching reference structure
        """
        for field in mvp_fields:
            if not isinstance(field, dict):
                continue

            field_id = self._get_field_id(field)

            # Tabular fields: only strip 'description' from each row's value
            if field.get('type') == self._TABLE_FIELD_TYPE:
                rows = field.get('values')
                if isinstance(rows, list):
                    for row in rows:
                        if isinstance(row, dict) and 'value' in row:
                            self._clean_domain_value(row['value'])
                continue

            # Regular MetadataField
            container = field.get('value')
            if not isinstance(container, dict):
                continue
            value_obj = container.get('value')
            if not isinstance(value_obj, dict):
                continue

            self._clean_domain_value(value_obj)

            # Optional domain field with no actual value: drop the nested
            # value object, keep only the three flags.
            if self._is_empty_domain_value(value_obj):
                logger.info("Cleaning optional domain field (no value): {}".format(field_id))
                field['value'] = {
                    'cascading_domain_value': container.get('cascading_domain_value', False),
                    'domain_value': container.get('domain_value', False),
                    'is_locked': container.get('is_locked', False)
                }

        return mvp_fields

    def _clean_domain_value(self, value_obj):
        """
        Remove 'description' property from DomainValue object (Issue #2)

        Args:
            value_obj: DomainValue dictionary (anything else is ignored)
        """
        if isinstance(value_obj, dict) and 'description' in value_obj:
            logger.info("Removing 'description' from DomainValue")
            del value_obj['description']

    def _is_empty_domain_value(self, value_obj):
        """
        Check if a DomainValue object is empty/unpopulated

        Empty means: no field_value.value or field_value.value is falsy
        (e.g. empty string).

        Args:
            value_obj: DomainValue dictionary

        Returns:
            True if empty, False if it has a value. Dicts whose 'type' is not
            a (Cascading)DomainValue are never considered empty; non-dicts
            always are.
        """
        if not isinstance(value_obj, dict):
            return True

        if value_obj.get('type') not in (self._DOMAIN_VALUE_TYPE, self._CASCADING_DOMAIN_VALUE_TYPE):
            return False

        field_value = value_obj.get('field_value', {})
        if isinstance(field_value, dict):
            return not field_value.get('value', '')

        return True
|
|
|