Adds detailed logging to trace exactly how field values are being set
and diagnose why CreativeX score/URL aren't appearing in final JSON.
Changes to _set_field_value():
- Logs field ID being updated
- Logs current field['value'] structure BEFORE setting
- Logs which code path is taken (nested vs created)
- Logs field['value'] structure AFTER setting
- Shows full JSON structure at each step
Output Example:
_set_field_value called for: FERRERO.TAB.FIELD.CREATIVEX with value: 85
Current field['value']: {
"is_locked": false,
"domain_value": false,
...
}
Created field['value'] = {'value': {'value': 85}}
After setting, field['value']: {
"value": {
"value": 85
}
}
Purpose:
Diagnose why CreativeX fields show empty value dicts in asset
representation even though logs say "Set CREATIVEX Score to: 0".
This verbose logging will show:
1. What the field structure looks like before we set it
2. Which code path is executed
3. What the field structure looks like after we set it
4. Whether the value is actually being placed in the right location
Run with --dryrun to see full debug output without uploading.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
434 lines
18 KiB
Python
434 lines
18 KiB
Python
"""
Metadata Extractor MVP - Extract MVP fields from master metadata
Ported from PHP MetadataExtractorMVP.php
Compatible with Python 3.6+
"""

import copy
import json
import logging

from shared.config_loader import load_country_code_mappings

logger = logging.getLogger('MetadataExtractorMVP')


class MetadataExtractorMVP:
    """
    Reduce a master asset's metadata to the configured MVP field subset and
    assemble the asset representation dict used for upload.

    Fields are plain dicts. Two value layouts are understood by the
    getter/setter helpers:

    * ``field['value']['value']['value']``
    * ``field['value']['value']['field_value']['value']``

    Fields extracted from the master are deep-copied, so building a
    representation never modifies the ``master_metadata`` passed in.
    """

    # One row per CreativeX value: (box_metadata key, target field id,
    # label used in most log lines, label used in the "Set ..." log line).
    _CREATIVEX_FIELDS = (
        ('score', 'FERRERO.TAB.FIELD.CREATIVEX', 'Score', 'Score'),  # Platform > Rating (%)
        ('url', 'FERRERO.FIELD.CREATIVEX LINK', 'URL', 'LINK'),      # CreativeX Hyperlink
    )

    # Default-valued field ids emitted as table fields even though their id
    # does not contain 'TABULAR'.
    _TABULAR_DEFAULT_IDS = ('FERRERO.FIELD.ASSETCOMPLIANCE', 'MARKETING_TAG')

    # (parsed_filename key, target field id), in the order the fields are
    # emitted in folder-only mode.
    _FILENAME_FIELDS = (
        ('subject_title', 'ARTESIA.FIELD.ASSET DESCRIPTION'),
        ('brand_code', 'FERRERO.FIELD.BRAND'),
        ('country_code', 'FERRERO.FIELD.COUNTRY'),
        ('language_code', 'FERRERO.FIELD.LANGUAGES'),
        ('asset_type', 'FERRERO.FIELD.ASSET TYPE'),
    )

    def __init__(self, field_mappings):
        """
        Initialize with field mappings from config

        Args:
            field_mappings: dict from field_mappings.yaml. ``mvp_fields`` is
                required; ``filename_updates``, ``forced_values`` and
                ``defaults`` are optional and default to empty dicts.

        Raises:
            KeyError: if ``field_mappings`` has no ``mvp_fields`` entry.
        """
        self.mvp_field_ids = field_mappings['mvp_fields']
        self.filename_updates = field_mappings.get('filename_updates', {})
        self.forced_values = field_mappings.get('forced_values', {})
        self.defaults = field_mappings.get('defaults', {})

        # Load country code mappings (ISO -> DAM codes)
        self.country_mappings = load_country_code_mappings()
        if self.country_mappings:
            logger.info("Loaded {} country code mappings (ISO->DAM)".format(len(self.country_mappings)))

    # ------------------------------------------------------------------
    # Small builders for the field dict shapes used in this class
    # ------------------------------------------------------------------

    @staticmethod
    def _plain_field(field_id, value):
        """Return a field dict holding ``value`` at value.value.value."""
        return {'id': field_id, 'value': {'value': {'value': value}}}

    @staticmethod
    def _domain_table_field(field_id, parent_table_id, value):
        """Return a single-row MetadataTableField dict carrying a domain value."""
        return {
            'id': field_id,
            'parent_table_id': parent_table_id,
            'type': 'com.artesia.metadata.MetadataTableField',
            'values': [
                {
                    'cascading_domain_value': False,
                    'domain_value': True,
                    'value': {
                        'field_value': {
                            'type': 'string',
                            'value': value
                        },
                        'type': 'com.artesia.metadata.DomainValue'
                    }
                }
            ]
        }

    @staticmethod
    def _domain_scalar_field(field_id, value):
        """Return a MetadataField dict carrying a string domain value."""
        return {
            'id': field_id,
            'type': 'com.artesia.metadata.MetadataField',
            'value': {
                'cascading_domain_value': False,
                'domain_value': True,
                'value': {
                    'type': 'string',
                    'value': value
                }
            }
        }

    @staticmethod
    def _format_for_log(value):
        """
        Render a field value for diagnostic logging.

        Only a real ``None`` is printed as 'None'; an empty dict is printed
        as '{}' so the two cases can be told apart in the log. Values that
        json cannot serialise fall back to ``str``/``repr`` instead of
        raising from inside a log statement.
        """
        if value is None:
            return 'None'
        try:
            return json.dumps(value, indent=2, default=str)
        except (TypeError, ValueError):
            return repr(value)

    # ------------------------------------------------------------------
    # Extraction
    # ------------------------------------------------------------------

    def extract_mvp_fields(self, master_metadata):
        """
        Extract only MVP fields from full master metadata

        Looks at ``master_metadata['metadata']['metadata_element_list']``.
        Each entry is either a category (it has its own
        ``metadata_element_list`` of fields) or a direct field.

        Args:
            master_metadata: Complete DAM asset metadata

        Returns:
            List of MVP field objects. Each one is a deep copy, so callers
            may modify it without touching ``master_metadata``.
        """
        extracted_fields = []
        found_field_ids = []

        # Navigate to metadata structure
        # master_metadata is the full asset, need to go to: metadata.metadata_element_list
        metadata_list = []
        if isinstance(master_metadata, dict):
            metadata = master_metadata.get('metadata')
            if isinstance(metadata, dict) and 'metadata_element_list' in metadata:
                metadata_list = metadata['metadata_element_list'] or []
                logger.info("Using master_metadata['metadata']['metadata_element_list']")

        logger.info("Searching through {} categories for MVP fields".format(len(metadata_list)))

        # Search through categories for MVP fields
        for item in metadata_list:
            if not isinstance(item, dict):
                continue

            if 'metadata_element_list' in item:
                # Category with nested fields
                for field in item['metadata_element_list'] or []:
                    if not isinstance(field, dict):
                        continue
                    field_id = field.get('id')
                    if field_id in self.mvp_field_ids:
                        # Copy: the update helpers modify fields in place
                        extracted_fields.append(copy.deepcopy(field))
                        found_field_ids.append(field_id)
                        logger.debug("Found MVP field: {}".format(field_id))
            elif 'id' in item and item['id'] in self.mvp_field_ids:
                # Direct field
                extracted_fields.append(copy.deepcopy(item))
                found_field_ids.append(item['id'])
                logger.debug("Found direct MVP field: {}".format(item['id']))

        # Log results
        missing = [f for f in self.mvp_field_ids if f not in found_field_ids]
        logger.info("Found {}/{} MVP fields".format(len(found_field_ids), len(self.mvp_field_ids)))

        if missing:
            # Only the first five are listed to keep the log line short
            logger.info("Missing fields: {}".format(', '.join(str(f) for f in missing[:5])))

        return extracted_fields

    # ------------------------------------------------------------------
    # Asset representation
    # ------------------------------------------------------------------

    def build_mvp_asset_representation(self, master_metadata, clean_filename, parsed_filename, box_metadata=None, tracking_mode='full'):
        """
        Build asset representation with MVP fields + updates from filename

        Args:
            master_metadata: Full master asset metadata
            clean_filename: Clean filename (stripped)
            parsed_filename: Parsed V2 filename dict (may be None)
            box_metadata: Optional Box metadata
            tracking_mode: 'full' (inherit all metadata) or 'folder_only' (only use folder)

        Returns:
            Asset representation dict ready for upload

        Raises:
            ValueError: if ``tracking_mode`` is neither 'full' nor 'folder_only'.
        """
        if tracking_mode == 'full':
            # FULL INHERITANCE MODE - Standard behavior
            logger.info("Full inheritance mode - using master metadata")
            # Extract MVP fields from master
            mvp_fields = self.extract_mvp_fields(master_metadata)

            # Update fields from filename and forced values
            mvp_fields = self._update_fields(mvp_fields, clean_filename, parsed_filename)

        elif tracking_mode == 'folder_only':
            # FOLDER ONLY MODE - New asset, only use upload folder
            logger.info("Folder-only mode (-N suffix) - building metadata from filename only")
            logger.warning("Note: Upload folder comes from master, all other metadata from filename")

            # Build every field from the filename
            mvp_fields = self._build_fields_from_filename(parsed_filename, clean_filename)

        else:
            # Fail with a clear message instead of an UnboundLocalError below
            raise ValueError(
                "Unknown tracking_mode: {!r} (expected 'full' or 'folder_only')".format(tracking_mode)
            )

        # Add missing MVP fields with defaults (both modes)
        mvp_fields = self._add_missing_fields(mvp_fields, parsed_filename)

        # Update CreativeX fields from Box metadata if provided
        if box_metadata:
            mvp_fields = self._update_creativex_fields(mvp_fields, box_metadata)

        # Build asset representation
        asset_rep = {
            'asset_resource': {
                'asset': {
                    'metadata': {
                        'metadata_element_list': mvp_fields
                    },
                    'metadata_model_id': 'ECOMMERCE',
                    'security_policy_list': [
                        {'id': 1594}
                    ]
                }
            }
        }

        logger.info("Built MVP asset representation with {} fields".format(len(mvp_fields)))

        return asset_rep

    def _update_fields(self, mvp_fields, clean_filename, parsed_filename):
        """
        Update specific fields from filename and forced values

        * ASSET NAME        <- ``clean_filename``
        * ASSET DESCRIPTION <- ``parsed_filename['subject_title']`` (if set)
        * COUNTRY           <- current value mapped through ``_map_country_code``
        * STATE             <- 'Local'

        Fields are modified in place; the same list is returned. A success
        message is logged only when the value was actually written.
        """
        subject_title = parsed_filename.get('subject_title') if parsed_filename else None

        for field in mvp_fields:
            field_id = field.get('id')

            if field_id == 'ARTESIA.FIELD.ASSET NAME':
                if self._set_field_value(field, clean_filename):
                    logger.info("Updated ASSET NAME: {}".format(clean_filename))

            elif field_id == 'ARTESIA.FIELD.ASSET DESCRIPTION':
                if subject_title and self._set_field_value(field, subject_title):
                    logger.info("Updated DESCRIPTION: {}".format(subject_title))

            elif field_id == 'FERRERO.FIELD.COUNTRY':
                # Apply country code mapping (ISO -> DAM codes)
                current_value = self._get_field_value(field)
                if current_value:
                    mapped_value = self._map_country_code(current_value)
                    if mapped_value != current_value and self._set_field_value(field, mapped_value):
                        logger.info("Mapped country code: {} -> {}".format(current_value, mapped_value))

            elif field_id == 'FERRERO.FIELD.STATE':
                # Force STATE to Local
                if self._set_field_value(field, 'Local'):
                    logger.info("Set STATE to Local")

        return mvp_fields

    def _add_missing_fields(self, mvp_fields, parsed_filename):
        """
        Add missing MVP fields from filename or defaults

        Appends MAIN_LANGUAGES (upper-cased ``language_code`` from the parsed
        filename) when absent, then one field per entry of ``self.defaults``
        whose id is not present yet. ``mvp_fields`` is extended in place and
        returned.
        """
        field_ids = [f.get('id') for f in mvp_fields]

        # Add MAIN_LANGUAGES if missing
        if 'MAIN_LANGUAGES' not in field_ids and parsed_filename:
            if parsed_filename.get('language_code'):
                language = parsed_filename['language_code'].upper()
                logger.info("Adding MAIN_LANGUAGES: {}".format(language))
                mvp_fields.append(self._domain_table_field(
                    'MAIN_LANGUAGES', 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES', language
                ))

        # Add other missing fields with defaults
        field_ids = [f.get('id') for f in mvp_fields]

        for field_id, default_value in self.defaults.items():
            if field_id in field_ids:
                continue

            logger.info("Adding {} with default: {}".format(field_id, default_value))

            # Tabular when the id itself contains 'TABULAR' or is one of the
            # explicitly listed ids
            is_tabular = 'TABULAR' in field_id or field_id in self._TABULAR_DEFAULT_IDS

            if is_tabular:
                # Parent table id is derived from the last dotted segment of the id
                parent_table_id = 'FERRERO.TABULAR.FIELD.' + field_id.split('.')[-1]
                mvp_fields.append(self._domain_table_field(field_id, parent_table_id, default_value))
            else:
                mvp_fields.append(self._domain_scalar_field(field_id, default_value))

        return mvp_fields

    def _map_country_code(self, iso_code):
        """
        Map ISO country code to DAM country code

        Args:
            iso_code: ISO 3166-1 Alpha-2 code (e.g., 'BD', 'DE')

        Returns:
            The mapped DAM code when ``self.country_mappings`` has an entry
            for the upper-cased code, otherwise the upper-cased code itself.
            Empty or non-string input is returned unchanged.
        """
        if not iso_code or not isinstance(iso_code, str):
            return iso_code

        iso_upper = iso_code.upper()
        # The loader may have produced nothing; treat that as "no mappings"
        mappings = self.country_mappings or {}

        # Check if we have a mapping
        if iso_upper in mappings:
            dam_code = mappings[iso_upper]
            if dam_code != iso_upper:
                logger.info("Country code mapping: {} (ISO) -> {} (DAM)".format(iso_upper, dam_code))
            return dam_code

        # No mapping found, use ISO code as-is
        logger.debug("No mapping for country code: {} (using as-is)".format(iso_upper))
        return iso_upper

    def _build_fields_from_filename(self, parsed_filename, clean_filename):
        """
        Build ALL metadata fields from parsed filename
        Used in folder-only mode (tracking ID with -N suffix)

        ASSET NAME and STATE ('Local') are always emitted; the others only
        when the corresponding ``parsed_filename`` entry is non-empty. The
        country code goes through ``_map_country_code``.

        Note: Uses codes directly for now. Can add lookup tables later
        for brand_code->brand_name, country_code->country_name, etc.
        """
        parsed = parsed_filename or {}

        # ASSET NAME
        fields = [self._plain_field('ARTESIA.FIELD.ASSET NAME', clean_filename)]

        for key, field_id in self._FILENAME_FIELDS:
            raw = parsed.get(key)
            if not raw:
                continue
            if key == 'country_code':
                # COUNTRY (map ISO code to DAM code)
                raw = self._map_country_code(raw)
            fields.append(self._plain_field(field_id, raw))

        # STATE (force to Local)
        fields.append(self._plain_field('FERRERO.FIELD.STATE', 'Local'))

        logger.info("Built {} fields from filename (folder-only mode)".format(len(fields)))

        return fields

    # ------------------------------------------------------------------
    # Field value access
    # ------------------------------------------------------------------

    def _get_field_value(self, field):
        """
        Get field value handling different structures

        Returns ``field['value']['value']['value']`` when present, else
        ``field['value']['value']['field_value']['value']``, else None.
        """
        outer = field.get('value')
        if not isinstance(outer, dict):
            return None

        inner = outer.get('value')
        if not isinstance(inner, dict):
            return None

        if 'value' in inner:
            return inner['value']

        field_value = inner.get('field_value')
        if isinstance(field_value, dict):
            return field_value.get('value')

        return None

    def _set_field_value(self, field, value):
        """
        Set field value handling different structures

        Existing nested structures are updated in place. When the field has
        no usable ``'value'`` dict, one is created as
        ``{'value': {'value': value}}``. Fields carrying a ``'values'`` list
        and no ``'value'`` dict are left untouched, because this helper does
        not know which row to write; a warning is logged instead.

        Returns:
            True when the value was written, False when the field was left
            unchanged.
        """
        field_id = field.get('id', 'UNKNOWN')

        logger.info("_set_field_value called for: {} with value: {}".format(field_id, value))
        logger.info("Current field['value']: {}".format(self._format_for_log(field.get('value'))))

        current = field.get('value')

        if isinstance(current, dict):
            inner = current.get('value')
            if not isinstance(inner, dict):
                # If value dict is empty or doesn't have nested value, create it
                field['value'] = {'value': {'value': value}}
                logger.info("Created field['value'] = {{'value': {{'value': {}}}}}".format(value))
            elif 'value' in inner:
                # Nested structure (most common)
                inner['value'] = value
                logger.info("Set via field['value']['value']['value']")
            elif isinstance(inner.get('field_value'), dict):
                inner['field_value']['value'] = value
                logger.info("Set via field['value']['value']['field_value']['value']")
            else:
                # If nested dict is empty, create the value structure
                current['value'] = {'value': value}
                logger.info("Created field['value']['value'] = {{'value': {}}}".format(value))
        elif isinstance(field.get('values'), list):
            # Make the failure visible rather than letting the caller log
            # a success that did not happen
            logger.warning(
                "Cannot set value for {}: field uses a 'values' list, which _set_field_value does not handle".format(field_id)
            )
            return False
        else:
            # No 'value' key at all, or a non-dict value: create the structure
            field['value'] = {'value': {'value': value}}
            logger.info("Created field['value'] = {{'value': {{'value': {}}}}}".format(value))

        logger.info("After setting, field['value']: {}".format(self._format_for_log(field.get('value'))))
        return True

    # ------------------------------------------------------------------
    # CreativeX
    # ------------------------------------------------------------------

    def _update_creativex_fields(self, mvp_fields, box_metadata):
        """
        Update CreativeX fields from Box metadata template

        A value is applied when it is present, i.e. neither None nor an
        empty string. A score of 0 therefore is applied. Only the first
        field with a matching id is updated. Failures are logged and do not
        abort the build.

        Args:
            mvp_fields: List of MVP fields
            box_metadata: dict with 'score' and 'url' from Box template

        Returns:
            Updated mvp_fields list
        """
        for key, field_id, label, set_label in self._CREATIVEX_FIELDS:
            new_value = box_metadata.get(key)
            if new_value is None or new_value == '':
                continue

            logger.info("Updating CreativeX {} from database: {}".format(label, new_value))

            target = next((f for f in mvp_fields if f.get('id') == field_id), None)
            if target is None:
                logger.error("CREATIVEX {} field ({}) NOT FOUND in mvp_fields!".format(label, field_id))
                continue

            try:
                # Log field structure before setting
                current = target.get('value')
                logger.info("CREATIVEX {} field structure: {}".format(
                    label, current.keys() if isinstance(current, dict) else 'not a dict'
                ))
                if self._set_field_value(target, new_value):
                    logger.info("Set CREATIVEX {} to: {}".format(set_label, new_value))
            except Exception as e:
                # Best effort: a CreativeX failure must not abort the build.
                # logger.exception appends the traceback to the record.
                logger.exception("Failed to set CreativeX {}: {}".format(label, str(e)))

        return mvp_fields