Fix: Recursively walk metadata_element_list when extracting CreativeX

Diagnostic confirmed FERRERO.TAB.FIELD.CREATIVEX (score) lives at depth 2
in B1 master metadata — nested under FERRERO.TABULAR.FIELD.CREATIVEX
inside a category — and FERRERO.FIELD.CREATIVEX LINK lives at depth 1.
The flat top-level walk used previously never reached them, so live B1
runs and the backfill both reported zero CX scores. Updated extractor
in b1_to_b2_download.py and the inline copy in
backfill_b1_creativex_scores.py to descend recursively.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-04-29 11:53:15 +02:00
parent a463eb42f8
commit 8bf8dc1325
2 changed files with 41 additions and 12 deletions

View file

@ -53,18 +53,32 @@ logging.basicConfig(
logger = logging.getLogger('B1toB2')
def _walk_metadata_elements(elements):
"""Recursively yield every element in nested metadata_element_list arrays.
Categories and tables both nest fields underneath them, so a flat walk
misses anything below the top level."""
for e in elements or []:
if not isinstance(e, dict):
continue
yield e
nested = e.get('metadata_element_list')
if isinstance(nested, list):
for sub in _walk_metadata_elements(nested):
yield sub
def extract_creativex_from_dam_metadata(asset_metadata):
"""
Extract CreativeX score and URL from DAM asset metadata if present.
Mirrors the extractor in a1_to_a2_box_uploader.py so B1 masters surface
the same CX fields.
Walks the metadata_element_list recursively because the score field
(FERRERO.TAB.FIELD.CREATIVEX) is nested at depth 2 under its parent
table FERRERO.TABULAR.FIELD.CREATIVEX, not at the top level.
"""
try:
metadata_elements = asset_metadata.get('metadata', {}).get('metadata_element_list', [])
top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
cx = {'score': None, 'url': None}
creativex_data = {'score': None, 'url': None}
for element in metadata_elements:
for element in _walk_metadata_elements(top):
element_id = element.get('id')
if element_id == 'FERRERO.TAB.FIELD.CREATIVEX':
@ -76,7 +90,7 @@ def extract_creativex_from_dam_metadata(asset_metadata):
if isinstance(field_value, dict):
score = field_value.get('value')
if score:
creativex_data['score'] = str(score)
cx['score'] = str(score)
elif element_id == 'FERRERO.FIELD.CREATIVEX LINK':
value_obj = element.get('value', {})
@ -85,9 +99,9 @@ def extract_creativex_from_dam_metadata(asset_metadata):
if isinstance(nested_value, dict):
url = nested_value.get('value')
if url:
creativex_data['url'] = url
cx['url'] = url
return creativex_data
return cx
except Exception as e:
logger.warning("Failed to extract CreativeX from metadata: {}".format(str(e)))

View file

@ -35,14 +35,29 @@ logging.basicConfig(
logger = logging.getLogger('B1CXBackfill')
def _walk_metadata_elements(elements):
"""Recursively yield every element in nested metadata_element_list arrays."""
for e in elements or []:
if not isinstance(e, dict):
continue
yield e
nested = e.get('metadata_element_list')
if isinstance(nested, list):
for sub in _walk_metadata_elements(nested):
yield sub
def extract_creativex_from_dam_metadata(asset_metadata):
"""Mirror of the extractor in b1_to_b2_download.py — duplicated here
to keep the backfill script self-contained (avoids triggering
b1_to_b2_download's module-level logging setup on import)."""
b1_to_b2_download's module-level logging setup on import).
Walks recursively: the score field is at depth 2 (nested inside
FERRERO.TABULAR.FIELD.CREATIVEX, which lives inside a category)."""
try:
elements = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
cx = {'score': None, 'url': None}
for element in elements:
for element in _walk_metadata_elements(top):
element_id = element.get('id')
if element_id == 'FERRERO.TAB.FIELD.CREATIVEX':
values = element.get('values', [])