Fix: Recursively walk metadata_element_list when extracting CreativeX
A diagnostic run confirmed that FERRERO.TAB.FIELD.CREATIVEX (the score) lives at depth 2 in B1 master metadata — nested under FERRERO.TABULAR.FIELD.CREATIVEX, which itself sits inside a category — and that FERRERO.FIELD.CREATIVEX LINK lives at depth 1. The flat top-level walk used previously never reached either field, so live B1 runs and the backfill both reported zero CX scores.

Updated the extractor in b1_to_b2_download.py and the inline copy in backfill_b1_creativex_scores.py to descend recursively.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a463eb42f8
commit
8bf8dc1325
2 changed files with 41 additions and 12 deletions
|
|
@ -53,18 +53,32 @@ logging.basicConfig(
|
|||
logger = logging.getLogger('B1toB2')
|
||||
|
||||
|
||||
def _walk_metadata_elements(elements):
|
||||
"""Recursively yield every element in nested metadata_element_list arrays.
|
||||
Categories and tables both nest fields underneath them, so a flat walk
|
||||
misses anything below the top level."""
|
||||
for e in elements or []:
|
||||
if not isinstance(e, dict):
|
||||
continue
|
||||
yield e
|
||||
nested = e.get('metadata_element_list')
|
||||
if isinstance(nested, list):
|
||||
for sub in _walk_metadata_elements(nested):
|
||||
yield sub
|
||||
|
||||
|
||||
def extract_creativex_from_dam_metadata(asset_metadata):
|
||||
"""
|
||||
Extract CreativeX score and URL from DAM asset metadata if present.
|
||||
Mirrors the extractor in a1_to_a2_box_uploader.py so B1 masters surface
|
||||
the same CX fields.
|
||||
Walks the metadata_element_list recursively because the score field
|
||||
(FERRERO.TAB.FIELD.CREATIVEX) is nested at depth 2 under its parent
|
||||
table FERRERO.TABULAR.FIELD.CREATIVEX, not at the top level.
|
||||
"""
|
||||
try:
|
||||
metadata_elements = asset_metadata.get('metadata', {}).get('metadata_element_list', [])
|
||||
top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
|
||||
cx = {'score': None, 'url': None}
|
||||
|
||||
creativex_data = {'score': None, 'url': None}
|
||||
|
||||
for element in metadata_elements:
|
||||
for element in _walk_metadata_elements(top):
|
||||
element_id = element.get('id')
|
||||
|
||||
if element_id == 'FERRERO.TAB.FIELD.CREATIVEX':
|
||||
|
|
@ -76,7 +90,7 @@ def extract_creativex_from_dam_metadata(asset_metadata):
|
|||
if isinstance(field_value, dict):
|
||||
score = field_value.get('value')
|
||||
if score:
|
||||
creativex_data['score'] = str(score)
|
||||
cx['score'] = str(score)
|
||||
|
||||
elif element_id == 'FERRERO.FIELD.CREATIVEX LINK':
|
||||
value_obj = element.get('value', {})
|
||||
|
|
@ -85,9 +99,9 @@ def extract_creativex_from_dam_metadata(asset_metadata):
|
|||
if isinstance(nested_value, dict):
|
||||
url = nested_value.get('value')
|
||||
if url:
|
||||
creativex_data['url'] = url
|
||||
cx['url'] = url
|
||||
|
||||
return creativex_data
|
||||
return cx
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Failed to extract CreativeX from metadata: {}".format(str(e)))
|
||||
|
|
|
|||
|
|
@ -35,14 +35,29 @@ logging.basicConfig(
|
|||
logger = logging.getLogger('B1CXBackfill')
|
||||
|
||||
|
||||
def _walk_metadata_elements(elements):
|
||||
"""Recursively yield every element in nested metadata_element_list arrays."""
|
||||
for e in elements or []:
|
||||
if not isinstance(e, dict):
|
||||
continue
|
||||
yield e
|
||||
nested = e.get('metadata_element_list')
|
||||
if isinstance(nested, list):
|
||||
for sub in _walk_metadata_elements(nested):
|
||||
yield sub
|
||||
|
||||
|
||||
def extract_creativex_from_dam_metadata(asset_metadata):
|
||||
"""Mirror of the extractor in b1_to_b2_download.py — duplicated here
|
||||
to keep the backfill script self-contained (avoids triggering
|
||||
b1_to_b2_download's module-level logging setup on import)."""
|
||||
b1_to_b2_download's module-level logging setup on import).
|
||||
|
||||
Walks recursively: the score field is at depth 2 (nested inside
|
||||
FERRERO.TABULAR.FIELD.CREATIVEX, which lives inside a category)."""
|
||||
try:
|
||||
elements = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
|
||||
top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', [])
|
||||
cx = {'score': None, 'url': None}
|
||||
for element in elements:
|
||||
for element in _walk_metadata_elements(top):
|
||||
element_id = element.get('id')
|
||||
if element_id == 'FERRERO.TAB.FIELD.CREATIVEX':
|
||||
values = element.get('values', [])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue