diff --git a/Python-Version/scripts/b1_to_b2_download.py b/Python-Version/scripts/b1_to_b2_download.py index dde2f6f..d534eb9 100755 --- a/Python-Version/scripts/b1_to_b2_download.py +++ b/Python-Version/scripts/b1_to_b2_download.py @@ -53,18 +53,32 @@ logging.basicConfig( logger = logging.getLogger('B1toB2') +def _walk_metadata_elements(elements): + """Recursively yield every element in nested metadata_element_list arrays. + Categories and tables both nest fields underneath them, so a flat walk + misses anything below the top level.""" + for e in elements or []: + if not isinstance(e, dict): + continue + yield e + nested = e.get('metadata_element_list') + if isinstance(nested, list): + for sub in _walk_metadata_elements(nested): + yield sub + + def extract_creativex_from_dam_metadata(asset_metadata): """ Extract CreativeX score and URL from DAM asset metadata if present. - Mirrors the extractor in a1_to_a2_box_uploader.py so B1 masters surface - the same CX fields. + Walks the metadata_element_list recursively because the score field + (FERRERO.TAB.FIELD.CREATIVEX) is nested at depth 2 under its parent + table FERRERO.TABULAR.FIELD.CREATIVEX, not at the top level. """ try: - metadata_elements = asset_metadata.get('metadata', {}).get('metadata_element_list', []) + top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', []) + cx = {'score': None, 'url': None} - creativex_data = {'score': None, 'url': None} - - for element in metadata_elements: + for element in _walk_metadata_elements(top): element_id = element.get('id') if element_id == 'FERRERO.TAB.FIELD.CREATIVEX': @@ -76,7 +90,7 @@ def extract_creativex_from_dam_metadata(asset_metadata): if isinstance(field_value, dict): score = field_value.get('value') if score: - creativex_data['score'] = str(score) + cx['score'] = str(score) elif element_id == 'FERRERO.FIELD.CREATIVEX LINK': value_obj = element.get('value', {}) @@ -85,9 +99,9 @@ def extract_creativex_from_dam_metadata(asset_metadata): if isinstance(nested_value, dict): url = nested_value.get('value') if url: - creativex_data['url'] = url + cx['url'] = url - return creativex_data + return cx except Exception as e: logger.warning("Failed to extract CreativeX from metadata: {}".format(str(e))) diff --git a/Python-Version/scripts/backfill_b1_creativex_scores.py b/Python-Version/scripts/backfill_b1_creativex_scores.py index b32d92c..c775faa 100644 --- a/Python-Version/scripts/backfill_b1_creativex_scores.py +++ b/Python-Version/scripts/backfill_b1_creativex_scores.py @@ -35,14 +35,29 @@ logging.basicConfig( logger = logging.getLogger('B1CXBackfill') +def _walk_metadata_elements(elements): + """Recursively yield every element in nested metadata_element_list arrays.""" + for e in elements or []: + if not isinstance(e, dict): + continue + yield e + nested = e.get('metadata_element_list') + if isinstance(nested, list): + for sub in _walk_metadata_elements(nested): + yield sub + + def extract_creativex_from_dam_metadata(asset_metadata): """Mirror of the extractor in b1_to_b2_download.py — duplicated here to keep the backfill script self-contained (avoids triggering - b1_to_b2_download's module-level logging setup on import).""" + b1_to_b2_download's module-level logging setup on import). + + Walks recursively: the score field is at depth 2 (nested inside + FERRERO.TABULAR.FIELD.CREATIVEX, which lives inside a category).""" try: - elements = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', []) + top = (asset_metadata or {}).get('metadata', {}).get('metadata_element_list', []) cx = {'score': None, 'url': None} - for element in elements: + for element in _walk_metadata_elements(top): element_id = element.get('id') if element_id == 'FERRERO.TAB.FIELD.CREATIVEX': values = element.get('values', [])