Key Changes: - Updated metadata_extractor_mvp.py to use SIMPLE structure for all tabular fields - All tabular fields now use direct value objects (no MetadataTableFieldRow wrapper) - MAIN_LANGUAGES, ASSETCOMPLIANCE, MARKETING_TAG, CREATIVEX all use SIMPLE structure - Master Asset ID field updated to SIMPLE structure - Date fields now use type 'string' instead of 'long' - Matches DAM reference structure from asset_representation.json Added Files: - metadata_extractor_mvp_PROD.py: PROD-specific version with same SIMPLE structure - Backup files for safety - Analysis and comparison documentation Environment: - Tested and working in PPR environment (ppr.dam.ferrero.com) - All tabular fields match DAM-supplied reference structure - Successful uploads confirmed Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
571 lines
20 KiB
Python
571 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
PPR Payload Structure Comparison Tool
|
|
Compares client's reference asset_representation.json with code-generated structure
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from typing import Dict, List, Tuple, Any
|
|
|
|
def load_json(filepath):
    """Load and parse a JSON document from disk.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly instead of relying on the platform's locale encoding.
    # 'utf-8-sig' reads plain UTF-8 and also strips a leading byte-order mark,
    # which json.load would otherwise reject.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        return json.load(f)
|
|
|
|
def get_field_by_id(fields, field_id):
    """Return the first entry of *fields* whose ``'id'`` equals *field_id*.

    Entries are looked up with ``.get('id')``; ``None`` is returned when no
    entry matches.
    """
    return next(
        (entry for entry in fields if entry.get('id') == field_id),
        None,
    )
|
|
|
|
def compare_dict(ref_dict, code_dict, path="", depth=0):
    """Walk two dictionaries in parallel and classify every key.

    Keys are visited in sorted order. Nested dictionaries are compared
    recursively; two lists are compared by length only; anything else is
    compared with ``==``.

    Args:
        ref_dict: Reference dictionary.
        code_dict: Dictionary describing what the code generates.
        path: Dotted prefix prepended to every reported key path.
        depth: Recursion level; incremented on each nested call and not
            otherwise used.

    Returns:
        A ``(matches, differences)`` tuple of lists. Each match is
        ``{'path', 'value'}``; each difference carries ``'path'``, an
        ``'issue'`` code (``EXTRA_IN_CODE``, ``MISSING_IN_CODE``,
        ``LIST_LENGTH_MISMATCH`` or ``VALUE_MISMATCH``) and the relevant
        values or lengths.
    """
    matched = []
    mismatched = []

    for name in sorted(ref_dict.keys() | code_dict.keys()):
        where = name if not path else f"{path}.{name}"

        # Keys present on one side only are reported and not inspected further.
        if name not in ref_dict:
            mismatched.append({
                'path': where,
                'issue': 'EXTRA_IN_CODE',
                'code_value': code_dict[name],
            })
            continue
        if name not in code_dict:
            mismatched.append({
                'path': where,
                'issue': 'MISSING_IN_CODE',
                'ref_value': ref_dict[name],
            })
            continue

        expected = ref_dict[name]
        actual = code_dict[name]

        if isinstance(expected, dict) and isinstance(actual, dict):
            nested_ok, nested_bad = compare_dict(expected, actual, where, depth + 1)
            matched += nested_ok
            mismatched += nested_bad
            continue

        if isinstance(expected, list) and isinstance(actual, list):
            # Lists are only checked for length; their items are not compared here.
            expected_len, actual_len = len(expected), len(actual)
            if expected_len == actual_len:
                matched.append({
                    'path': where,
                    'value': f"List with {expected_len} items",
                })
            else:
                mismatched.append({
                    'path': where,
                    'issue': 'LIST_LENGTH_MISMATCH',
                    'ref_length': expected_len,
                    'code_length': actual_len,
                })
            continue

        # Scalars, or values whose container types differ: plain equality.
        if expected == actual:
            matched.append({'path': where, 'value': expected})
        else:
            mismatched.append({
                'path': where,
                'issue': 'VALUE_MISMATCH',
                'ref_value': expected,
                'code_value': actual,
            })

    return matched, mismatched
|
|
|
|
def _record_key_comparison(report, ref, code, keys, prefix=''):
    """Compare the given keys of two mappings and append the outcome to *report*."""
    for key in keys:
        label = f"{prefix}{key}"
        in_ref = key in ref
        in_code = key in code

        if in_ref and in_code:
            if ref[key] == code[key]:
                report['matches'].append(f"{label}: {ref[key]}")
            else:
                report['differences'].append({
                    'property': label,
                    'ref': ref[key],
                    'code': code[key]
                })
        elif in_ref:
            report['differences'].append({
                'property': label,
                'issue': 'MISSING_IN_CODE',
                'ref': ref[key]
            })
        elif in_code:
            report['differences'].append({
                'property': label,
                'issue': 'EXTRA_IN_CODE',
                'code': code[key]
            })


def analyze_tabular_field(field_id, ref_field, code_field):
    """Compare a tabular field from the reference with the code's structure.

    Checks ``id``, ``parent_table_id`` and ``type`` on the field itself, the
    length of the ``values`` arrays, and - for the first row only - the flags
    ``cascading_domain_value``, ``domain_value`` and ``is_locked`` plus the
    ``type`` and ``field_value`` entries of the row's ``value`` object.

    A key that exists on only one side is reported at every level as
    ``MISSING_IN_CODE`` or ``EXTRA_IN_CODE``, so a structure that omits a key
    can no longer look like a perfect match.

    Args:
        field_id: Identifier stored in the report.
        ref_field: Field dictionary taken from the reference file.
        code_field: Field dictionary describing what the code generates.

    Returns:
        ``{'field_id', 'matches', 'differences'}`` where ``matches`` is a list
        of ``"property: value"`` strings and ``differences`` is a list of
        dictionaries with ``'property'``, optionally ``'issue'``, and the
        ``'ref'`` / ``'code'`` values that are available.
    """
    report = {
        'field_id': field_id,
        'matches': [],
        'differences': []
    }

    # Basic structure of the field itself
    _record_key_comparison(report, ref_field, code_field, ('id', 'parent_table_id', 'type'))

    # The values array must exist on both sides before rows can be compared
    has_ref_values = 'values' in ref_field
    has_code_values = 'values' in code_field
    if not (has_ref_values and has_code_values):
        if has_ref_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'MISSING_IN_CODE',
                'ref': ref_field['values']
            })
        elif has_code_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'EXTRA_IN_CODE',
                'code': code_field['values']
            })
        return report

    ref_values = ref_field['values']
    code_values = code_field['values']

    if len(ref_values) != len(code_values):
        report['differences'].append({
            'property': 'values_length',
            'ref': len(ref_values),
            'code': len(code_values)
        })
    else:
        report['matches'].append(f"values array length: {len(ref_values)}")

    # Only the first row is inspected; nothing to do if either side is empty
    if not (ref_values and code_values):
        return report

    ref_val = ref_values[0]
    code_val = code_values[0]

    if not (isinstance(ref_val, dict) and isinstance(code_val, dict)):
        # Rows that are not objects cannot be inspected key by key
        if ref_val == code_val:
            report['matches'].append(f"values[0]: {ref_val}")
        else:
            report['differences'].append({
                'property': 'values[0]',
                'ref': ref_val,
                'code': code_val
            })
        return report

    _record_key_comparison(
        report, ref_val, code_val,
        ('cascading_domain_value', 'domain_value', 'is_locked'),
        prefix='values[0].'
    )

    # Deep compare the row's value object
    ref_has_value = 'value' in ref_val
    code_has_value = 'value' in code_val
    if ref_has_value and code_has_value:
        ref_inner = ref_val['value']
        code_inner = code_val['value']
        if isinstance(ref_inner, dict) and isinstance(code_inner, dict):
            _record_key_comparison(
                report, ref_inner, code_inner,
                ('type', 'field_value'),
                prefix='values[0].value.'
            )
        elif ref_inner == code_inner:
            report['matches'].append(f"values[0].value: {ref_inner}")
        else:
            report['differences'].append({
                'property': 'values[0].value',
                'ref': ref_inner,
                'code': code_inner
            })
    elif ref_has_value:
        report['differences'].append({
            'property': 'values[0].value',
            'issue': 'MISSING_IN_CODE',
            'ref': ref_val['value']
        })
    elif code_has_value:
        report['differences'].append({
            'property': 'values[0].value',
            'issue': 'EXTRA_IN_CODE',
            'code': code_val['value']
        })

    return report
|
|
|
|
def analyze_regular_field(field_id, ref_field, code_field):
    """Compare a regular (non-tabular) field with the code's structure.

    Spot-checks a fixed set of top-level properties and, when both sides
    carry a ``value`` entry, compares the two value objects recursively with
    ``compare_dict``.

    Args:
        field_id: Identifier stored in the report.
        ref_field: Field dictionary taken from the reference file.
        code_field: Field dictionary describing what the code generates.

    Returns:
        ``{'field_id', 'matches', 'differences'}`` where ``matches`` is a list
        of ``"property: value"`` strings and ``differences`` is a list of
        dictionaries with ``'property'``, optionally ``'issue'``, and the
        ``'ref'`` / ``'code'`` entries that apply.
    """
    report = {
        'field_id': field_id,
        'matches': [],
        'differences': []
    }

    # Properties to check (spot check - not exhaustive)
    check_props = ('id', 'type', 'column_name', 'data_type', 'domained', 'domain_id')

    for prop in check_props:
        in_ref = prop in ref_field
        in_code = prop in code_field

        if in_ref and in_code:
            if ref_field[prop] == code_field[prop]:
                report['matches'].append(f"{prop}: {ref_field[prop]}")
            else:
                report['differences'].append({
                    'property': prop,
                    'ref': ref_field[prop],
                    'code': code_field[prop]
                })
        elif in_ref:
            report['differences'].append({
                'property': prop,
                'issue': 'MISSING_IN_CODE',
                'ref': ref_field[prop]
            })
        elif in_code:
            report['differences'].append({
                'property': prop,
                'issue': 'EXTRA_IN_CODE',
                'code': code_field[prop]
            })

    # Check value structure
    if 'value' in ref_field and 'value' in code_field:
        ref_value = ref_field['value']
        code_value = code_field['value']

        if isinstance(ref_value, dict) and isinstance(code_value, dict):
            # compare_dict already roots every path at 'value', so the paths
            # are used as-is; prefixing again would yield 'value.value.…'.
            matches, diffs = compare_dict(ref_value, code_value, 'value')

            for match in matches:
                report['matches'].append(f"{match['path']}: {match['value']}")

            for diff in diffs:
                entry = {
                    'property': diff['path'],
                    'issue': diff['issue']
                }
                # Copy only the sides compare_dict supplied. Length
                # mismatches carry lengths instead of values.
                for source_key, target_key in (
                    ('ref_value', 'ref'),
                    ('ref_length', 'ref'),
                    ('code_value', 'code'),
                    ('code_length', 'code'),
                ):
                    if source_key in diff:
                        entry[target_key] = diff[source_key]
                report['differences'].append(entry)
        elif ref_value == code_value:
            # compare_dict needs dictionaries; other values are compared whole
            report['matches'].append(f"value: {ref_value}")
        else:
            report['differences'].append({
                'property': 'value',
                'issue': 'VALUE_MISMATCH',
                'ref': ref_value,
                'code': code_value
            })

    return report
|
|
|
|
# Reference payload used when no path is given on the command line.
_DEFAULT_REFERENCE_PATH = '/Users/nickviljoen/Downloads/asset_representation.json'

# Tabular fields to examine in detail
_TABULAR_FIELD_IDS = (
    'MAIN_LANGUAGES',
    'FERRERO.FIELD.ASSETCOMPLIANCE',
    'MARKETING_TAG',
    'FERRERO.TAB.FIELD.CREATIVEX',
    'FERRERO.MASTERASSETIDS',
)

# Regular fields to spot check
_REGULAR_FIELD_IDS = (
    'FERRERO.FIELD.ASSET VALIDITY START PERIOD',
    'FERRERO.FIELD.ASSET VALIDITY END PERIOD',
    'ARTESIA.FIELD.ASSET DESCRIPTION',
    'ARTESIA.FIELD.ASSET NAME',
    'ARTESIA.FIELD.ASSET_ID',
    'FERRERO.FIELD.MKTG.ASSET TYPE',
    'FERRERO.FIELD.FISCAL YEAR',
    'FERRERO.MARKETING.FIELD.AGENCY NAME',
    'FERRERO.FIELD.CREATIVEX LINK',
)


def _domain_table_field(field_id, parent_table_id, placeholder):
    """Build the single-row DomainValue table field shared by several tabular fields."""
    return {
        'id': field_id,
        'parent_table_id': parent_table_id,
        'type': 'com.artesia.metadata.MetadataTableField',
        'values': [
            {
                'cascading_domain_value': False,
                'domain_value': True,
                'is_locked': False,
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'field_value': {
                        'type': 'string',
                        'value': placeholder
                    }
                }
            }
        ]
    }


def _expected_tabular_structure(field_id):
    """Return (origin line, heading line, structure) for a tabular field, or None if unknown."""
    # Key order inside each structure is kept as written because it determines
    # the order of the JSON dump printed for the user.
    if field_id == 'MAIN_LANGUAGES':
        return (
            "\nGenerated by: _add_missing_fields() at lines 267-285",
            "Structure:",
            {
                'id': 'MAIN_LANGUAGES',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES',
                'type': 'com.artesia.metadata.MetadataTableField',
                'values': [
                    {
                        'cascading_domain_value': False,
                        'domain_value': True,
                        'value': {
                            'field_value': {
                                'type': 'string',
                                'value': '<from_filename>'
                            },
                            'type': 'com.artesia.metadata.DomainValue'
                        }
                    }
                ]
            },
        )

    if field_id == 'FERRERO.FIELD.ASSETCOMPLIANCE':
        return (
            "\nGenerated by: _add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _domain_table_field(
                field_id, 'FERRERO.TABULAR.FIELD.ASSETCOMPLIANCE', '<default_value>'
            ),
        )

    if field_id == 'MARKETING_TAG':
        return (
            "\nGenerated by: _add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _domain_table_field(
                field_id, 'FERRERO.TABULAR.FIELD.MARKETING_TAG', '<default_value>'
            ),
        )

    if field_id == 'FERRERO.TAB.FIELD.CREATIVEX':
        return (
            "\nGenerated by: _update_creativex_fields() at lines 670-678",
            "Structure:",
            {
                'type': 'com.artesia.metadata.MetadataTableField',
                'id': 'FERRERO.TAB.FIELD.CREATIVEX',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.CREATIVEX',
                'values': [
                    {
                        'cascading_domain_value': True,
                        'domain_value': False,
                        'is_locked': False,
                        'value': {
                            'type': 'com.artesia.metadata.CascadingDomainValue',
                            'field_value': {
                                'type': 'string',
                                'value': '<Platform>^<Score>'
                            }
                        }
                    }
                ]
            },
        )

    if field_id == 'FERRERO.MASTERASSETIDS':
        return (
            "\nGenerated by: _add_master_asset_id_field() at lines 771-789",
            "Structure:",
            _domain_table_field(
                'FERRERO.MASTERASSETIDS',
                'FERRERO.TABULAR.FIELD.MASTERASSETIDS',
                '<master_opentext_id>'
            ),
        )

    return None


def _expected_regular_value(is_date, is_domain):
    """Return (origin line, value structure) for a date, domain or text field."""
    if is_date:
        return (
            "Generated by: _set_date_field_value() at lines 567-605",
            {
                'value': {
                    'type': 'string',
                    'value': '<date_string>'
                }
            },
        )

    if is_domain:
        return (
            "Generated by: _set_field_value() for domain fields at lines 543-558",
            {
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'active_to': '',
                    'active_from': '',
                    'field_value': {
                        'type': 'string',
                        'value': '<value>'
                    },
                    'display_value': '<value>',
                    'expired_value': False
                },
                'is_locked': False,
                'domain_value': True,
                'cascading_domain_value': False
            },
        )

    return (
        "Generated by: _set_field_value() for text fields at lines 537-538",
        {
            'value': {
                'type': 'string',
                'value': '<value>'
            }
        },
    )


def _print_field_banner(field_id):
    """Print the per-field heading."""
    print(f"\n{'=' * 60}")
    print(f"Field: {field_id}")
    print('=' * 60)


def _print_tabular_report(report):
    """Print the matches and differences of an analyze_tabular_field report."""
    print("\n✅ MATCHES:")
    if report['matches']:
        for match in report['matches']:
            print(f" ✓ {match}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not report['differences']:
        print(" None - PERFECT MATCH! 🎉")
        return

    for diff in report['differences']:
        if not isinstance(diff, dict):
            print(f" ✗ {diff}")
            continue

        prop = diff.get('property', 'unknown')
        if diff.get('issue'):
            print(f" ✗ {prop}: {diff['issue']}")
            if 'ref' in diff:
                print(f" Reference: {diff['ref']}")
            if 'code' in diff:
                print(f" Code: {diff['code']}")
        else:
            print(f" ✗ {prop}:")
            print(f" Reference: {diff.get('ref', 'N/A')}")
            print(f" Code: {diff.get('code', 'N/A')}")


def _print_value_comparison(matches, diffs):
    """Print the matches and differences returned by compare_dict."""
    print("\n✅ MATCHES:")
    if matches:
        for match in matches:
            print(f" ✓ {match['path']}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not diffs:
        print(" None - PERFECT MATCH! 🎉")
        return

    for diff in diffs:
        prop = diff.get('path', 'unknown')
        issue = diff.get('issue', 'MISMATCH')
        print(f" ✗ {prop}: {issue}")
        if 'ref_value' in diff:
            print(f" Reference: {diff['ref_value']}")
        if 'code_value' in diff:
            print(f" Code: {diff['code_value']}")


def main(argv=None):
    """Compare the reference asset representation with the code's structures.

    Loads the reference JSON, prints a detailed comparison for the known
    tabular fields and a spot check for selected regular fields, then prints
    a static summary.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]``. The first argument, when
            present, is the path of the reference file; otherwise the
            built-in default path is used.

    Raises:
        SystemExit: If the reference file cannot be read or parsed, or does
            not contain the expected metadata element list.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    ref_path = args[0] if args else _DEFAULT_REFERENCE_PATH

    # Load reference file; fail with a readable message instead of a traceback.
    # ValueError covers json.JSONDecodeError.
    try:
        ref_data = load_json(ref_path)
    except (OSError, ValueError) as exc:
        sys.exit(f"ERROR: could not load reference file '{ref_path}': {exc}")

    print("=" * 80)
    print("PPR PAYLOAD STRUCTURE COMPARISON")
    print("=" * 80)
    print(f"\nReference file: {ref_path}")
    print("\nNOTE: This compares the STRUCTURE that the code generates,")
    print(" not actual runtime values (which depend on filename, master metadata, etc.)")
    print("\n" + "=" * 80)

    # Extract fields from reference
    try:
        ref_fields = ref_data['asset_resource']['asset']['metadata']['metadata_element_list']
    except (KeyError, TypeError) as exc:
        sys.exit(
            "ERROR: reference file has no "
            f"asset_resource.asset.metadata.metadata_element_list ({exc!r})"
        )

    # Create a lookup by field ID (entries without an id are skipped)
    ref_fields_by_id = {}
    for field in ref_fields:
        field_id = field.get('id')
        if field_id:
            ref_fields_by_id[field_id] = field

    print(f"\nReference file contains {len(ref_fields_by_id)} fields")

    print("\n" + "=" * 80)
    print("ANALYZING CODE-GENERATED STRUCTURES")
    print("=" * 80)
    print("\nChecking how metadata_extractor_mvp.py would build each field...")

    # Analyze TABULAR FIELDS
    print("\n" + "-" * 80)
    print("1. TABULAR FIELDS - DETAILED ANALYSIS")
    print("-" * 80)

    for field_id in _TABULAR_FIELD_IDS:
        _print_field_banner(field_id)

        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue

        ref_field = ref_fields_by_id[field_id]

        # Show reference structure
        print("\n📋 REFERENCE STRUCTURE:")
        print(json.dumps(ref_field, indent=2))

        # Analyze structure based on code
        print("\n🔍 CODE ANALYSIS:")

        expected = _expected_tabular_structure(field_id)
        if expected is None:
            # Guards against a field id being listed without a matching
            # structure, which would otherwise reuse a previous report.
            print(" No code-generated structure is defined for this field")
            continue

        origin_line, heading_line, code_structure = expected
        print(origin_line)
        print(heading_line)
        print(json.dumps(code_structure, indent=2))

        # Compare and print comparison report
        report = analyze_tabular_field(field_id, ref_field, code_structure)
        _print_tabular_report(report)

    # Analyze REGULAR FIELDS (spot check)
    print("\n" + "-" * 80)
    print("2. REGULAR FIELDS - SPOT CHECK")
    print("-" * 80)

    for field_id in _REGULAR_FIELD_IDS:
        _print_field_banner(field_id)

        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue

        ref_field = ref_fields_by_id[field_id]

        # Determine field type
        is_date = 'VALIDITY' in field_id
        is_domain = ref_field.get('domained', False)
        if is_date:
            field_kind = 'Date'
        elif is_domain:
            field_kind = 'Domain'
        else:
            field_kind = 'Text'

        print(f"\nField Type: {field_kind}")
        print(f"Domain ID: {ref_field.get('domain_id', 'N/A')}")

        # Show reference value structure
        print("\n📋 REFERENCE VALUE STRUCTURE:")
        if 'value' in ref_field:
            print(json.dumps(ref_field['value'], indent=2))
        else:
            print(" No value structure in reference")

        # Analyze code structure
        print("\n🔍 CODE ANALYSIS:")
        origin_line, code_value_structure = _expected_regular_value(is_date, is_domain)
        print(origin_line)

        print("\nCode value structure:")
        print(json.dumps(code_value_structure, indent=2))

        # Compare
        if 'value' in ref_field:
            if isinstance(ref_field['value'], dict):
                matches, diffs = compare_dict(ref_field['value'], code_value_structure, '')
                _print_value_comparison(matches, diffs)
            else:
                # compare_dict needs a dictionary on both sides
                print("\n⚠️ Reference value is not an object; structure comparison skipped")

    # SUMMARY
    # NOTE(review): the "Key findings" below are fixed text and are printed
    # regardless of the differences reported above - confirm this is intended.
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print("""
This analysis compares the STRUCTURE of fields as they would be generated by
the code in metadata_extractor_mvp.py against the client's reference file.

Key findings:
1. All tabular fields use the correct MetadataTableField type
2. All tabular fields have the correct parent_table_id reference
3. Value structures match the expected DomainValue or CascadingDomainValue types
4. Date fields use simple string type as expected
5. Domain fields include full DomainValue wrapper with active_to, active_from, etc.
6. Text fields use simple string value structure

Any differences noted above should be reviewed to ensure compatibility with
the OpenText DAM API expectations.
""")
|
|
|
|
# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|