ferrero-opentext/Python-Version/compare_ppr_structure.py
nickviljoen f83b4fae3e PPR Environment: Use SIMPLE metadata structure for tabular fields
Key Changes:
- Updated metadata_extractor_mvp.py to use SIMPLE structure for all tabular fields
- All tabular fields now use direct value objects (no MetadataTableFieldRow wrapper)
- MAIN_LANGUAGES, ASSETCOMPLIANCE, MARKETING_TAG, CREATIVEX all use SIMPLE structure
- Master Asset ID field updated to SIMPLE structure
- Date fields now use type 'string' instead of 'long'
- Matches DAM reference structure from asset_representation.json

Added Files:
- metadata_extractor_mvp_PROD.py: PROD-specific version with same SIMPLE structure
- Backup files for safety
- Analysis and comparison documentation

Environment:
- Tested and working in PPR environment (ppr.dam.ferrero.com)
- All tabular fields match DAM-supplied reference structure
- Successful uploads confirmed

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-23 16:52:50 +02:00

571 lines
20 KiB
Python

#!/usr/bin/env python3
"""
PPR Payload Structure Comparison Tool
Compares client's reference asset_representation.json with code-generated structure
"""
import json
import sys
from typing import Dict, List, Tuple, Any
def load_json(filepath):
    """Load and parse a JSON file.

    Args:
        filepath: Path of the JSON document to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly instead of relying on the platform default encoding
    # (which is not UTF-8 everywhere). 'utf-8-sig' reads plain UTF-8 and also
    # drops a leading byte-order mark, which json.load would otherwise reject.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        return json.load(f)
def get_field_by_id(fields, field_id):
    """Return the first entry of ``fields`` whose 'id' equals ``field_id``.

    Entries are looked up with ``.get('id')``, so entries without an 'id'
    key compare as ``None``. The result is ``None`` when nothing matches.
    """
    return next(
        (candidate for candidate in fields if candidate.get('id') == field_id),
        None,
    )
def compare_dict(ref_dict, code_dict, path="", depth=0):
    """
    Walk two dictionaries side by side and classify every key.

    Keys from both inputs are visited in sorted order. Nested dictionaries
    are compared recursively; two lists are only compared by length; any
    other pair of values is compared with ``==``. ``path`` is the dotted
    prefix used when naming keys in the result, and ``depth`` is the current
    recursion level (incremented on each nested call).

    Returns: (matches, differences)
    """
    matched = []
    mismatched = []

    for name in sorted(ref_dict.keys() | code_dict.keys()):
        dotted = name if not path else f"{path}.{name}"

        # Keys that exist on one side only cannot be compared any further.
        if name not in ref_dict:
            mismatched.append({
                'path': dotted,
                'issue': 'EXTRA_IN_CODE',
                'code_value': code_dict[name]
            })
            continue
        if name not in code_dict:
            mismatched.append({
                'path': dotted,
                'issue': 'MISSING_IN_CODE',
                'ref_value': ref_dict[name]
            })
            continue

        expected = ref_dict[name]
        actual = code_dict[name]

        if isinstance(expected, dict) and isinstance(actual, dict):
            # Descend into nested dictionaries and merge their results.
            nested_ok, nested_bad = compare_dict(expected, actual, dotted, depth + 1)
            matched += nested_ok
            mismatched += nested_bad
        elif isinstance(expected, list) and isinstance(actual, list):
            # Lists are judged by length only; their items are not inspected.
            if len(expected) == len(actual):
                matched.append({
                    'path': dotted,
                    'value': f"List with {len(expected)} items"
                })
            else:
                mismatched.append({
                    'path': dotted,
                    'issue': 'LIST_LENGTH_MISMATCH',
                    'ref_length': len(expected),
                    'code_length': len(actual)
                })
        elif expected == actual:
            matched.append({
                'path': dotted,
                'value': expected
            })
        else:
            mismatched.append({
                'path': dotted,
                'issue': 'VALUE_MISMATCH',
                'ref_value': expected,
                'code_value': actual
            })

    return matched, mismatched
def _compare_selected_keys(ref, code, keys, report, prefix=""):
    """Compare the given keys of two dicts and record the outcome in ``report``.

    Equal values are appended to ``report['matches']``; unequal values and
    keys present on only one side are appended to ``report['differences']``.
    ``prefix`` is prepended to the key when naming the property.
    """
    for key in keys:
        label = f"{prefix}{key}"
        in_ref = key in ref
        in_code = key in code
        if in_ref and in_code:
            if ref[key] == code[key]:
                report['matches'].append(f"{label}: {ref[key]}")
            else:
                report['differences'].append({
                    'property': label,
                    'ref': ref[key],
                    'code': code[key]
                })
        elif in_ref:
            report['differences'].append({
                'property': label,
                'issue': 'MISSING_IN_CODE',
                'ref': ref[key]
            })
        elif in_code:
            report['differences'].append({
                'property': label,
                'issue': 'EXTRA_IN_CODE',
                'code': code[key]
            })


def analyze_tabular_field(field_id, ref_field, code_field):
    """Analyze tabular field structure.

    Compares the reference field against the code-side field at three levels:
    the top-level keys ('id', 'parent_table_id', 'type'), the length of the
    'values' list, and the first entry of 'values' (its flags and its nested
    'value' object). Only the first entry of 'values' is inspected.

    Keys that exist on only one side are reported as MISSING_IN_CODE or
    EXTRA_IN_CODE at every level, not just at the top level, so a structural
    gap cannot be mistaken for a match.

    Args:
        field_id: Identifier stored in the report under 'field_id'.
        ref_field: Field dict taken from the reference document.
        code_field: Field dict describing what the code generates.

    Returns:
        dict with 'field_id', 'matches' (list of strings) and 'differences'
        (list of dicts with 'property' plus 'ref'/'code' and, for one-sided
        keys, 'issue').
    """
    report = {
        'field_id': field_id,
        'matches': [],
        'differences': []
    }

    # Check basic structure
    _compare_selected_keys(
        ref_field, code_field, ('id', 'parent_table_id', 'type'), report
    )

    # Check values array
    ref_has_values = 'values' in ref_field
    code_has_values = 'values' in code_field
    if not (ref_has_values and code_has_values):
        if ref_has_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'MISSING_IN_CODE',
                'ref': ref_field['values']
            })
        elif code_has_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'EXTRA_IN_CODE',
                'code': code_field['values']
            })
        return report

    ref_values = ref_field['values']
    code_values = code_field['values']
    if len(ref_values) != len(code_values):
        report['differences'].append({
            'property': 'values_length',
            'ref': len(ref_values),
            'code': len(code_values)
        })
    else:
        report['matches'].append(f"values array length: {len(ref_values)}")

    # Compare first value structure (if exists)
    if not (ref_values and code_values):
        return report
    ref_val = ref_values[0]
    code_val = code_values[0]

    _compare_selected_keys(
        ref_val, code_val,
        ('cascading_domain_value', 'domain_value', 'is_locked'),
        report, prefix="values[0]."
    )

    # Deep compare value.field_value structure. 'field_value' is compared as
    # a whole object, so any differing inner value shows up as one difference.
    ref_has_inner = 'value' in ref_val
    code_has_inner = 'value' in code_val
    if ref_has_inner and code_has_inner:
        _compare_selected_keys(
            ref_val['value'], code_val['value'],
            ('type', 'field_value'),
            report, prefix="values[0].value."
        )
    elif ref_has_inner:
        report['differences'].append({
            'property': 'values[0].value',
            'issue': 'MISSING_IN_CODE',
            'ref': ref_val['value']
        })
    elif code_has_inner:
        report['differences'].append({
            'property': 'values[0].value',
            'issue': 'EXTRA_IN_CODE',
            'code': code_val['value']
        })

    return report
def analyze_regular_field(field_id, ref_field, code_field):
    """Analyze regular (non-tabular) field structure.

    Spot-checks a fixed set of top-level properties and then compares the
    'value' objects of both fields with ``compare_dict``.

    Args:
        field_id: Identifier stored in the report under 'field_id'.
        ref_field: Field dict taken from the reference document.
        code_field: Field dict describing what the code generates.

    Returns:
        dict with 'field_id', 'matches' (list of strings) and 'differences'
        (list of dicts keyed by 'property', with 'ref'/'code' and, where
        applicable, 'issue').
    """
    report = {
        'field_id': field_id,
        'matches': [],
        'differences': []
    }

    # Properties to check (spot check - not exhaustive)
    check_props = ['id', 'type', 'column_name', 'data_type', 'domained', 'domain_id']
    for prop in check_props:
        if prop in ref_field and prop in code_field:
            if ref_field[prop] == code_field[prop]:
                report['matches'].append(f"{prop}: {ref_field[prop]}")
            else:
                report['differences'].append({
                    'property': prop,
                    'ref': ref_field[prop],
                    'code': code_field[prop]
                })
        elif prop in ref_field:
            report['differences'].append({
                'property': prop,
                'issue': 'MISSING_IN_CODE',
                'ref': ref_field[prop]
            })
        elif prop in code_field:
            report['differences'].append({
                'property': prop,
                'issue': 'EXTRA_IN_CODE',
                'code': code_field[prop]
            })

    # Check value structure
    if 'value' in ref_field and 'value' in code_field:
        # compare_dict is seeded with the 'value' path, so the paths it
        # returns already start with "value." and are used unchanged here.
        matches, diffs = compare_dict(ref_field['value'], code_field['value'], 'value')
        for match in matches:
            report['matches'].append(f"{match['path']}: {match['value']}")
        for diff in diffs:
            report['differences'].append({
                'property': diff['path'],
                'issue': diff['issue'],
                # List-length mismatches carry lengths instead of values;
                # fall back to them so the entry is not reported as None/None.
                'ref': diff.get('ref_value', diff.get('ref_length')),
                'code': diff.get('code_value', diff.get('code_length'))
            })
    elif 'value' in ref_field:
        # A 'value' present on one side only is a structural difference too.
        report['differences'].append({
            'property': 'value',
            'issue': 'MISSING_IN_CODE',
            'ref': ref_field['value']
        })
    elif 'value' in code_field:
        report['differences'].append({
            'property': 'value',
            'issue': 'EXTRA_IN_CODE',
            'code': code_field['value']
        })

    return report
# Reference document used when no path is supplied by the caller or on the
# command line (kept for backward compatibility with the original script).
_DEFAULT_REF_PATH = '/Users/nickviljoen/Downloads/asset_representation.json'


def _default_table_field(field_id, parent_table_id, placeholder):
    """Build the DomainValue-based table field shared by several tabular fields."""
    return {
        'id': field_id,
        'parent_table_id': parent_table_id,
        'type': 'com.artesia.metadata.MetadataTableField',
        'values': [
            {
                'cascading_domain_value': False,
                'domain_value': True,
                'is_locked': False,
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'field_value': {
                        'type': 'string',
                        'value': placeholder
                    }
                }
            }
        ]
    }


def _tabular_code_structures():
    """Map each tabular field ID to ``(origin, heading, code_structure)``.

    ``origin`` and ``heading`` are the texts printed before the structure.
    Key order inside each structure is significant because the structure is
    printed with ``json.dumps``.
    """
    return {
        'MAIN_LANGUAGES': (
            "_add_missing_fields() at lines 267-285",
            "Structure:",
            {
                'id': 'MAIN_LANGUAGES',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES',
                'type': 'com.artesia.metadata.MetadataTableField',
                'values': [
                    {
                        'cascading_domain_value': False,
                        'domain_value': True,
                        'value': {
                            'field_value': {
                                'type': 'string',
                                'value': '<from_filename>'
                            },
                            'type': 'com.artesia.metadata.DomainValue'
                        }
                    }
                ]
            },
        ),
        'FERRERO.FIELD.ASSETCOMPLIANCE': (
            "_add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _default_table_field(
                'FERRERO.FIELD.ASSETCOMPLIANCE',
                'FERRERO.TABULAR.FIELD.ASSETCOMPLIANCE',
                '<default_value>',
            ),
        ),
        'MARKETING_TAG': (
            "_add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _default_table_field(
                'MARKETING_TAG',
                'FERRERO.TABULAR.FIELD.MARKETING_TAG',
                '<default_value>',
            ),
        ),
        'FERRERO.TAB.FIELD.CREATIVEX': (
            "_update_creativex_fields() at lines 670-678",
            "Structure:",
            {
                'type': 'com.artesia.metadata.MetadataTableField',
                'id': 'FERRERO.TAB.FIELD.CREATIVEX',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.CREATIVEX',
                'values': [
                    {
                        'cascading_domain_value': True,
                        'domain_value': False,
                        'is_locked': False,
                        'value': {
                            'type': 'com.artesia.metadata.CascadingDomainValue',
                            'field_value': {
                                'type': 'string',
                                'value': '<Platform>^<Score>'
                            }
                        }
                    }
                ]
            },
        ),
        'FERRERO.MASTERASSETIDS': (
            "_add_master_asset_id_field() at lines 771-789",
            "Structure:",
            _default_table_field(
                'FERRERO.MASTERASSETIDS',
                'FERRERO.TABULAR.FIELD.MASTERASSETIDS',
                '<master_opentext_id>',
            ),
        ),
    }


def _regular_code_value(is_date, is_domain):
    """Return ``(origin, value_structure)`` for a date, domain or text field."""
    if is_date:
        return (
            "_set_date_field_value() at lines 567-605",
            {
                'value': {
                    'type': 'string',
                    'value': '<date_string>'
                }
            },
        )
    if is_domain:
        return (
            "_set_field_value() for domain fields at lines 543-558",
            {
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'active_to': '',
                    'active_from': '',
                    'field_value': {
                        'type': 'string',
                        'value': '<value>'
                    },
                    'display_value': '<value>',
                    'expired_value': False
                },
                'is_locked': False,
                'domain_value': True,
                'cascading_domain_value': False
            },
        )
    return (
        "_set_field_value() for text fields at lines 537-538",
        {
            'value': {
                'type': 'string',
                'value': '<value>'
            }
        },
    )


def _print_tabular_report(report):
    """Print the matches and differences of an ``analyze_tabular_field`` report."""
    print("\n✅ MATCHES:")
    if report['matches']:
        for match in report['matches']:
            print(f"{match}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not report['differences']:
        print(" None - PERFECT MATCH! 🎉")
        return
    for diff in report['differences']:
        if not isinstance(diff, dict):
            print(f"{diff}")
            continue
        prop = diff.get('property', 'unknown')
        if diff.get('issue'):
            print(f"{prop}: {diff['issue']}")
            if 'ref' in diff:
                print(f" Reference: {diff['ref']}")
            if 'code' in diff:
                print(f" Code: {diff['code']}")
        else:
            print(f"{prop}:")
            print(f" Reference: {diff.get('ref', 'N/A')}")
            print(f" Code: {diff.get('code', 'N/A')}")


def _print_dict_comparison(matches, diffs):
    """Print the ``(matches, differences)`` pair returned by ``compare_dict``."""
    print("\n✅ MATCHES:")
    if matches:
        for match in matches:
            print(f"{match['path']}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not diffs:
        print(" None - PERFECT MATCH! 🎉")
        return
    for diff in diffs:
        prop = diff.get('path', 'unknown')
        issue = diff.get('issue', 'MISMATCH')
        print(f"{prop}: {issue}")
        if 'ref_value' in diff:
            print(f" Reference: {diff['ref_value']}")
        if 'code_value' in diff:
            print(f" Code: {diff['code_value']}")


def main(ref_path=None):
    """Compare the reference payload with the structures the code generates.

    Loads the reference JSON, prints each selected tabular field next to the
    structure the code is expected to build for it, spot-checks a list of
    regular fields, and finishes with a static summary. All output goes to
    stdout.

    Args:
        ref_path: Path to the reference JSON. When omitted, the first
            command-line argument is used if present, otherwise the
            original default path.

    Raises:
        OSError: If the reference file cannot be opened.
        KeyError: If the reference document lacks the expected
            asset_resource/asset/metadata/metadata_element_list nesting.
    """
    # Load reference file
    if ref_path is None:
        ref_path = sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_REF_PATH
    ref_data = load_json(ref_path)

    print("=" * 80)
    print("PPR PAYLOAD STRUCTURE COMPARISON")
    print("=" * 80)
    print(f"\nReference file: {ref_path}")
    print("\nNOTE: This compares the STRUCTURE that the code generates,")
    print(" not actual runtime values (which depend on filename, master metadata, etc.)")
    print("\n" + "=" * 80)

    # Extract fields from reference
    ref_fields = ref_data['asset_resource']['asset']['metadata']['metadata_element_list']

    # Create a lookup by field ID (entries without a usable ID are skipped)
    ref_fields_by_id = {
        field.get('id'): field for field in ref_fields if field.get('id')
    }
    print(f"\nReference file contains {len(ref_fields_by_id)} fields")

    # Tabular fields to examine in detail, with the structure expected for each
    tabular_structures = _tabular_code_structures()

    # Regular fields to spot check
    regular_fields = [
        'FERRERO.FIELD.ASSET VALIDITY START PERIOD',
        'FERRERO.FIELD.ASSET VALIDITY END PERIOD',
        'ARTESIA.FIELD.ASSET DESCRIPTION',
        'ARTESIA.FIELD.ASSET NAME',
        'ARTESIA.FIELD.ASSET_ID',
        'FERRERO.FIELD.MKTG.ASSET TYPE',
        'FERRERO.FIELD.FISCAL YEAR',
        'FERRERO.MARKETING.FIELD.AGENCY NAME',
        'FERRERO.FIELD.CREATIVEX LINK'
    ]

    print("\n" + "=" * 80)
    print("ANALYZING CODE-GENERATED STRUCTURES")
    print("=" * 80)
    print("\nChecking how metadata_extractor_mvp.py would build each field...")

    # Analyze TABULAR FIELDS
    print("\n" + "-" * 80)
    print("1. TABULAR FIELDS - DETAILED ANALYSIS")
    print("-" * 80)
    # Iterating the table itself guarantees every field has a structure, so
    # the report printed below always belongs to the current field.
    for field_id, (origin, heading, code_structure) in tabular_structures.items():
        print(f"\n{'='*60}")
        print(f"Field: {field_id}")
        print('='*60)
        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue
        ref_field = ref_fields_by_id[field_id]

        # Show reference structure
        print("\n📋 REFERENCE STRUCTURE:")
        print(json.dumps(ref_field, indent=2))

        # Analyze structure based on code
        print("\n🔍 CODE ANALYSIS:")
        print(f"\nGenerated by: {origin}")
        print(heading)
        print(json.dumps(code_structure, indent=2))

        # Compare and print comparison report
        report = analyze_tabular_field(field_id, ref_field, code_structure)
        _print_tabular_report(report)

    # Analyze REGULAR FIELDS (spot check)
    print("\n" + "-" * 80)
    print("2. REGULAR FIELDS - SPOT CHECK")
    print("-" * 80)
    for field_id in regular_fields:
        print(f"\n{'='*60}")
        print(f"Field: {field_id}")
        print('='*60)
        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue
        ref_field = ref_fields_by_id[field_id]

        # Determine field type
        is_date = 'VALIDITY' in field_id
        is_domain = ref_field.get('domained', False)
        print(f"\nField Type: {'Date' if is_date else 'Domain' if is_domain else 'Text'}")
        print(f"Domain ID: {ref_field.get('domain_id', 'N/A')}")

        # Show reference value structure
        print("\n📋 REFERENCE VALUE STRUCTURE:")
        if 'value' in ref_field:
            print(json.dumps(ref_field['value'], indent=2))
        else:
            print(" No value structure in reference")

        # Analyze code structure
        print("\n🔍 CODE ANALYSIS:")
        origin, code_value_structure = _regular_code_value(is_date, is_domain)
        print(f"Generated by: {origin}")
        print("\nCode value structure:")
        print(json.dumps(code_value_structure, indent=2))

        # Compare
        if 'value' in ref_field:
            matches, diffs = compare_dict(ref_field['value'], code_value_structure, '')
            _print_dict_comparison(matches, diffs)

    # SUMMARY
    # NOTE: the summary text below is static; it is not derived from the
    # comparison results printed above.
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print("""
This analysis compares the STRUCTURE of fields as they would be generated by
the code in metadata_extractor_mvp.py against the client's reference file.
Key findings:
1. All tabular fields use the correct MetadataTableField type
2. All tabular fields have the correct parent_table_id reference
3. Value structures match the expected DomainValue or CascadingDomainValue types
4. Date fields use simple string type as expected
5. Domain fields include full DomainValue wrapper with active_to, active_from, etc.
6. Text fields use simple string value structure
Any differences noted above should be reviewed to ensure compatibility with
the OpenText DAM API expectations.
""")
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()