#!/usr/bin/env python3
"""
PPR Payload Structure Comparison Tool

Compares client's reference asset_representation.json with code-generated structure.

Usage:
    python <this script> [path/to/asset_representation.json]

When no path is given, DEFAULT_REF_PATH is used.
"""

import json
import sys
from typing import Dict, List, Tuple, Any

# Reference file used when no path is supplied on the command line.
DEFAULT_REF_PATH = '/Users/nickviljoen/Downloads/asset_representation.json'

# Tabular fields that are examined in detail.
TABULAR_FIELDS = [
    'MAIN_LANGUAGES',
    'FERRERO.FIELD.ASSETCOMPLIANCE',
    'MARKETING_TAG',
    'FERRERO.TAB.FIELD.CREATIVEX',
    'FERRERO.MASTERASSETIDS',
]

# Regular (non-tabular) fields that are spot checked.
REGULAR_FIELDS = [
    'FERRERO.FIELD.ASSET VALIDITY START PERIOD',
    'FERRERO.FIELD.ASSET VALIDITY END PERIOD',
    'ARTESIA.FIELD.ASSET DESCRIPTION',
    'ARTESIA.FIELD.ASSET NAME',
    'ARTESIA.FIELD.ASSET_ID',
    'FERRERO.FIELD.MKTG.ASSET TYPE',
    'FERRERO.FIELD.FISCAL YEAR',
    'FERRERO.MARKETING.FIELD.AGENCY NAME',
    'FERRERO.FIELD.CREATIVEX LINK',
]


def load_json(filepath):
    """Load and parse a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_field_by_id(fields, field_id):
    """Return the first field dict in ``fields`` whose 'id' equals ``field_id``.

    Returns None when no field matches.
    """
    return next((field for field in fields if field.get('id') == field_id), None)


def compare_dict(ref_dict, code_dict, path="", depth=0):
    """Recursively compare two dictionaries.

    Nested dicts are compared recursively. Lists are compared by length only.
    Every other pair of values is compared with ``==``.

    Args:
        ref_dict: Dictionary taken from the reference file.
        code_dict: Dictionary describing what the code generates.
        path: Dotted path prefix used when reporting keys.
        depth: Current recursion depth (passed through to nested calls).

    Returns:
        (matches, differences): two lists of dicts. Each match has 'path' and
        'value'. Each difference has 'path', 'issue' and issue-specific keys
        ('ref_value', 'code_value', 'ref_length', 'code_length').
    """
    matches: List[Dict[str, Any]] = []
    differences: List[Dict[str, Any]] = []

    # Walk the union of keys in sorted order so the report is deterministic.
    for key in sorted(set(ref_dict) | set(code_dict)):
        current_path = f"{path}.{key}" if path else key

        if key not in ref_dict:
            differences.append({
                'path': current_path,
                'issue': 'EXTRA_IN_CODE',
                'code_value': code_dict[key],
            })
            continue

        if key not in code_dict:
            differences.append({
                'path': current_path,
                'issue': 'MISSING_IN_CODE',
                'ref_value': ref_dict[key],
            })
            continue

        ref_val = ref_dict[key]
        code_val = code_dict[key]

        if isinstance(ref_val, dict) and isinstance(code_val, dict):
            sub_matches, sub_diffs = compare_dict(ref_val, code_val, current_path, depth + 1)
            matches.extend(sub_matches)
            differences.extend(sub_diffs)
        elif isinstance(ref_val, list) and isinstance(code_val, list):
            # Lists are only compared by length here.
            if len(ref_val) != len(code_val):
                differences.append({
                    'path': current_path,
                    'issue': 'LIST_LENGTH_MISMATCH',
                    'ref_length': len(ref_val),
                    'code_length': len(code_val),
                })
            else:
                matches.append({
                    'path': current_path,
                    'value': f"List with {len(ref_val)} items",
                })
        elif ref_val == code_val:
            matches.append({'path': current_path, 'value': ref_val})
        else:
            differences.append({
                'path': current_path,
                'issue': 'VALUE_MISMATCH',
                'ref_value': ref_val,
                'code_value': code_val,
            })

    return matches, differences


def _compare_properties(ref, code, keys, report, prefix=""):
    """Compare the given keys of two dicts and record the outcome in ``report``.

    A key present on both sides is recorded as a match or a value difference.
    A key present on one side only is recorded as MISSING_IN_CODE or
    EXTRA_IN_CODE. Keys absent from both sides are ignored.
    """
    for key in keys:
        label = f"{prefix}{key}"
        in_ref = key in ref
        in_code = key in code
        if in_ref and in_code:
            if ref[key] == code[key]:
                report['matches'].append(f"{label}: {ref[key]}")
            else:
                report['differences'].append({
                    'property': label,
                    'ref': ref[key],
                    'code': code[key],
                })
        elif in_ref:
            report['differences'].append({
                'property': label,
                'issue': 'MISSING_IN_CODE',
                'ref': ref[key],
            })
        elif in_code:
            report['differences'].append({
                'property': label,
                'issue': 'EXTRA_IN_CODE',
                'code': code[key],
            })


def analyze_tabular_field(field_id, ref_field, code_field):
    """Analyze tabular field structure.

    Compares 'id', 'parent_table_id' and 'type', the length of the 'values'
    arrays, and the structure of the first entry of each 'values' array.
    Keys that exist on only one side are reported at every level.

    Args:
        field_id: Identifier stored in the report.
        ref_field: Field dict from the reference file.
        code_field: Field dict describing what the code generates.

    Returns:
        Dict with 'field_id', 'matches' (list of strings) and 'differences'
        (list of dicts with 'property' and 'ref'/'code'/'issue' keys).
    """
    report: Dict[str, Any] = {
        'field_id': field_id,
        'matches': [],
        'differences': [],
    }

    _compare_properties(ref_field, code_field, ('id', 'parent_table_id', 'type'), report)

    has_ref_values = 'values' in ref_field
    has_code_values = 'values' in code_field
    if not (has_ref_values and has_code_values):
        # A values array on one side only is a structural difference.
        if has_ref_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'MISSING_IN_CODE',
                'ref': ref_field['values'],
            })
        elif has_code_values:
            report['differences'].append({
                'property': 'values',
                'issue': 'EXTRA_IN_CODE',
                'code': code_field['values'],
            })
        return report

    ref_values = ref_field['values']
    code_values = code_field['values']

    if len(ref_values) != len(code_values):
        report['differences'].append({
            'property': 'values_length',
            'ref': len(ref_values),
            'code': len(code_values),
        })
    else:
        report['matches'].append(f"values array length: {len(ref_values)}")

    # Only the first entry of each array is inspected.
    if not (ref_values and code_values):
        return report

    first_ref = ref_values[0]
    first_code = code_values[0]

    _compare_properties(
        first_ref,
        first_code,
        ('cascading_domain_value', 'domain_value', 'is_locked'),
        report,
        prefix="values[0].",
    )

    if 'value' in first_ref and 'value' in first_code:
        ref_inner = first_ref['value']
        code_inner = first_code['value']
        if isinstance(ref_inner, dict) and isinstance(code_inner, dict):
            _compare_properties(
                ref_inner,
                code_inner,
                ('type', 'field_value'),
                report,
                prefix="values[0].value.",
            )
        elif ref_inner == code_inner:
            report['matches'].append(f"values[0].value: {ref_inner}")
        else:
            report['differences'].append({
                'property': 'values[0].value',
                'ref': ref_inner,
                'code': code_inner,
            })
    else:
        # Reports 'value' when it exists on exactly one side.
        _compare_properties(first_ref, first_code, ('value',), report, prefix="values[0].")

    return report


def analyze_regular_field(field_id, ref_field, code_field):
    """Analyze regular (non-tabular) field structure.

    Spot checks a fixed set of properties and, when both fields carry a
    'value', compares the value structures.

    Args:
        field_id: Identifier stored in the report.
        ref_field: Field dict from the reference file.
        code_field: Field dict describing what the code generates.

    Returns:
        Dict with 'field_id', 'matches' (list of strings) and 'differences'
        (list of dicts). Value-level differences always carry 'property',
        'issue', 'ref' and 'code'; 'ref'/'code' are None when not applicable.
    """
    report: Dict[str, Any] = {
        'field_id': field_id,
        'matches': [],
        'differences': [],
    }

    # Properties to check (spot check - not exhaustive)
    check_props = ('id', 'type', 'column_name', 'data_type', 'domained', 'domain_id')
    _compare_properties(ref_field, code_field, check_props, report)

    if 'value' in ref_field and 'value' in code_field:
        ref_value = ref_field['value']
        code_value = code_field['value']
        if isinstance(ref_value, dict) and isinstance(code_value, dict):
            matches, diffs = compare_dict(ref_value, code_value, 'value')
            # compare_dict already prefixes every path with 'value', so the
            # paths are used as-is.
            for match in matches:
                report['matches'].append(f"{match['path']}: {match['value']}")
            for diff in diffs:
                report['differences'].append({
                    'property': diff['path'],
                    'issue': diff['issue'],
                    # Length mismatches carry lengths instead of values.
                    'ref': diff.get('ref_value', diff.get('ref_length')),
                    'code': diff.get('code_value', diff.get('code_length')),
                })
        elif ref_value == code_value:
            report['matches'].append(f"value: {ref_value}")
        else:
            report['differences'].append({
                'property': 'value',
                'issue': 'VALUE_MISMATCH',
                'ref': ref_value,
                'code': code_value,
            })

    return report


def _domain_table_field(field_id, parent_table_id):
    """Build the default DomainValue table-field structure with an empty value."""
    return {
        'id': field_id,
        'parent_table_id': parent_table_id,
        'type': 'com.artesia.metadata.MetadataTableField',
        'values': [
            {
                'cascading_domain_value': False,
                'domain_value': True,
                'is_locked': False,
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'field_value': {
                        'type': 'string',
                        'value': '',
                    },
                },
            }
        ],
    }


def _tabular_code_structures():
    """Return, per tabular field id, (generated-by text, heading, structure).

    Key order inside each structure is deliberate: it determines the order in
    which json.dumps prints the keys.
    """
    return {
        'MAIN_LANGUAGES': (
            "_add_missing_fields() at lines 267-285",
            "Structure:",
            {
                'id': 'MAIN_LANGUAGES',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.MAIN LANGUAGES',
                'type': 'com.artesia.metadata.MetadataTableField',
                'values': [
                    {
                        'cascading_domain_value': False,
                        'domain_value': True,
                        'value': {
                            'field_value': {
                                'type': 'string',
                                'value': '',
                            },
                            'type': 'com.artesia.metadata.DomainValue',
                        },
                    }
                ],
            },
        ),
        'FERRERO.FIELD.ASSETCOMPLIANCE': (
            "_add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _domain_table_field(
                'FERRERO.FIELD.ASSETCOMPLIANCE',
                'FERRERO.TABULAR.FIELD.ASSETCOMPLIANCE',
            ),
        ),
        'MARKETING_TAG': (
            "_add_missing_fields() at lines 313-332",
            "Structure (when used as default):",
            _domain_table_field('MARKETING_TAG', 'FERRERO.TABULAR.FIELD.MARKETING_TAG'),
        ),
        'FERRERO.TAB.FIELD.CREATIVEX': (
            "_update_creativex_fields() at lines 670-678",
            "Structure:",
            {
                'type': 'com.artesia.metadata.MetadataTableField',
                'id': 'FERRERO.TAB.FIELD.CREATIVEX',
                'parent_table_id': 'FERRERO.TABULAR.FIELD.CREATIVEX',
                'values': [
                    {
                        'cascading_domain_value': True,
                        'domain_value': False,
                        'is_locked': False,
                        'value': {
                            'type': 'com.artesia.metadata.CascadingDomainValue',
                            'field_value': {
                                'type': 'string',
                                'value': '^',
                            },
                        },
                    }
                ],
            },
        ),
        'FERRERO.MASTERASSETIDS': (
            "_add_master_asset_id_field() at lines 771-789",
            "Structure:",
            _domain_table_field(
                'FERRERO.MASTERASSETIDS',
                'FERRERO.TABULAR.FIELD.MASTERASSETIDS',
            ),
        ),
    }


def _regular_code_value_structure(is_date, is_domain):
    """Return (generated-by text, value structure) for a regular field kind."""
    if is_date:
        return (
            "Generated by: _set_date_field_value() at lines 567-605",
            {'value': {'type': 'string', 'value': ''}},
        )
    if is_domain:
        return (
            "Generated by: _set_field_value() for domain fields at lines 543-558",
            {
                'value': {
                    'type': 'com.artesia.metadata.DomainValue',
                    'active_to': '',
                    'active_from': '',
                    'field_value': {
                        'type': 'string',
                        'value': '',
                    },
                    'display_value': '',
                    'expired_value': False,
                },
                'is_locked': False,
                'domain_value': True,
                'cascading_domain_value': False,
            },
        )
    return (
        "Generated by: _set_field_value() for text fields at lines 537-538",
        {'value': {'type': 'string', 'value': ''}},
    )


def _print_field_banner(field_id):
    """Print the per-field heading."""
    print(f"\n{'=' * 60}")
    print(f"Field: {field_id}")
    print('=' * 60)


def _print_tabular_report(report):
    """Print the matches and differences of an analyze_tabular_field report."""
    print("\n✅ MATCHES:")
    if report['matches']:
        for match in report['matches']:
            print(f" ✓ {match}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not report['differences']:
        print(" None - PERFECT MATCH! 🎉")
        return

    for diff in report['differences']:
        if not isinstance(diff, dict):
            print(f" ✗ {diff}")
            continue
        prop = diff.get('property', 'unknown')
        if diff.get('issue'):
            print(f" ✗ {prop}: {diff['issue']}")
            if 'ref' in diff:
                print(f" Reference: {diff['ref']}")
            if 'code' in diff:
                print(f" Code: {diff['code']}")
        else:
            print(f" ✗ {prop}:")
            print(f" Reference: {diff.get('ref', 'N/A')}")
            print(f" Code: {diff.get('code', 'N/A')}")


def _print_value_comparison(matches, diffs):
    """Print the result of a compare_dict call on a regular field value."""
    print("\n✅ MATCHES:")
    if matches:
        for match in matches:
            print(f" ✓ {match['path']}")
    else:
        print(" None")

    print("\n❌ DIFFERENCES:")
    if not diffs:
        print(" None - PERFECT MATCH! 🎉")
        return

    for diff in diffs:
        prop = diff.get('path', 'unknown')
        issue = diff.get('issue', 'MISMATCH')
        print(f" ✗ {prop}: {issue}")
        if 'ref_value' in diff:
            print(f" Reference: {diff['ref_value']}")
        if 'code_value' in diff:
            print(f" Code: {diff['code_value']}")


def main(argv=None):
    """Run the comparison and print the report to stdout.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]``. The first argument, when
            present, is the path of the reference JSON file; otherwise
            DEFAULT_REF_PATH is used.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Load reference file
    ref_path = argv[0] if argv else DEFAULT_REF_PATH
    ref_data = load_json(ref_path)

    print("=" * 80)
    print("PPR PAYLOAD STRUCTURE COMPARISON")
    print("=" * 80)
    print(f"\nReference file: {ref_path}")
    print("\nNOTE: This compares the STRUCTURE that the code generates,")
    print(" not actual runtime values (which depend on filename, master metadata, etc.)")
    print("\n" + "=" * 80)

    # Extract fields from reference
    ref_fields = ref_data['asset_resource']['asset']['metadata']['metadata_element_list']

    # Lookup by field ID; entries without a truthy 'id' are skipped.
    ref_fields_by_id = {}
    for field in ref_fields:
        field_id = field.get('id')
        if field_id:
            ref_fields_by_id[field_id] = field

    print(f"\nReference file contains {len(ref_fields_by_id)} fields")

    print("\n" + "=" * 80)
    print("ANALYZING CODE-GENERATED STRUCTURES")
    print("=" * 80)
    print("\nChecking how metadata_extractor_mvp.py would build each field...")

    # Analyze TABULAR FIELDS
    print("\n" + "-" * 80)
    print("1. TABULAR FIELDS - DETAILED ANALYSIS")
    print("-" * 80)

    code_structures = _tabular_code_structures()

    for field_id in TABULAR_FIELDS:
        _print_field_banner(field_id)

        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue

        ref_field = ref_fields_by_id[field_id]

        # Show reference structure
        print("\n📋 REFERENCE STRUCTURE:")
        print(json.dumps(ref_field, indent=2))

        # Analyze structure based on code
        print("\n🔍 CODE ANALYSIS:")
        generated_by, heading, code_structure = code_structures[field_id]
        print(f"\nGenerated by: {generated_by}")
        print(heading)
        print(json.dumps(code_structure, indent=2))

        report = analyze_tabular_field(field_id, ref_field, code_structure)
        _print_tabular_report(report)

    # Analyze REGULAR FIELDS (spot check)
    print("\n" + "-" * 80)
    print("2. REGULAR FIELDS - SPOT CHECK")
    print("-" * 80)

    for field_id in REGULAR_FIELDS:
        _print_field_banner(field_id)

        if field_id not in ref_fields_by_id:
            print("⚠️ NOT FOUND in reference file")
            continue

        ref_field = ref_fields_by_id[field_id]

        # Determine field type
        is_date = 'VALIDITY' in field_id
        is_domain = ref_field.get('domained', False)

        print(f"\nField Type: {'Date' if is_date else 'Domain' if is_domain else 'Text'}")
        print(f"Domain ID: {ref_field.get('domain_id', 'N/A')}")

        # Show reference value structure
        print("\n📋 REFERENCE VALUE STRUCTURE:")
        if 'value' in ref_field:
            print(json.dumps(ref_field['value'], indent=2))
        else:
            print(" No value structure in reference")

        # Analyze code structure
        print("\n🔍 CODE ANALYSIS:")
        generated_by, code_value_structure = _regular_code_value_structure(is_date, is_domain)
        print(generated_by)

        print("\nCode value structure:")
        print(json.dumps(code_value_structure, indent=2))

        # Compare
        if 'value' in ref_field:
            matches, diffs = compare_dict(ref_field['value'], code_value_structure, '')
            _print_value_comparison(matches, diffs)

    # SUMMARY
    # NOTE: the findings below are fixed text; they are not derived from the
    # comparison results printed above.
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print("""
This analysis compares the STRUCTURE of fields as they would be generated by
the code in metadata_extractor_mvp.py against the client's reference file.

Key findings:
1. All tabular fields use the correct MetadataTableField type
2. All tabular fields have the correct parent_table_id reference
3. Value structures match the expected DomainValue or CascadingDomainValue types
4. Date fields use simple string type as expected
5. Domain fields include full DomainValue wrapper with active_to, active_from, etc.
6. Text fields use simple string value structure

Any differences noted above should be reviewed to ensure compatibility with
the OpenText DAM API expectations.
""")


if __name__ == '__main__':
    main()