#!/usr/bin/env python3
"""
Metadata Diagnostic Tool
Analyzes a file's metadata to show what was in the original DAM asset vs what was sent

Usage:
    python scripts/test-metadata-diagnostic.py "filename.jpg"
"""

import sys
import os
import json
import argparse
import traceback

# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from shared.config_loader import load_config, load_field_mappings
from shared.database import Database
from shared.metadata_extractor_mvp import MetadataExtractorMVP
from shared.filename_parser import FilenameParser


def extract_field_value(field):
    """Extract value from DAM field structure.

    Two nestings are recognised:

    * ``field['value']['value']['value']``
    * ``field['value']['value']['field_value']['value']``

    Args:
        field: A single metadata field dict.

    Returns:
        The nested value, or ``None`` when the field does not match either
        shape (including when an intermediate level is not a dict).
    """
    outer = field.get('value')
    if not isinstance(outer, dict):
        return None

    inner = outer.get('value')
    if not isinstance(inner, dict):
        return None

    if 'value' in inner:
        return inner['value']

    # Guard the type here: a non-dict 'field_value' (None, str, ...) would
    # otherwise raise or give a bogus substring match on the lookup.
    wrapped = inner.get('field_value')
    if isinstance(wrapped, dict):
        return wrapped.get('value')
    return None


def _print_banner(title):
    """Print a section title framed by two 80-character rules, then a blank line."""
    print("=" * 80)
    print(title)
    print("=" * 80)
    print("")


def _print_field_status(field_id, value):
    """Print one field's populated/empty status and, if populated, its value (max 100 chars)."""
    status = "✓ POPULATED" if value else "✗ EMPTY"
    print("{:60} {}".format(field_id, status))
    if value:
        print(" Value: {}".format(str(value)[:100]))


def _collect_fields(asset):
    """Walk ``asset['metadata']['metadata_element_list']`` and report every field.

    Each category's own ``metadata_element_list`` is scanned; fields with a
    truthy ``id`` are printed and recorded.

    Returns:
        dict mapping field id -> extracted value (``None`` when not extractable).
    """
    fields = {}
    # 'or' fallbacks make a missing key and an explicit None behave the same.
    metadata = asset.get('metadata') or {}
    for category in metadata.get('metadata_element_list') or []:
        for field in category.get('metadata_element_list') or []:
            field_id = field.get('id')
            if not field_id:
                continue
            value = extract_field_value(field)
            fields[field_id] = value
            _print_field_status(field_id, value)
    return fields


def _report_comparison(original_fields, sent_fields):
    """Print the original-vs-sent comparison and return the three field-id lists.

    Returns:
        Tuple ``(lost_data, empty_both, new_fields)`` where ``lost_data`` are
        fields populated in the original but empty in sent, ``empty_both`` are
        fields empty on both sides, and ``new_fields`` exist only in sent.
    """
    _print_banner("COMPARISON: ORIGINAL vs SENT")

    # Fields that were in original but empty in sent
    print("Fields that were POPULATED in original but EMPTY in sent:")
    print("-" * 80)
    lost_data = [
        field_id
        for field_id, orig_value in original_fields.items()
        if orig_value and field_id in sent_fields and not sent_fields[field_id]
    ]
    for field_id in lost_data:
        print("⚠️ {}".format(field_id))
        print(" Original: {}".format(str(original_fields[field_id])[:100]))
        print(" Sent: EMPTY")
        print("")
    if not lost_data:
        print("✓ No data loss detected")
    print("")

    # Fields that were empty in original and empty in sent
    print("Fields that were EMPTY in both original and sent:")
    print("-" * 80)
    empty_both = [
        field_id
        for field_id, orig_value in original_fields.items()
        if not orig_value and field_id in sent_fields and not sent_fields[field_id]
    ]
    for field_id in empty_both:
        print(" {}".format(field_id))
    if not empty_both:
        print("✓ No fields empty in both")
    print("")

    # Fields only in sent (new fields)
    print("Fields ONLY in sent (not in original):")
    print("-" * 80)
    new_fields = [
        field_id for field_id in sent_fields if field_id not in original_fields
    ]
    for field_id in new_fields:
        _print_field_status(field_id, sent_fields[field_id])
    if not new_fields:
        print("✓ No new fields added")
    print("")

    return lost_data, empty_both, new_fields


def _save_json_dumps(tracking_id, full_metadata, asset_rep):
    """Write both metadata structures as indented JSON under temp/metadata_diagnostic."""
    output_dir = 'temp/metadata_diagnostic'
    os.makedirs(output_dir, exist_ok=True)

    original_json = os.path.join(
        output_dir, 'original_metadata_{}.json'.format(tracking_id))
    sent_json = os.path.join(
        output_dir, 'sent_asset_rep_{}.json'.format(tracking_id))

    with open(original_json, 'w', encoding='utf-8') as f:
        json.dump(full_metadata, f, indent=2)
    with open(sent_json, 'w', encoding='utf-8') as f:
        json.dump(asset_rep, f, indent=2)

    print("Full JSON files saved:")
    print(" Original: {}".format(original_json))
    print(" Sent: {}".format(sent_json))
    print("")


def _run_diagnostic(db, config, field_mappings, filename):
    """Run the diagnostic against an already-open database handle.

    Returns early (after printing an error) when the filename does not parse
    as valid or no master asset exists for its tracking ID. The caller owns
    ``db`` and is responsible for closing it.
    """
    parser = FilenameParser()
    mvp_extractor = MetadataExtractorMVP(field_mappings, config=config)

    # Parse filename to get tracking ID
    parsed = parser.parse_filename(filename)
    if not parsed['is_valid']:
        print("❌ ERROR: Invalid V2 filename")
        print("Validation errors: {}".format(', '.join(parsed['validation_errors'])))
        return

    tracking_id = parsed['tracking_id']
    print("✓ Tracking ID: {}".format(tracking_id))
    print("")

    # Get master asset from database
    master_asset = db.get_master_asset(tracking_id)
    if not master_asset:
        print("❌ ERROR: No master asset found for tracking ID: {}".format(tracking_id))
        return

    print("✓ Master asset found in database")
    print(" OpenText ID: {}".format(master_asset['opentext_id']))
    print("")

    # Get full metadata
    full_metadata = master_asset['full_metadata']

    # Extract all metadata fields from original DAM asset
    _print_banner("ORIGINAL DAM METADATA FIELDS")
    original_fields = _collect_fields(full_metadata)
    print("")
    print("Total fields in original metadata: {}".format(len(original_fields)))
    print("")

    # Build asset representation (what we would send to DAM)
    _print_banner("BUILDING ASSET REPRESENTATION")
    clean_filename = parser.strip_upload_components(filename)

    # Mock box_metadata for testing
    box_metadata = {
        'score': '85',
        'url': 'https://app.creativex.com/test',
        'platforms': ['Facebook', 'Instagram'],
    }

    asset_rep = mvp_extractor.build_mvp_asset_representation(
        master_metadata=full_metadata,
        clean_filename=clean_filename,
        parsed_filename=parsed,
        box_metadata=box_metadata,
        tracking_mode='full',
    )
    print("✓ Asset representation built")
    print("")

    # Extract fields from asset representation
    _print_banner("FIELDS IN ASSET REPRESENTATION (WHAT WE SEND)")
    sent_fields = _collect_fields(asset_rep)
    print("")
    print("Total fields in asset representation: {}".format(len(sent_fields)))
    print("")

    # Compare original vs sent
    lost_data, empty_both, new_fields = _report_comparison(
        original_fields, sent_fields)

    # Summary
    _print_banner("SUMMARY")
    print("Original metadata fields: {}".format(len(original_fields)))
    print("Sent metadata fields: {}".format(len(sent_fields)))
    print("Data loss (populated → empty): {}".format(len(lost_data)))
    print("Empty in both: {}".format(len(empty_both)))
    print("New fields added: {}".format(len(new_fields)))
    print("")

    # Save full JSON for inspection
    _save_json_dumps(tracking_id, full_metadata, asset_rep)


def analyze_metadata(filename):
    """Analyze metadata for a given filename.

    Prints a report comparing the master asset's stored metadata with the
    asset representation built for sending, and writes both structures as
    JSON files under ``temp/metadata_diagnostic``.

    Args:
        filename: Upload filename to analyze; its tracking ID is used to look
            up the master asset in the database.
    """
    _print_banner("METADATA DIAGNOSTIC TOOL")
    print("Filename: {}".format(filename))
    print("")

    # Load config and initialize
    config = load_config('config/config.yaml')
    field_mappings = load_field_mappings(config)
    db = Database(config)
    try:
        _run_diagnostic(db, config, field_mappings, filename)
    finally:
        # Close on every exit path: early returns and exceptions previously
        # skipped the close call and leaked the handle.
        db.close()


def main():
    """Parse the command line, run the diagnostic, and exit with status 1 on any error."""
    parser = argparse.ArgumentParser(description='Metadata Diagnostic Tool')
    parser.add_argument(
        'filename',
        help='Filename to analyze (e.g., "C000000078_KIN_IT_IT_1920x1080_09Dux1.jpg")',
    )
    args = parser.parse_args()

    try:
        analyze_metadata(args.filename)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback, then
        # signal it to the shell via a non-zero exit status.
        print("")
        print("=" * 80)
        print("ERROR")
        print("=" * 80)
        print(str(e))
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()