Key Changes:
- Updated metadata_extractor_mvp.py to use SIMPLE structure for all tabular fields
- All tabular fields now use direct value objects (no MetadataTableFieldRow wrapper)
- MAIN_LANGUAGES, ASSETCOMPLIANCE, MARKETING_TAG, CREATIVEX all use SIMPLE structure
- Master Asset ID field updated to SIMPLE structure
- Date fields now use type 'string' instead of 'long'
- Matches DAM reference structure from asset_representation.json

Added Files:
- metadata_extractor_mvp_PROD.py: PROD-specific version with same SIMPLE structure
- Backup files for safety
- Analysis and comparison documentation

Environment:
- Tested and working in PPR environment (ppr.dam.ferrero.com)
- All tabular fields match DAM-supplied reference structure
- Successful uploads confirmed

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
253 lines
8.3 KiB
Python
253 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Metadata Diagnostic Tool
|
|
Analyzes a file's metadata to show what was in the original DAM asset vs what was sent
|
|
Usage: python scripts/test-metadata-diagnostic.py "filename.jpg"
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import json
|
|
import argparse
|
|
|
|
# Add shared library to path
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
|
|
from shared.config_loader import load_config, load_field_mappings
|
|
from shared.database import Database
|
|
from shared.metadata_extractor_mvp import MetadataExtractorMVP
|
|
from shared.filename_parser import FilenameParser
|
|
|
|
def extract_field_value(field):
    """Extract the innermost value from a DAM metadata field structure.

    Two nestings are recognised, checked in this order:

    * ``field['value']['value']['value']``
    * ``field['value']['value']['field_value']['value']``

    Every intermediate level must be a dict. Anything else (missing keys,
    ``None``, strings, lists) yields ``None`` instead of raising, so a single
    oddly shaped field cannot abort a whole diagnostic run.

    Args:
        field: One metadata field entry, normally a dict.

    Returns:
        The innermost value, or ``None`` when the field matches neither shape.
    """
    # NOTE(review): only the singular 'value' key is inspected; a field that
    # carries its data under a different key (e.g. a 'values' list) returns
    # None here -- confirm whether that is intended for tabular fields.
    if not isinstance(field, dict):
        return None

    outer = field.get('value')
    if not isinstance(outer, dict):
        return None

    inner = outer.get('value')
    if not isinstance(inner, dict):
        return None

    # A direct value wins over the 'field_value' wrapper when both are present.
    if 'value' in inner:
        return inner['value']

    wrapped = inner.get('field_value')
    # Require a dict: on a string, `'value' in wrapped` would be a substring
    # test, and on None it would raise TypeError.
    if isinstance(wrapped, dict) and 'value' in wrapped:
        return wrapped['value']

    return None
|
|
|
|
def _print_banner(title):
    """Print a section title framed by 80-column rules, followed by a blank line."""
    print("=" * 80)
    print(title)
    print("=" * 80)
    print("")


def _print_field_status(field_id, value):
    """Print one field's POPULATED/EMPTY line and, if truthy, a 100-char value preview."""
    status = "✓ POPULATED" if value else "✗ EMPTY"
    print("{:60} {}".format(field_id, status))
    if value:
        print(" Value: {}".format(str(value)[:100]))


def _collect_fields(metadata):
    """Walk ``metadata['metadata']['metadata_element_list']`` and report each field.

    Each entry of the outer list is treated as a category; its own
    ``metadata_element_list`` holds the fields. Fields without an ``id`` are
    skipped. Every kept field is printed as it is found.

    Returns:
        dict mapping field id to the value from ``extract_field_value``
        (a repeated id keeps the last value seen).
    """
    collected = {}
    categories = metadata.get('metadata', {}).get('metadata_element_list', [])
    for category in categories:
        if 'metadata_element_list' not in category:
            continue
        for field in category['metadata_element_list']:
            field_id = field.get('id')
            value = extract_field_value(field)
            if field_id:
                collected[field_id] = value
                _print_field_status(field_id, value)
    return collected


def _report_differences(original_fields, sent_fields):
    """Print the three comparison sections between original and sent fields.

    Returns:
        Tuple ``(lost_data, empty_both, new_fields)`` of field-id lists:
        populated in original but empty in sent; empty in both; present only
        in sent. Fields absent from ``sent_fields`` are not counted as lost.
    """
    # Fields that were in original but empty in sent
    print("Fields that were POPULATED in original but EMPTY in sent:")
    print("-" * 80)
    lost_data = []
    for field_id, orig_value in original_fields.items():
        if orig_value and field_id in sent_fields and not sent_fields[field_id]:
            lost_data.append(field_id)
            print("⚠️ {}".format(field_id))
            print(" Original: {}".format(str(orig_value)[:100]))
            print(" Sent: EMPTY")
            print("")

    if not lost_data:
        print("✓ No data loss detected")
    print("")

    # Fields that were empty in original and empty in sent
    print("Fields that were EMPTY in both original and sent:")
    print("-" * 80)
    empty_both = []
    for field_id, orig_value in original_fields.items():
        if not orig_value and field_id in sent_fields and not sent_fields[field_id]:
            empty_both.append(field_id)
            print(" {}".format(field_id))

    if not empty_both:
        print("✓ No fields empty in both")
    print("")

    # Fields only in sent (new fields)
    print("Fields ONLY in sent (not in original):")
    print("-" * 80)
    new_fields = []
    for field_id in sent_fields:
        if field_id not in original_fields:
            new_fields.append(field_id)
            _print_field_status(field_id, sent_fields[field_id])

    if not new_fields:
        print("✓ No new fields added")
    print("")

    return lost_data, empty_both, new_fields


def _save_json(tracking_id, full_metadata, asset_rep):
    """Dump both structures as indented JSON under ``temp/metadata_diagnostic``.

    The directory is relative to the current working directory and is created
    if missing. The tracking id is embedded in both file names.
    """
    output_dir = 'temp/metadata_diagnostic'
    os.makedirs(output_dir, exist_ok=True)

    original_json = os.path.join(output_dir, 'original_metadata_{}.json'.format(tracking_id))
    sent_json = os.path.join(output_dir, 'sent_asset_rep_{}.json'.format(tracking_id))

    with open(original_json, 'w') as f:
        json.dump(full_metadata, f, indent=2)

    with open(sent_json, 'w') as f:
        json.dump(asset_rep, f, indent=2)

    print("Full JSON files saved:")
    print(" Original: {}".format(original_json))
    print(" Sent: {}".format(sent_json))
    print("")


def _run_diagnostic(db, filename, field_mappings, config):
    """Run the diagnostic against an already-open database handle.

    Returns early (after printing an error) when the filename does not parse
    as valid or when no master asset exists for its tracking id. The caller
    owns ``db`` and is responsible for closing it.
    """
    parser = FilenameParser()
    mvp_extractor = MetadataExtractorMVP(field_mappings, config=config)

    # Parse filename to get tracking ID
    parsed = parser.parse_filename(filename)
    if not parsed['is_valid']:
        print("❌ ERROR: Invalid V2 filename")
        print("Validation errors: {}".format(', '.join(parsed['validation_errors'])))
        return

    tracking_id = parsed['tracking_id']
    print("✓ Tracking ID: {}".format(tracking_id))
    print("")

    # Get master asset from database
    master_asset = db.get_master_asset(tracking_id)
    if not master_asset:
        print("❌ ERROR: No master asset found for tracking ID: {}".format(tracking_id))
        return

    print("✓ Master asset found in database")
    print(" OpenText ID: {}".format(master_asset['opentext_id']))
    print("")

    # Get full metadata
    full_metadata = master_asset['full_metadata']

    # Extract all metadata fields from original DAM asset
    _print_banner("ORIGINAL DAM METADATA FIELDS")
    original_fields = _collect_fields(full_metadata)
    print("")
    print("Total fields in original metadata: {}".format(len(original_fields)))
    print("")

    # Build asset representation (what we would send to DAM)
    _print_banner("BUILDING ASSET REPRESENTATION")
    clean_filename = parser.strip_upload_components(filename)

    # Mock box_metadata for testing
    box_metadata = {
        'score': '85',
        'url': 'https://app.creativex.com/test',
        'platforms': ['Facebook', 'Instagram'],
    }

    asset_rep = mvp_extractor.build_mvp_asset_representation(
        master_metadata=full_metadata,
        clean_filename=clean_filename,
        parsed_filename=parsed,
        box_metadata=box_metadata,
        tracking_mode='full',
    )
    print("✓ Asset representation built")
    print("")

    # Extract fields from asset representation
    _print_banner("FIELDS IN ASSET REPRESENTATION (WHAT WE SEND)")
    sent_fields = _collect_fields(asset_rep)
    print("")
    print("Total fields in asset representation: {}".format(len(sent_fields)))
    print("")

    # Compare original vs sent
    _print_banner("COMPARISON: ORIGINAL vs SENT")
    lost_data, empty_both, new_fields = _report_differences(original_fields, sent_fields)

    # Summary
    _print_banner("SUMMARY")
    print("Original metadata fields: {}".format(len(original_fields)))
    print("Sent metadata fields: {}".format(len(sent_fields)))
    print("Data loss (populated → empty): {}".format(len(lost_data)))
    print("Empty in both: {}".format(len(empty_both)))
    print("New fields added: {}".format(len(new_fields)))
    print("")

    # Save full JSON for inspection
    _save_json(tracking_id, full_metadata, asset_rep)


def analyze_metadata(filename):
    """Analyze metadata for a given filename and print a diagnostic report.

    Steps: load ``config/config.yaml`` (relative to the current working
    directory), parse the filename for its tracking id, fetch the master asset
    from the database, list the fields of its stored metadata, build the asset
    representation with mock ``box_metadata``, list its fields, compare the
    two sets, and save both structures as JSON.

    Args:
        filename: Name of the file to analyze.

    Returns:
        None. The report is written to stdout.
    """
    _print_banner("METADATA DIAGNOSTIC TOOL")
    print("Filename: {}".format(filename))
    print("")

    # Load config and initialize
    config = load_config('config/config.yaml')
    field_mappings = load_field_mappings(config)
    db = Database(config)
    try:
        _run_diagnostic(db, filename, field_mappings, config)
    finally:
        # Close the database on every exit path: the early returns for an
        # invalid filename or missing asset, and any exception in between.
        db.close()
|
|
|
|
def main():
    """Command-line entry point: read the filename argument and run the analysis.

    If the analysis raises, the error message is printed under an ``ERROR``
    banner, the traceback is printed, and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description='Metadata Diagnostic Tool')
    cli.add_argument(
        'filename',
        help='Filename to analyze (e.g., "C000000078_KIN_IT_IT_1920x1080_09Dux1.jpg")',
    )
    options = cli.parse_args()

    try:
        analyze_metadata(options.filename)
    except Exception as err:
        rule = "=" * 80
        for line in ("", rule, "ERROR", rule):
            print(line)
        print(str(err))
        # Imported here because it is only needed on the failure path.
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
|
|
|
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
|