ferrero-opentext/src/MetadataMerger.php
DJP 3a95076726 Add Upload from Box workflow - Phase 1 Complete
Core Components Implemented:
- FilenameParser: V2 naming convention parser with strict validation
- MetadataMerger: Merge master + filename metadata (filename priority)
- BoxFileRetriever: List/download files from Box folders
- DAM Lookup Domains: Complete documentation (182 domains)

Features:
- Parse V2 filenames: OMG_JOB_BRAND_COUNTRY_LANG_TITLE_TYPE_VER_SEC_RATIO_TRACKING
- Strip upload components (Job Number & Tracking ID)
- Extract tracking IDs and load master metadata from PostgreSQL
- Merge metadata with filename always winning
- Identify editable vs locked fields
- Build proper asset representation for DAM upload

Files Added:
- src/FilenameParser.php (tested - 8/8 passing)
- src/MetadataMerger.php
- src/BoxFileRetriever.php
- ECOMMERCE_ALLOWED_FIELDS.md (182 lookup domains)
- DAM_LOOKUPDOMAINS_RAW.json (15MB raw data)
- test_filename_parser.php
- fetch_lookupdomains.php
- UPLOAD_FROM_BOX_STATUS.md (complete documentation)

Next Phase: UI integration - Add "Upload from Box" tab to workflow_v3.php

🤖 Generated with Claude Code
2025-10-29 15:47:30 -04:00

408 lines
13 KiB
PHP

<?php
/**
 * MetadataMerger - Merge master asset metadata with filename-derived data.
 *
 * Inputs combined by this class:
 * 1. A master asset record whose 'metadata' entry holds the asset metadata
 *    (either a JSON string or an already decoded array).
 * 2. The parsed components of a filename (asset type, language code,
 *    brand code, original filename, extension).
 *
 * Priority: filename data always wins over master data.
 *
 * Fields written by this class (reported as editable):
 * - FERRERO.FIELD.MKTG.ASSET TYPE  (from filename)
 * - MAIN_LANGUAGES                 (from filename, tabular)
 * - ARTESIA.FIELD.ASSET NAME       (from filename)
 * - FERRERO.FIELD.SUB BRAND        (from filename)
 * - FERRERO.FIELD.STATE            (always set to 'Local')
 * - FERRERO.FIELD.FISCAL YEAR      (default, only when master has none)
 *
 * Every other field is carried over from the master asset unchanged and is
 * reported as locked.
 */
class MetadataMerger
{
    /**
     * Field mapping from V2 filename components to DAM field IDs
     */
    private $fieldMapping = [
        'brand_code' => 'FERRERO.FIELD.SUB BRAND',
        'language_code' => 'MAIN_LANGUAGES', // Tabular field
        'asset_type' => 'FERRERO.FIELD.MKTG.ASSET TYPE',
        'fiscal_year' => 'FERRERO.FIELD.FISCAL YEAR',
        'state' => 'FERRERO.FIELD.STATE',
        'asset_name' => 'ARTESIA.FIELD.ASSET NAME',
        'asset_description' => 'ARTESIA.FIELD.ASSET DESCRIPTION'
    ];

    /**
     * Fields that are derived from filename (editable).
     *
     * NOTE(review): not referenced by any method in this class; editability
     * is decided from the per-field source in identifyEditableFields().
     */
    private $derivedFields = [
        'MAIN_LANGUAGES',
        'FERRERO.FIELD.MKTG.ASSET TYPE',
        'ARTESIA.FIELD.ASSET NAME'
    ];

    /**
     * Merge master metadata with filename-parsed data
     *
     * @param array $masterMetadata Master asset record; its 'metadata' entry
     *                              may be a JSON string or a decoded array
     * @param array $parsedFilename Parsed filename data
     * @return array ['fields' => merged metadata, 'sources' => field ID to
     *               source map, 'conflicts' => list of overrides]
     */
    public function mergeMetadata($masterMetadata, $parsedFilename)
    {
        $merged = [
            'fields' => [],
            'sources' => [], // Track which field came from where
            'conflicts' => [] // Track any overrides (for logging)
        ];

        // Start with master metadata as base
        if (isset($masterMetadata['metadata'])) {
            $masterMeta = is_string($masterMetadata['metadata'])
                ? json_decode($masterMetadata['metadata'], true)
                : $masterMetadata['metadata'];

            // Undecodable JSON yields null and is treated as "no master data"
            if ($masterMeta && is_array($masterMeta)) {
                $merged['fields'] = $masterMeta;

                // Mark all initial fields as coming from master
                if (isset($masterMeta['metadata_element_list'])
                    && is_array($masterMeta['metadata_element_list'])
                ) {
                    foreach ($masterMeta['metadata_element_list'] as $field) {
                        if (isset($field['id']) && is_scalar($field['id'])) {
                            $merged['sources'][$field['id']] = 'master';
                        }
                    }
                }
            }
        }

        // Override with filename-derived fields (filename always wins)
        $this->applyFilenameData($merged, $parsedFilename);

        return $merged;
    }

    /**
     * Apply filename-derived data to merged metadata
     *
     * @param array &$merged Merged metadata array (modified in place)
     * @param array $parsedFilename Parsed filename data
     */
    private function applyFilenameData(&$merged, $parsedFilename)
    {
        // Guarantee a list to append to, even when master data carried a
        // malformed (non-array) metadata_element_list.
        if (!isset($merged['fields']['metadata_element_list'])
            || !is_array($merged['fields']['metadata_element_list'])
        ) {
            $merged['fields']['metadata_element_list'] = [];
        }

        // 1. Asset Type
        if (!empty($parsedFilename['asset_type'])) {
            $this->updateOrAddField(
                $merged,
                'FERRERO.FIELD.MKTG.ASSET TYPE',
                $parsedFilename['asset_type'],
                'filename',
                'com.artesia.metadata.MetadataField',
                true // domain_value
            );
        }

        // 2. Language Code (MAIN_LANGUAGES - tabular field).
        // The whole table is replaced by a single upper-cased row.
        if (!empty($parsedFilename['language_code'])) {
            $this->updateOrAddTabularField(
                $merged,
                'MAIN_LANGUAGES',
                'FERRERO.TABULAR.FIELD.MAIN LANGUAGES',
                strtoupper($parsedFilename['language_code']),
                'filename'
            );
        }

        // 3. Asset Name (original filename without its extension).
        // Skipped when no filename is available so the master name is not
        // overwritten with an empty value.
        $assetName = $this->stripExtension(
            $parsedFilename['original_filename'] ?? '',
            $parsedFilename['extension'] ?? ''
        );
        if ($assetName !== '') {
            $this->updateOrAddField(
                $merged,
                'ARTESIA.FIELD.ASSET NAME',
                $assetName,
                'filename',
                'com.artesia.metadata.MetadataField',
                false // not domain_value
            );
        }

        // 4. Brand Code (if available)
        if (!empty($parsedFilename['brand_code'])) {
            $this->updateOrAddField(
                $merged,
                'FERRERO.FIELD.SUB BRAND',
                $parsedFilename['brand_code'],
                'filename',
                'com.artesia.metadata.MetadataField',
                false
            );
        }

        // 5. State: always set to 'Local', replacing any master value
        $this->updateOrAddField(
            $merged,
            'FERRERO.FIELD.STATE',
            'Local',
            'default',
            'com.artesia.metadata.MetadataField',
            true // domain_value
        );

        // 6. Fiscal Year: keep the master value, otherwise fall back to a
        // hard-coded default
        if (!$this->fieldExists($merged, 'FERRERO.FIELD.FISCAL YEAR')) {
            $this->updateOrAddField(
                $merged,
                'FERRERO.FIELD.FISCAL YEAR',
                '2025/2026',
                'default',
                'com.artesia.metadata.MetadataField',
                true // domain_value
            );
        }
    }

    /**
     * Remove a trailing extension from a filename.
     *
     * Only a ".ext" suffix at the very end of the name is removed (matched
     * case-insensitively); occurrences of the extension text elsewhere in
     * the name are left alone. The extension may be given with or without
     * its leading dot. A name consisting solely of the extension is
     * returned unchanged.
     *
     * @param mixed $filename Filename to clean
     * @param mixed $extension Extension, with or without leading dot
     * @return string Filename without the trailing extension
     */
    private function stripExtension($filename, $extension)
    {
        $filename = (string) $filename;
        $extension = ltrim((string) $extension, '.');
        if ($extension === '') {
            return $filename;
        }

        $suffix = '.' . $extension;
        $suffixLength = strlen($suffix);
        if (strlen($filename) > $suffixLength
            && substr_compare($filename, $suffix, -$suffixLength, $suffixLength, true) === 0
        ) {
            return substr($filename, 0, -$suffixLength);
        }

        return $filename;
    }

    /**
     * Update or add a metadata field
     *
     * An existing field with the same ID is replaced entirely. When the
     * replaced field was recorded under a different source, an entry is
     * appended to the conflicts list.
     *
     * @param array &$merged Merged metadata array
     * @param string $fieldId Field ID
     * @param mixed $value Field value
     * @param string $source Source (master/filename/default)
     * @param string $type Field type
     * @param bool $isDomainValue Whether this is a domain value
     */
    private function updateOrAddField(&$merged, $fieldId, $value, $source, $type, $isDomainValue = false)
    {
        $fieldIndex = $this->findFieldIndex($merged, $fieldId);

        $fieldStructure = [
            'id' => $fieldId,
            'type' => $type,
            'value' => [
                'cascading_domain_value' => false,
                'domain_value' => $isDomainValue,
                'value' => [
                    'type' => 'string',
                    'value' => $value
                ]
            ]
        ];

        if ($fieldIndex !== false) {
            $this->recordConflict(
                $merged,
                $fieldId,
                $merged['fields']['metadata_element_list'][$fieldIndex],
                $source,
                $value
            );
            // Update existing field
            $merged['fields']['metadata_element_list'][$fieldIndex] = $fieldStructure;
        } else {
            // Add new field
            $merged['fields']['metadata_element_list'][] = $fieldStructure;
        }

        $merged['sources'][$fieldId] = $source;
    }

    /**
     * Update or add a tabular field
     *
     * The field is written with exactly one row holding $value as a domain
     * value; any rows of an existing field with the same ID are discarded.
     *
     * @param array &$merged Merged metadata array
     * @param string $fieldId Field ID
     * @param string $parentTableId Parent table ID
     * @param mixed $value Field value
     * @param string $source Source (master/filename/default)
     */
    private function updateOrAddTabularField(&$merged, $fieldId, $parentTableId, $value, $source)
    {
        $fieldIndex = $this->findFieldIndex($merged, $fieldId);

        $fieldStructure = [
            'id' => $fieldId,
            'parent_table_id' => $parentTableId,
            'type' => 'com.artesia.metadata.MetadataTableField',
            'values' => [
                [
                    'cascading_domain_value' => false,
                    'domain_value' => true,
                    'value' => [
                        'field_value' => [
                            'type' => 'string',
                            'value' => $value
                        ],
                        'type' => 'com.artesia.metadata.DomainValue'
                    ]
                ]
            ]
        ];

        if ($fieldIndex !== false) {
            $this->recordConflict(
                $merged,
                $fieldId,
                $merged['fields']['metadata_element_list'][$fieldIndex],
                $source,
                $value
            );
            // Update existing field
            $merged['fields']['metadata_element_list'][$fieldIndex] = $fieldStructure;
        } else {
            // Add new field
            $merged['fields']['metadata_element_list'][] = $fieldStructure;
        }

        $merged['sources'][$fieldId] = $source;
    }

    /**
     * Append a conflict entry when a field is overridden by another source.
     *
     * Nothing is recorded when the existing field already has the same
     * source. Every entry carries the same keys (field, old_source,
     * new_source, old_value, new_value) for simple and tabular fields alike.
     *
     * @param array &$merged Merged metadata array
     * @param string $fieldId Field ID being overridden
     * @param mixed $existingField Field entry about to be replaced
     * @param string $newSource Source of the new value
     * @param mixed $newValue New value
     */
    private function recordConflict(&$merged, $fieldId, $existingField, $newSource, $newValue)
    {
        $oldSource = $merged['sources'][$fieldId] ?? 'unknown';
        if ($oldSource === $newSource) {
            return;
        }

        $merged['conflicts'][] = [
            'field' => $fieldId,
            'old_source' => $oldSource,
            'new_source' => $newSource,
            'old_value' => $this->readFieldValue($existingField),
            'new_value' => $newValue
        ];
    }

    /**
     * Read the value held by a metadata field entry.
     *
     * @param mixed $field Entry of metadata_element_list
     * @return mixed The nested value of a simple field, the row values of a
     *               tabular field joined with ', ', or null when neither
     *               shape is present
     */
    private function readFieldValue($field)
    {
        if (isset($field['value']['value']['value'])) {
            return $field['value']['value']['value'];
        }

        if (isset($field['values']) && is_array($field['values'])) {
            $rowValues = [];
            foreach ($field['values'] as $row) {
                if (isset($row['value']['field_value']['value'])) {
                    $rowValues[] = $this->stringifyValue($row['value']['field_value']['value']);
                }
            }
            if ($rowValues) {
                return implode(', ', $rowValues);
            }
        }

        return null;
    }

    /**
     * Convert a value to text for display.
     *
     * @param mixed $value Value to convert
     * @return string Empty string for null, string cast for scalars, JSON
     *                for anything else
     */
    private function stringifyValue($value)
    {
        if ($value === null) {
            return '';
        }
        if (is_scalar($value)) {
            return (string) $value;
        }

        $encoded = json_encode($value);

        return $encoded === false ? '' : $encoded;
    }

    /**
     * Find index of a field in metadata_element_list
     *
     * @param array $merged Merged metadata array
     * @param string $fieldId Field ID to find
     * @return int|false Index of the first matching field or false if not found
     */
    private function findFieldIndex($merged, $fieldId)
    {
        if (!isset($merged['fields']['metadata_element_list'])
            || !is_array($merged['fields']['metadata_element_list'])
        ) {
            return false;
        }

        foreach ($merged['fields']['metadata_element_list'] as $index => $field) {
            if (isset($field['id']) && $field['id'] === $fieldId) {
                return $index;
            }
        }

        return false;
    }

    /**
     * Check if a field exists
     *
     * @param array $merged Merged metadata array
     * @param string $fieldId Field ID to check
     * @return bool
     */
    private function fieldExists($merged, $fieldId)
    {
        return $this->findFieldIndex($merged, $fieldId) !== false;
    }

    /**
     * Build asset representation for upload
     *
     * Wraps the merged fields together with a hard-coded metadata model ID
     * and security policy ID.
     *
     * @param array $mergedMetadata Merged metadata from mergeMetadata()
     * @return array Asset representation
     */
    public function buildAssetRepresentation($mergedMetadata)
    {
        return [
            'asset_resource' => [
                'asset' => [
                    'metadata' => $mergedMetadata['fields'],
                    'metadata_model_id' => 'ECOMMERCE',
                    'security_policy_list' => [
                        ['id' => 1594]
                    ]
                ]
            ]
        ];
    }

    /**
     * Identify which fields are editable
     *
     * A field is editable when its recorded source is 'filename' or
     * 'default'; fields sourced from master are not returned.
     *
     * @param array $mergedMetadata Merged metadata from mergeMetadata()
     * @return array List of field IDs that are editable
     */
    public function identifyEditableFields($mergedMetadata)
    {
        $editableFields = [];

        foreach ($mergedMetadata['sources'] ?? [] as $fieldId => $source) {
            if ($source === 'filename' || $source === 'default') {
                $editableFields[] = $fieldId;
            }
        }

        return $editableFields;
    }

    /**
     * Get field mapping
     *
     * @return array
     */
    public function getFieldMapping()
    {
        return $this->fieldMapping;
    }

    /**
     * Get conflicts from merge
     *
     * @param array $mergedMetadata
     * @return array
     */
    public function getConflicts($mergedMetadata)
    {
        return $mergedMetadata['conflicts'] ?? [];
    }

    /**
     * Format merged metadata for display
     *
     * @param array $mergedMetadata Merged metadata from mergeMetadata()
     * @return string Plain-text listing of fields (with source and
     *                editable/locked marker) followed by any conflicts
     */
    public function formatForDisplay($mergedMetadata)
    {
        $output = "=== Merged Metadata ===\n\n";

        if (!empty($mergedMetadata['fields']['metadata_element_list'])
            && is_array($mergedMetadata['fields']['metadata_element_list'])
        ) {
            $output .= "Fields:\n";
            foreach ($mergedMetadata['fields']['metadata_element_list'] as $field) {
                $fieldId = $field['id'] ?? 'Unknown';
                $source = $mergedMetadata['sources'][$fieldId] ?? 'unknown';
                $editable = in_array($source, ['filename', 'default'], true) ? 'Editable' : 'Locked';
                $output .= " - $fieldId [$source] [$editable]\n";

                // Extract value (simple field or tabular rows)
                $value = $this->readFieldValue($field);
                if ($value !== null) {
                    $output .= " Value: " . $this->stringifyValue($value) . "\n";
                }
            }
        }

        if (!empty($mergedMetadata['conflicts'])) {
            $output .= "\nConflicts (Filename Wins):\n";
            foreach ($mergedMetadata['conflicts'] as $conflict) {
                // Read keys defensively: entries produced elsewhere may lack
                // the value keys.
                $conflictField = $conflict['field'] ?? 'Unknown';
                $oldSource = $conflict['old_source'] ?? 'unknown';
                $newSource = $conflict['new_source'] ?? 'unknown';
                $oldValue = $this->stringifyValue($conflict['old_value'] ?? null);
                $newValue = $this->stringifyValue($conflict['new_value'] ?? null);

                $output .= " - {$conflictField}:\n";
                $output .= " Old ({$oldSource}): {$oldValue}\n";
                $output .= " New ({$newSource}): {$newValue}\n";
            }
        }

        return $output;
    }
}