ferrero-opentext/src/FilenameParser.php
DJP 3a95076726 Add Upload from Box workflow - Phase 1 Complete
Core Components Implemented:
- FilenameParser: V2 naming convention parser with strict validation
- MetadataMerger: Merge master + filename metadata (filename priority)
- BoxFileRetriever: List/download files from Box folders
- DAM Lookup Domains: Complete documentation (182 domains)

Features:
- Parse V2 filenames: OMG_JOB_BRAND_COUNTRY_LANG_TITLE_TYPE_VER_SEC_RATIO_TRACKING
- Strip upload components (Job Number & Tracking ID)
- Extract tracking IDs and load master metadata from PostgreSQL
- Merge metadata with filename always winning
- Identify editable vs locked fields
- Build proper asset representation for DAM upload

Files Added:
- src/FilenameParser.php (tested - 8/8 passing)
- src/MetadataMerger.php
- src/BoxFileRetriever.php
- ECOMMERCE_ALLOWED_FIELDS.md (182 lookup domains)
- DAM_LOOKUPDOMAINS_RAW.json (15MB raw data)
- test_filename_parser.php
- fetch_lookupdomains.php
- UPLOAD_FROM_BOX_STATUS.md (complete documentation)

Next Phase: UI integration - Add "Upload from Box" tab to workflow_v3.php

🤖 Generated with Claude Code
2025-10-29 15:47:30 -04:00

384 lines
14 KiB
PHP

<?php
/**
 * FilenameParser - V2 Naming Convention Parser
 *
 * Parses filenames according to Ferrero V2 naming convention:
 * [OMG_JOB_NUMBER]_[BRAND_CODE]_[COUNTRY_CODE]_[LANGUAGE_CODE]_[SUBJECT_TITLE]_[ASSET_TYPE]_[SPOT_VERSION]_[SECONDS]S_[ASPECT_RATIO]_[TRACKING_ID]
 *
 * Example: 1234567_RAF_CH_de_TEST_FILE_OLV_001_15S_16x9_a7K9mP.mp4
 *
 * On upload to DAM, the OMG Job Number and Tracking ID are stripped:
 * Final: RAF_CH_de_TEST_FILE_OLV_001_15S_16x9.mp4
 *
 * The Subject Title may itself contain underscores, so the parser has to
 * locate the Asset Type heuristically (see locateAssetType()).
 *
 * The parser keeps the errors and warnings of the most recent
 * parseFilename() call; every public method that parses a filename
 * overwrites them.
 */
class FilenameParser
{
    /** @var string[] Validation errors collected by the most recent parse. */
    private $validationErrors = [];

    /** @var string[] Non-fatal warnings collected by the most recent parse. */
    private $warnings = [];

    /**
     * Parse a filename according to V2 naming convention.
     *
     * Parsing never aborts early: every component is examined so that the
     * caller receives a complete list of problems. The result is therefore
     * always an array; check its 'is_valid' entry to learn whether the
     * filename conforms.
     *
     * @param string $filename The filename to parse (with or without extension)
     * @return array Parsed components plus 'validation_errors', 'warnings' and 'is_valid'
     */
    public function parseFilename($filename)
    {
        $this->validationErrors = [];
        $this->warnings = [];

        // Remove extension
        $pathInfo = pathinfo($filename);
        $filenameWithoutExt = $pathInfo['filename'] ?? '';
        $extension = isset($pathInfo['extension']) ? '.' . $pathInfo['extension'] : '';

        // Split by underscore
        $parts = explode('_', $filenameWithoutExt);
        $partCount = count($parts);

        // Nine components are mandatory; the Tracking ID is the optional tenth.
        // Keep parsing after this error so the caller gets detailed messages.
        if ($partCount < 9) {
            $this->validationErrors[] = "Invalid filename structure. Expected minimum 9 parts, got " . $partCount;
        }

        $parsed = [
            'original_filename' => $filename,
            'filename_without_ext' => $filenameWithoutExt,
            'extension' => $extension,
            'omg_job_number' => null,
            'brand_code' => null,
            'country_code' => null,
            'language_code' => null,
            'subject_title' => null,
            'asset_type' => null,
            'spot_version' => null,
            'has_master' => false,
            'seconds' => null,
            'aspect_ratio' => null,
            'tracking_id' => null,
            'validation_errors' => [],
            'warnings' => []
        ];

        $index = 0;

        // 1. OMG Job Number (must be all digits, max 10 digits)
        // explode() always yields at least one element, so $parts[0] exists.
        $firstPart = $parts[0];
        if (ctype_digit($firstPart)) {
            if (strlen($firstPart) > 10) {
                $this->validationErrors[] = "OMG Job Number too long: $firstPart (max 10 digits)";
            } else {
                $parsed['omg_job_number'] = $firstPart;
            }
            $index++;
        } elseif ($firstPart === '') {
            // Nothing before the first underscore; the empty part is left in
            // place and will be reported again as an invalid Brand Code.
            $this->validationErrors[] = "OMG Job Number missing";
        } else {
            // Do not advance: treat the current part as the Brand Code so the
            // remaining components can still be reported on.
            $this->validationErrors[] = "OMG Job Number missing or invalid (must be numbers only). Found: {$firstPart}";
        }

        // 2. Brand Code (2-5 characters, stored uppercase)
        if (isset($parts[$index])) {
            $brandCode = $parts[$index];
            if (strlen($brandCode) >= 2 && strlen($brandCode) <= 5) {
                $parsed['brand_code'] = strtoupper($brandCode);
            } else {
                $this->validationErrors[] = "Brand Code invalid: $brandCode (must be 2-5 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Brand Code missing";
        }

        // 3. Country Code (2 characters, stored uppercase)
        if (isset($parts[$index])) {
            $countryCode = $parts[$index];
            if (strlen($countryCode) === 2) {
                $parsed['country_code'] = strtoupper($countryCode);
            } else {
                $this->validationErrors[] = "Country Code invalid: $countryCode (must be 2 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Country Code missing";
        }

        // 4. Language Code (2-3 characters, stored lowercase)
        if (isset($parts[$index])) {
            $languageCode = $parts[$index];
            if (strlen($languageCode) >= 2 && strlen($languageCode) <= 3) {
                $parsed['language_code'] = strtolower($languageCode);
            } else {
                $this->validationErrors[] = "Language Code invalid: $languageCode (must be 2-3 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Language Code missing";
        }

        // 5. Subject Title (one or more parts, ending where the Asset Type begins)
        $assetTypeIndex = $this->locateAssetType($parts, $index);
        if ($assetTypeIndex !== null) {
            // Everything between language code and asset type is subject title
            $titleParts = array_slice($parts, $index, $assetTypeIndex - $index);
            $parsed['subject_title'] = implode('_', $titleParts);
            if (strlen($parsed['subject_title']) > 15) {
                $this->warnings[] = "Subject Title exceeds 15 characters: {$parsed['subject_title']}";
            }
            $index = $assetTypeIndex;
        } else {
            $this->validationErrors[] = "Could not locate Asset Type (must be 3 uppercase letters)";
            // Assume next part is subject title
            if (isset($parts[$index])) {
                $parsed['subject_title'] = $parts[$index];
                $index++;
            }
        }

        // 6. Asset Type (3 characters, stored uppercase)
        if (isset($parts[$index]) && strlen($parts[$index]) === 3) {
            $parsed['asset_type'] = strtoupper($parts[$index]);
            $index++;
        } else {
            $this->validationErrors[] = "Asset Type missing or invalid (must be 3 characters)";
        }

        // 7. Spot Version (3 characters; 'MST' in any case marks a master)
        if (isset($parts[$index])) {
            $spotVersion = $parts[$index];
            if (strlen($spotVersion) === 3) {
                $parsed['spot_version'] = strtoupper($spotVersion);
                $parsed['has_master'] = ($parsed['spot_version'] === 'MST');
            } else {
                $this->validationErrors[] = "Spot Version invalid: $spotVersion (must be 3 characters)";
                // Keep the raw value so callers can show what was found.
                $parsed['spot_version'] = $spotVersion;
            }
            $index++;
        } else {
            $this->validationErrors[] = "Spot Version missing";
        }

        // 8. Duration (Seconds) - format: 15S or 6S; only the digits are stored
        if (isset($parts[$index])) {
            $durationPart = $parts[$index];
            if (preg_match('/^(\d+)S$/i', $durationPart, $matches)) {
                $parsed['seconds'] = $matches[1];
            } else {
                $this->validationErrors[] = "Duration invalid: $durationPart (must be format: 15S)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Duration missing";
        }

        // 9. Aspect Ratio - format: 16x9, 4x3, 1x1
        if (isset($parts[$index])) {
            $aspectRatio = $parts[$index];
            if (!preg_match('/^\d+x\d+$/i', $aspectRatio)) {
                $this->validationErrors[] = "Aspect Ratio invalid: $aspectRatio (must be format: 16x9)";
            }
            // Stored even when invalid so callers can show what was found.
            $parsed['aspect_ratio'] = $aspectRatio;
            $index++;
        } else {
            $this->validationErrors[] = "Aspect Ratio missing";
        }

        // 10. Tracking ID (optional, 6 alphanumeric characters)
        if (isset($parts[$index])) {
            $trackingId = $parts[$index];
            if (strlen($trackingId) !== 6 || !ctype_alnum($trackingId)) {
                // A malformed Tracking ID is only a warning; the value is kept.
                $this->warnings[] = "Tracking ID format invalid: $trackingId (should be 6 alphanumeric characters)";
            }
            $parsed['tracking_id'] = $trackingId;
            $index++;
        }

        // Check for extra parts
        if ($index < $partCount) {
            $extraParts = array_slice($parts, $index);
            $this->warnings[] = "Extra parts in filename: " . implode('_', $extraParts);
        }

        // Add validation results
        $parsed['validation_errors'] = $this->validationErrors;
        $parsed['warnings'] = $this->warnings;
        $parsed['is_valid'] = empty($this->validationErrors);

        return $parsed;
    }

    /**
     * Find the index of the Asset Type part.
     *
     * A candidate is a part of exactly 3 uppercase letters that is followed
     * by a 3-character part (the Spot Version). Because words of the Subject
     * Title can match that shape too (e.g. THE_BIG), a candidate whose part
     * two positions later looks like a duration (e.g. 15S) wins. If no
     * candidate has that confirmation, the first candidate is used.
     *
     * @param string[] $parts Underscore-separated filename parts
     * @param int $start Index of the first part after the Language Code
     * @return int|null Index of the Asset Type, or null if no candidate exists
     */
    private function locateAssetType(array $parts, $start)
    {
        $firstCandidate = null;
        $total = count($parts);

        for ($i = $start; $i < $total; $i++) {
            // ctype_upper() only accepts letters, so it also rules out digits.
            if (strlen($parts[$i]) !== 3 || !ctype_upper($parts[$i])) {
                continue;
            }
            if (!isset($parts[$i + 1]) || strlen($parts[$i + 1]) !== 3) {
                continue;
            }
            if (isset($parts[$i + 2]) && preg_match('/^\d+S$/i', $parts[$i + 2])) {
                return $i;
            }
            if ($firstCandidate === null) {
                $firstCandidate = $i;
            }
        }

        return $firstCandidate;
    }

    /**
     * Validate filename structure strictly.
     *
     * @param string $filename
     * @return bool True when parsing produced no validation errors
     */
    public function validateStructure($filename)
    {
        $parsed = $this->parseFilename($filename);

        return $parsed['is_valid'];
    }

    /**
     * Strip upload components (OMG Job Number and Tracking ID).
     *
     * The name is rebuilt from whatever components were recognised, so an
     * invalid filename yields a partial name rather than null. Use
     * validateStructure() first when only conforming names are acceptable.
     *
     * @param string $filename
     * @return string Clean filename for upload
     */
    public function stripUploadComponents($filename)
    {
        $parsed = $this->parseFilename($filename);

        // Build clean filename without OMG Job Number and Tracking ID.
        // Compare against null/'' explicitly: a plain truthiness test would
        // also drop the legitimate value '0'.
        $cleanParts = [];
        $leadingKeys = [
            'brand_code',
            'country_code',
            'language_code',
            'subject_title',
            'asset_type',
            'spot_version',
        ];
        foreach ($leadingKeys as $key) {
            if ($parsed[$key] !== null && $parsed[$key] !== '') {
                $cleanParts[] = $parsed[$key];
            }
        }
        if ($parsed['seconds'] !== null && $parsed['seconds'] !== '') {
            $cleanParts[] = $parsed['seconds'] . 'S';
        }
        if ($parsed['aspect_ratio'] !== null && $parsed['aspect_ratio'] !== '') {
            $cleanParts[] = $parsed['aspect_ratio'];
        }

        // Add extension back ('' when the original had none)
        return implode('_', $cleanParts) . $parsed['extension'];
    }

    /**
     * Get clean filename for upload (same as stripUploadComponents).
     *
     * @param string $filename
     * @return string
     */
    public function getCleanFilename($filename)
    {
        return $this->stripUploadComponents($filename);
    }

    /**
     * Extract tracking ID from filename.
     *
     * The value is returned even when its format triggered a warning.
     *
     * @param string $filename
     * @return string|null Tracking ID or null if not found
     */
    public function extractTrackingId($filename)
    {
        $parsed = $this->parseFilename($filename);

        return $parsed['tracking_id'];
    }

    /**
     * Get validation errors of the most recent parse.
     *
     * @return array
     */
    public function getValidationErrors()
    {
        return $this->validationErrors;
    }

    /**
     * Get warnings of the most recent parse.
     *
     * @return array
     */
    public function getWarnings()
    {
        return $this->warnings;
    }

    /**
     * Format parsed data for display.
     *
     * @param array $parsed Result of parseFilename(); any empty value yields "Invalid filename"
     * @return string Multi-line, human-readable summary
     */
    public function formatForDisplay($parsed)
    {
        if (!$parsed) {
            return "Invalid filename";
        }

        $output = "Filename: {$parsed['original_filename']}\n";
        $output .= "Valid: " . ($parsed['is_valid'] ? 'YES' : 'NO') . "\n\n";
        $output .= "Components:\n";
        $output .= " OMG Job Number: " . ($parsed['omg_job_number'] ?? 'N/A') . "\n";
        $output .= " Brand Code: " . ($parsed['brand_code'] ?? 'N/A') . "\n";
        $output .= " Country Code: " . ($parsed['country_code'] ?? 'N/A') . "\n";
        $output .= " Language Code: " . ($parsed['language_code'] ?? 'N/A') . "\n";
        $output .= " Subject Title: " . ($parsed['subject_title'] ?? 'N/A') . "\n";
        $output .= " Asset Type: " . ($parsed['asset_type'] ?? 'N/A') . "\n";
        $output .= " Spot Version: " . ($parsed['spot_version'] ?? 'N/A') . "\n";
        $output .= " Has Master: " . ($parsed['has_master'] ? 'YES' : 'NO') . "\n";
        $output .= " Duration: " . ($parsed['seconds'] ?? 'N/A') . " seconds\n";
        $output .= " Aspect Ratio: " . ($parsed['aspect_ratio'] ?? 'N/A') . "\n";
        $output .= " Tracking ID: " . ($parsed['tracking_id'] ?? 'N/A') . "\n";

        if (!empty($parsed['validation_errors'])) {
            $output .= "\nValidation Errors:\n";
            foreach ($parsed['validation_errors'] as $error) {
                $output .= " - $error\n";
            }
        }

        if (!empty($parsed['warnings'])) {
            $output .= "\nWarnings:\n";
            foreach ($parsed['warnings'] as $warning) {
                $output .= " - $warning\n";
            }
        }

        return $output;
    }
}