Core Components Implemented:
- FilenameParser: V2 naming convention parser with strict validation
- MetadataMerger: Merge master + filename metadata (filename priority)
- BoxFileRetriever: List/download files from Box folders
- DAM Lookup Domains: Complete documentation (182 domains)
Features:
- Parse V2 filenames: OMG_JOB_BRAND_COUNTRY_LANG_TITLE_TYPE_VER_SEC_RATIO_TRACKING
- Strip upload components (Job Number & Tracking ID)
- Extract tracking IDs and load master metadata from PostgreSQL
- Merge metadata with filename always winning
- Identify editable vs locked fields
- Build proper asset representation for DAM upload
Files Added:
- src/FilenameParser.php (tested - 8/8 passing)
- src/MetadataMerger.php
- src/BoxFileRetriever.php
- ECOMMERCE_ALLOWED_FIELDS.md (182 lookup domains)
- DAM_LOOKUPDOMAINS_RAW.json (15MB raw data)
- test_filename_parser.php
- fetch_lookupdomains.php
- UPLOAD_FROM_BOX_STATUS.md (complete documentation)
Next Phase: UI integration - Add "Upload from Box" tab to workflow_v3.php
🤖 Generated with Claude Code
384 lines
14 KiB
PHP
384 lines
14 KiB
PHP
<?php
|
|
|
|
/**
 * FilenameParser - V2 Naming Convention Parser
 *
 * Parses filenames according to Ferrero V2 naming convention:
 * [OMG_JOB_NUMBER]_[BRAND_CODE]_[COUNTRY_CODE]_[LANGUAGE_CODE]_[SUBJECT_TITLE]_[ASSET_TYPE]_[SPOT_VERSION]_[SECONDS]S_[ASPECT_RATIO]_[TRACKING_ID]
 *
 * Example: 1234567_RAF_CH_de_TEST_FILE_OLV_001_15S_16x9_a7K9mP.mp4
 *
 * On upload to DAM, the OMG Job Number and Tracking ID are stripped:
 * Final: RAF_CH_de_TEST_FILE_OLV_001_15S_16x9.mp4
 *
 * The parser is stateful: every call to parseFilename() (directly or through
 * one of the convenience methods) resets and repopulates the error and
 * warning lists returned by getValidationErrors() / getWarnings().
 */
class FilenameParser
{
    /**
     * Number of underscore-separated parts required when every mandatory
     * component is present (everything except the optional Tracking ID,
     * with a single-part Subject Title).
     */
    const MIN_PARTS = 9;

    /** Duration component: one or more digits followed by "S" (e.g. 15S). */
    const DURATION_PATTERN = '/^(\d+)S$/i';

    private $validationErrors = [];
    private $warnings = [];

    /**
     * Parse a filename according to V2 naming convention.
     *
     * Parsing never aborts early: every component is examined so that the
     * caller receives the complete list of problems in one pass.
     *
     * @param string $filename The filename to parse (with or without extension)
     * @return array Parsed components. Always an array; check the 'is_valid'
     *               key (true when 'validation_errors' is empty). Components
     *               that could not be parsed are null.
     */
    public function parseFilename($filename)
    {
        $this->validationErrors = [];
        $this->warnings = [];

        // Separate the extension from the part that carries the convention.
        $pathInfo = pathinfo($filename);
        $filenameWithoutExt = $pathInfo['filename'];
        $extension = isset($pathInfo['extension']) ? '.' . $pathInfo['extension'] : '';

        $parts = explode('_', $filenameWithoutExt);
        $partCount = count($parts);

        // 9 mandatory components; the 10th (Tracking ID) is optional.
        // Parsing continues after this error to provide detailed messages.
        if ($partCount < self::MIN_PARTS) {
            $this->validationErrors[] = "Invalid filename structure. Expected minimum 9 parts, got " . $partCount;
        }

        $parsed = [
            'original_filename' => $filename,
            'filename_without_ext' => $filenameWithoutExt,
            'extension' => $extension,
            'omg_job_number' => null,
            'brand_code' => null,
            'country_code' => null,
            'language_code' => null,
            'subject_title' => null,
            'asset_type' => null,
            'spot_version' => null,
            'has_master' => false,
            'seconds' => null,
            'aspect_ratio' => null,
            'tracking_id' => null,
            'validation_errors' => [],
            'warnings' => []
        ];

        $index = 0;

        // 1. OMG Job Number (must be all digits, max 10 digits).
        // explode() always yields at least one element, so $parts[0] exists.
        if (ctype_digit($parts[$index])) {
            $omgJobNumber = $parts[$index];
            if (strlen($omgJobNumber) > 10) {
                $this->validationErrors[] = "OMG Job Number too long: $omgJobNumber (max 10 digits)";
            } else {
                $parsed['omg_job_number'] = $omgJobNumber;
            }
            $index++;
        } else {
            // Do not advance: the current part is treated as the brand code
            // so the remaining components can still be reported on.
            $this->validationErrors[] = "OMG Job Number missing or invalid (must be numbers only). Found: {$parts[$index]}";
        }

        // 2. Brand Code (2-5 characters, normalised to uppercase)
        if (isset($parts[$index])) {
            $brandCode = $parts[$index];
            $brandLength = strlen($brandCode);
            if ($brandLength >= 2 && $brandLength <= 5) {
                $parsed['brand_code'] = strtoupper($brandCode);
            } else {
                $this->validationErrors[] = "Brand Code invalid: $brandCode (must be 2-5 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Brand Code missing";
        }

        // 3. Country Code (2 characters, normalised to uppercase)
        if (isset($parts[$index])) {
            $countryCode = $parts[$index];
            if (strlen($countryCode) === 2) {
                $parsed['country_code'] = strtoupper($countryCode);
            } else {
                $this->validationErrors[] = "Country Code invalid: $countryCode (must be 2 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Country Code missing";
        }

        // 4. Language Code (2-3 characters, normalised to lowercase)
        if (isset($parts[$index])) {
            $languageCode = $parts[$index];
            $languageLength = strlen($languageCode);
            if ($languageLength >= 2 && $languageLength <= 3) {
                $parsed['language_code'] = strtolower($languageCode);
            } else {
                $this->validationErrors[] = "Language Code invalid: $languageCode (must be 2-3 characters)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Language Code missing";
        }

        // 5. Subject Title: every part between the language code and the
        // asset type. The title may itself contain underscores, so the asset
        // type has to be located by look-ahead.
        $assetTypeIndex = $this->locateAssetType($parts, $index);

        if ($assetTypeIndex !== null) {
            $parsed['subject_title'] = implode('_', array_slice($parts, $index, $assetTypeIndex - $index));

            if ($parsed['subject_title'] === '') {
                // The asset type directly follows the language code (or the
                // title part is empty): a mandatory component is absent.
                $this->validationErrors[] = "Subject Title missing";
            } elseif (strlen($parsed['subject_title']) > 15) {
                $this->warnings[] = "Subject Title exceeds 15 characters: {$parsed['subject_title']}";
            }

            $index = $assetTypeIndex;
        } else {
            $this->validationErrors[] = "Could not locate Asset Type (must be 3 uppercase letters)";
            // Assume the next part is the subject title and keep going.
            if (isset($parts[$index])) {
                $parsed['subject_title'] = $parts[$index];
                $index++;
            }
        }

        // 6. Asset Type (3 characters, normalised to uppercase)
        if (isset($parts[$index]) && strlen($parts[$index]) === 3) {
            $parsed['asset_type'] = strtoupper($parts[$index]);
            $index++;
        } else {
            $this->validationErrors[] = "Asset Type missing or invalid (must be 3 characters)";
        }

        // 7. Spot Version (3 characters; 'MST' in any case marks a master)
        if (isset($parts[$index])) {
            $spotVersion = $parts[$index];
            if (strtoupper($spotVersion) === 'MST') {
                $parsed['has_master'] = true;
                $parsed['spot_version'] = 'MST';
            } elseif (strlen($spotVersion) === 3) {
                $parsed['spot_version'] = strtoupper($spotVersion);
            } else {
                $this->validationErrors[] = "Spot Version invalid: $spotVersion (must be 3 characters)";
                // Keep the raw value so it still shows up in diagnostics.
                $parsed['spot_version'] = $spotVersion;
            }
            $index++;
        } else {
            $this->validationErrors[] = "Spot Version missing";
        }

        // 8. Duration (Seconds) - format: 15S or 6S
        if (isset($parts[$index])) {
            $durationPart = $parts[$index];
            if (preg_match(self::DURATION_PATTERN, $durationPart, $matches)) {
                $parsed['seconds'] = $matches[1];
            } else {
                $this->validationErrors[] = "Duration invalid: $durationPart (must be format: 15S)";
            }
            $index++;
        } else {
            $this->validationErrors[] = "Duration missing";
        }

        // 9. Aspect Ratio - format: 16x9, 4x3, 1x1
        if (isset($parts[$index])) {
            $aspectRatio = $parts[$index];
            if (!preg_match('/^\d+x\d+$/i', $aspectRatio)) {
                $this->validationErrors[] = "Aspect Ratio invalid: $aspectRatio (must be format: 16x9)";
            }
            // The raw value is stored even when invalid.
            $parsed['aspect_ratio'] = $aspectRatio;
            $index++;
        } else {
            $this->validationErrors[] = "Aspect Ratio missing";
        }

        // 10. Tracking ID (optional, 6 alphanumeric characters).
        // A malformed ID only produces a warning and is still returned.
        if (isset($parts[$index])) {
            $trackingId = $parts[$index];
            if (!(strlen($trackingId) === 6 && ctype_alnum($trackingId))) {
                $this->warnings[] = "Tracking ID format invalid: $trackingId (should be 6 alphanumeric characters)";
            }
            $parsed['tracking_id'] = $trackingId;
            $index++;
        }

        // Anything left over is reported but does not invalidate the name.
        if ($index < $partCount) {
            $this->warnings[] = "Extra parts in filename: " . implode('_', array_slice($parts, $index));
        }

        $parsed['validation_errors'] = $this->validationErrors;
        $parsed['warnings'] = $this->warnings;
        $parsed['is_valid'] = empty($this->validationErrors);

        return $parsed;
    }

    /**
     * Find the index of the Asset Type part.
     *
     * A candidate is a part of exactly 3 uppercase letters that is followed
     * by a 3-character part (the Spot Version). The first candidate that is
     * additionally followed by a Duration part (e.g. "15S") wins, which keeps
     * titles made of 3-letter uppercase words (e.g. NEW_CAR) from being
     * mistaken for the asset type. If no candidate has a duration behind it,
     * the first candidate is returned so the later steps can report what is
     * wrong with the remaining parts.
     *
     * @param array $parts Underscore-separated filename parts
     * @param int   $start Index of the first part after the language code
     * @return int|null Index of the asset type, or null if none was found
     */
    private function locateAssetType(array $parts, $start)
    {
        $fallback = null;
        $partCount = count($parts);

        for ($i = $start; $i < $partCount; $i++) {
            $candidate = $parts[$i];

            // ctype_upper() is true only when every character is an
            // uppercase letter, so no separate alphabetic check is needed.
            if (strlen($candidate) !== 3 || !ctype_upper($candidate)) {
                continue;
            }
            if (!isset($parts[$i + 1]) || strlen($parts[$i + 1]) !== 3) {
                continue;
            }
            if (isset($parts[$i + 2]) && preg_match(self::DURATION_PATTERN, $parts[$i + 2]) === 1) {
                return $i;
            }
            if ($fallback === null) {
                $fallback = $i;
            }
        }

        return $fallback;
    }

    /**
     * Tell whether a parsed component holds a usable value.
     *
     * A plain truthiness test is not sufficient because the string "0" is
     * falsy in PHP and would be dropped (e.g. a duration of 0S).
     *
     * @param mixed $value
     * @return bool
     */
    private function hasValue($value)
    {
        return $value !== null && $value !== '';
    }

    /**
     * Validate filename structure strictly
     *
     * @param string $filename
     * @return bool True when parsing produced no validation errors
     */
    public function validateStructure($filename)
    {
        $parsed = $this->parseFilename($filename);
        return $parsed['is_valid'];
    }

    /**
     * Strip upload components (OMG Job Number and Tracking ID)
     *
     * The result is rebuilt from the parsed (case-normalised) components.
     * Components that failed to parse are skipped, so call
     * validateStructure() first if only valid names may be uploaded.
     *
     * @param string $filename
     * @return string Clean filename for upload
     */
    public function stripUploadComponents($filename)
    {
        $parsed = $this->parseFilename($filename);

        $cleanParts = [];
        $leadingKeys = [
            'brand_code',
            'country_code',
            'language_code',
            'subject_title',
            'asset_type',
            'spot_version',
        ];

        foreach ($leadingKeys as $key) {
            if ($this->hasValue($parsed[$key])) {
                $cleanParts[] = $parsed[$key];
            }
        }

        // 'seconds' holds digits only; the trailing "S" is restored here.
        if ($this->hasValue($parsed['seconds'])) {
            $cleanParts[] = $parsed['seconds'] . 'S';
        }
        if ($this->hasValue($parsed['aspect_ratio'])) {
            $cleanParts[] = $parsed['aspect_ratio'];
        }

        // 'extension' is either '' or already includes the leading dot.
        return implode('_', $cleanParts) . $parsed['extension'];
    }

    /**
     * Get clean filename for upload (same as stripUploadComponents)
     *
     * @param string $filename
     * @return string
     */
    public function getCleanFilename($filename)
    {
        return $this->stripUploadComponents($filename);
    }

    /**
     * Extract tracking ID from filename
     *
     * The value is returned even when it does not match the expected
     * 6-alphanumeric format; in that case a warning is recorded.
     *
     * @param string $filename
     * @return string|null Tracking ID or null if not found
     */
    public function extractTrackingId($filename)
    {
        $parsed = $this->parseFilename($filename);
        return $parsed['tracking_id'];
    }

    /**
     * Get validation errors recorded by the most recent parse
     *
     * @return array
     */
    public function getValidationErrors()
    {
        return $this->validationErrors;
    }

    /**
     * Get warnings recorded by the most recent parse
     *
     * @return array
     */
    public function getWarnings()
    {
        return $this->warnings;
    }

    /**
     * Format parsed data for display
     *
     * @param array $parsed Result of parseFilename()
     * @return string Multi-line, human-readable summary
     */
    public function formatForDisplay($parsed)
    {
        if (!$parsed) {
            return "Invalid filename";
        }

        $output = "Filename: {$parsed['original_filename']}\n";
        $output .= "Valid: " . ($parsed['is_valid'] ? 'YES' : 'NO') . "\n\n";

        $output .= "Components:\n";
        $output .= "  OMG Job Number: " . ($parsed['omg_job_number'] ?? 'N/A') . "\n";
        $output .= "  Brand Code: " . ($parsed['brand_code'] ?? 'N/A') . "\n";
        $output .= "  Country Code: " . ($parsed['country_code'] ?? 'N/A') . "\n";
        $output .= "  Language Code: " . ($parsed['language_code'] ?? 'N/A') . "\n";
        $output .= "  Subject Title: " . ($parsed['subject_title'] ?? 'N/A') . "\n";
        $output .= "  Asset Type: " . ($parsed['asset_type'] ?? 'N/A') . "\n";
        $output .= "  Spot Version: " . ($parsed['spot_version'] ?? 'N/A') . "\n";
        $output .= "  Has Master: " . ($parsed['has_master'] ? 'YES' : 'NO') . "\n";
        $output .= "  Duration: " . ($parsed['seconds'] ?? 'N/A') . " seconds\n";
        $output .= "  Aspect Ratio: " . ($parsed['aspect_ratio'] ?? 'N/A') . "\n";
        $output .= "  Tracking ID: " . ($parsed['tracking_id'] ?? 'N/A') . "\n";

        if (!empty($parsed['validation_errors'])) {
            $output .= "\nValidation Errors:\n";
            foreach ($parsed['validation_errors'] as $error) {
                $output .= "  - $error\n";
            }
        }

        if (!empty($parsed['warnings'])) {
            $output .= "\nWarnings:\n";
            foreach ($parsed['warnings'] as $warning) {
                $output .= "  - $warning\n";
            }
        }

        return $output;
    }
}
|