ferrero-naming-tool/backend/excel_parser.py
DJP 82eff7b76a Initial commit: Ferrero Communication Assets Naming Convention Tool
- PHP-based naming convention tool for Ferrero communication assets
- Filename builder with validation
- Filename decoder with component breakdown
- Complete help documentation
- Ferrero brand styling with Montserrat font
- OMG Job Number field (numbers only, max 10 chars)
- 105 brands, 29 countries, 39 asset types from Excel
- Responsive landscape layout
- Data management via JSON (parsed from Excel)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-14 16:05:16 -04:00

167 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
Parse Ferrero Naming Convention Excel file and extract rules
"""
import pandas as pd
import json
import sys
def parse_excel(excel_path):
"""Parse the Filenaming.xlsx and extract naming convention data"""
# Initialize data structure
data = {
"naming_structure": {
"BRAND_CODE": {"min_length": 2, "max_length": 5, "description": "Brand Code"},
"COUNTRY_CODE": {"min_length": 2, "max_length": 2, "description": "Country Code"},
"SUBJECT_TITLE": {"min_length": 1, "max_length": 15, "description": "Subject Title"},
"ASSET_TYPE": {"min_length": 3, "max_length": 3, "description": "Asset Type"},
"SPOT_VERSION": {"min_length": 3, "max_length": 3, "description": "Spot Version (add MST if Master File)"},
"SECONDS": {"min_length": 2, "max_length": 3, "description": "Duration in Seconds"},
"ASPECT_RATIO": {"min_length": 4, "max_length": 4, "description": "Aspect Ratio"}
},
"brands": {},
"countries": {},
"asset_types": {},
"aspect_ratios": [],
"spot_versions": [],
"examples": []
}
# Read the "List of Values" sheet
print("Reading 'List of Values' sheet...")
df = pd.read_excel(excel_path, sheet_name='List of Values')
# Extract brands (columns 0 and 1)
print("Extracting brands...")
for idx, row in df.iterrows():
if idx == 0: # Skip header row
continue
brand_name = row['BRAND CODE']
brand_code = row['Unnamed: 1']
if pd.notna(brand_name) and pd.notna(brand_code):
brand_name = str(brand_name).strip()
brand_code = str(brand_code).strip()
if brand_name and brand_code and brand_name != 'Brand':
data['brands'][brand_code] = brand_name
# Extract countries (columns 3 and 4)
print("Extracting countries...")
for idx, row in df.iterrows():
if idx == 0: # Skip header row
continue
country_name = row['COUNTRY CODE']
country_code = row['Unnamed: 4']
if pd.notna(country_name) and pd.notna(country_code):
country_name = str(country_name).strip()
country_code = str(country_code).strip()
if country_name and country_code and country_name != 'Country':
data['countries'][country_code] = country_name
# Extract asset types (columns 6 and 8 - column 8 has the codes)
print("Extracting asset types...")
for idx, row in df.iterrows():
if idx == 0: # Skip header row
continue
asset_name = row['ASSET TYPE']
asset_code = row['Unnamed: 8'] # Changed from 7 to 8
if pd.notna(asset_name) and pd.notna(asset_code):
asset_name = str(asset_name).strip()
asset_code = str(asset_code).strip()
if asset_name and asset_code and asset_name != 'Asset':
data['asset_types'][asset_code] = asset_name
# Extract spot versions (column 10)
print("Extracting spot versions...")
for idx, row in df.iterrows():
if idx == 0: # Skip header row
continue
spot_version = row['SPOT VERSION']
if pd.notna(spot_version):
spot_version = str(spot_version).strip()
if spot_version and spot_version != 'Spot Version' and spot_version not in data['spot_versions']:
data['spot_versions'].append(spot_version)
# Extract aspect ratios (column 13)
print("Extracting aspect ratios...")
for idx, row in df.iterrows():
if idx == 0: # Skip header row
continue
aspect_ratio = row['ASPECT RATIO']
if pd.notna(aspect_ratio):
aspect_ratio = str(aspect_ratio).strip()
if aspect_ratio and aspect_ratio != 'Code' and aspect_ratio not in data['aspect_ratios']:
data['aspect_ratios'].append(aspect_ratio)
# Add examples from the image
data['examples'] = [
{
'brand': 'Raffaello',
'filename': 'RAF_GL_ME MOMENT_OLV_6S_1x1',
'breakdown': {
'brand_code': 'RAF',
'country_code': 'GL',
'subject_title': 'ME MOMENT',
'asset_type': 'OLV',
'spot_version': '',
'seconds': '6S',
'aspect_ratio': '1x1'
}
},
{
'brand': 'Kinder Gran Sorpresa',
'filename': 'KGS_IT_IN THE AIR_TVC_MST_20S_16x9',
'breakdown': {
'brand_code': 'KGS',
'country_code': 'IT',
'subject_title': 'IN THE AIR',
'asset_type': 'TVC',
'spot_version': 'MST',
'seconds': '20S',
'aspect_ratio': '16x9'
}
},
{
'brand': 'Bueno Ice Cream',
'filename': 'BIC_GL_HOW BUENO IT IS_OLV_6S_1x1',
'breakdown': {
'brand_code': 'BIC',
'country_code': 'GL',
'subject_title': 'HOW BUENO IT IS',
'asset_type': 'OLV',
'spot_version': '',
'seconds': '6S',
'aspect_ratio': '1x1'
}
}
]
return data
if __name__ == '__main__':
excel_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/Filenaming.xlsx'
try:
data = parse_excel(excel_path)
# Save to JSON
output_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/backend/data.json'
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
print(f"\n✓ Successfully parsed Excel file")
print(f"✓ Generated data.json with:")
print(f" - {len(data['brands'])} brands")
print(f" - {len(data['countries'])} countries")
print(f" - {len(data['asset_types'])} asset types")
print(f" - {len(data['aspect_ratios'])} aspect ratios")
print(f" - {len(data['examples'])} examples")
except Exception as e:
print(f"Error parsing Excel file: {e}", file=sys.stderr)
sys.exit(1)