#!/usr/bin/env python3 """ Parse Ferrero Naming Convention Excel file and extract rules """ import pandas as pd import json import sys def parse_excel(excel_path): """Parse the Filenaming.xlsx and extract naming convention data""" # Initialize data structure data = { "naming_structure": { "BRAND_CODE": {"min_length": 2, "max_length": 5, "description": "Brand Code"}, "COUNTRY_CODE": {"min_length": 2, "max_length": 2, "description": "Country Code"}, "SUBJECT_TITLE": {"min_length": 1, "max_length": 15, "description": "Subject Title"}, "ASSET_TYPE": {"min_length": 3, "max_length": 3, "description": "Asset Type"}, "SPOT_VERSION": {"min_length": 3, "max_length": 3, "description": "Spot Version (add MST if Master File)"}, "SECONDS": {"min_length": 2, "max_length": 3, "description": "Duration in Seconds"}, "ASPECT_RATIO": {"min_length": 4, "max_length": 4, "description": "Aspect Ratio"} }, "brands": {}, "countries": {}, "asset_types": {}, "aspect_ratios": [], "spot_versions": [], "examples": [] } # Read the "List of Values" sheet print("Reading 'List of Values' sheet...") df = pd.read_excel(excel_path, sheet_name='List of Values') # Extract brands (columns 0 and 1) print("Extracting brands...") for idx, row in df.iterrows(): if idx == 0: # Skip header row continue brand_name = row['BRAND CODE'] brand_code = row['Unnamed: 1'] if pd.notna(brand_name) and pd.notna(brand_code): brand_name = str(brand_name).strip() brand_code = str(brand_code).strip() if brand_name and brand_code and brand_name != 'Brand': data['brands'][brand_code] = brand_name # Extract countries (columns 3 and 4) print("Extracting countries...") for idx, row in df.iterrows(): if idx == 0: # Skip header row continue country_name = row['COUNTRY CODE'] country_code = row['Unnamed: 4'] if pd.notna(country_name) and pd.notna(country_code): country_name = str(country_name).strip() country_code = str(country_code).strip() if country_name and country_code and country_name != 'Country': data['countries'][country_code] = country_name # Extract asset types (columns 6 and 8 - column 8 has the codes) print("Extracting asset types...") for idx, row in df.iterrows(): if idx == 0: # Skip header row continue asset_name = row['ASSET TYPE'] asset_code = row['Unnamed: 8'] # Changed from 7 to 8 if pd.notna(asset_name) and pd.notna(asset_code): asset_name = str(asset_name).strip() asset_code = str(asset_code).strip() if asset_name and asset_code and asset_name != 'Asset': data['asset_types'][asset_code] = asset_name # Extract spot versions (column 10) print("Extracting spot versions...") for idx, row in df.iterrows(): if idx == 0: # Skip header row continue spot_version = row['SPOT VERSION'] if pd.notna(spot_version): spot_version = str(spot_version).strip() if spot_version and spot_version != 'Spot Version' and spot_version not in data['spot_versions']: data['spot_versions'].append(spot_version) # Extract aspect ratios (column 13) print("Extracting aspect ratios...") for idx, row in df.iterrows(): if idx == 0: # Skip header row continue aspect_ratio = row['ASPECT RATIO'] if pd.notna(aspect_ratio): aspect_ratio = str(aspect_ratio).strip() if aspect_ratio and aspect_ratio != 'Code' and aspect_ratio not in data['aspect_ratios']: data['aspect_ratios'].append(aspect_ratio) # Add examples from the image data['examples'] = [ { 'brand': 'Raffaello', 'filename': 'RAF_GL_ME MOMENT_OLV_6S_1x1', 'breakdown': { 'brand_code': 'RAF', 'country_code': 'GL', 'subject_title': 'ME MOMENT', 'asset_type': 'OLV', 'spot_version': '', 'seconds': '6S', 'aspect_ratio': '1x1' } }, { 'brand': 'Kinder Gran Sorpresa', 'filename': 'KGS_IT_IN THE AIR_TVC_MST_20S_16x9', 'breakdown': { 'brand_code': 'KGS', 'country_code': 'IT', 'subject_title': 'IN THE AIR', 'asset_type': 'TVC', 'spot_version': 'MST', 'seconds': '20S', 'aspect_ratio': '16x9' } }, { 'brand': 'Bueno Ice Cream', 'filename': 'BIC_GL_HOW BUENO IT IS_OLV_6S_1x1', 'breakdown': { 'brand_code': 'BIC', 'country_code': 'GL', 'subject_title': 'HOW BUENO IT IS', 'asset_type': 'OLV', 'spot_version': '', 'seconds': '6S', 'aspect_ratio': '1x1' } } ] return data if __name__ == '__main__': excel_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/Filenaming.xlsx' try: data = parse_excel(excel_path) # Save to JSON output_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/backend/data.json' with open(output_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) print(f"\nāœ“ Successfully parsed Excel file") print(f"āœ“ Generated data.json with:") print(f" - {len(data['brands'])} brands") print(f" - {len(data['countries'])} countries") print(f" - {len(data['asset_types'])} asset types") print(f" - {len(data['aspect_ratios'])} aspect ratios") print(f" - {len(data['examples'])} examples") except Exception as e: print(f"Error parsing Excel file: {e}", file=sys.stderr) sys.exit(1)