ferrero-naming-tool/backend/excel_parser.py

#!/usr/bin/env python3
"""
Parse Ferrero Naming Convention Excel file and extract rules
"""
import pandas as pd
import json
import sys

def parse_excel(excel_path):
    """Parse the Filenaming.xlsx and extract naming convention data"""

    # Initialize data structure
    data = {
        "naming_structure": {
            "BRAND_CODE": {"min_length": 2, "max_length": 5, "description": "Brand Code"},
            "COUNTRY_CODE": {"min_length": 2, "max_length": 2, "description": "Country Code"},
            "SUBJECT_TITLE": {"min_length": 1, "max_length": 15, "description": "Subject Title"},
            "ASSET_TYPE": {"min_length": 3, "max_length": 3, "description": "Asset Type"},
            "SPOT_VERSION": {"min_length": 3, "max_length": 3, "description": "Spot Version (add MST if Master File)"},
            "SECONDS": {"min_length": 2, "max_length": 3, "description": "Duration in Seconds"},
            "ASPECT_RATIO": {"min_length": 4, "max_length": 4, "description": "Aspect Ratio"}
        },
        "brands": {},
        "countries": {},
        "asset_types": {},
        "aspect_ratios": [],
        "spot_versions": [],
        "examples": []
    }

    # Read the "List of Values" sheet
    print("Reading 'List of Values' sheet...")
    df = pd.read_excel(excel_path, sheet_name='List of Values')

    # Extract brands (columns 0 and 1)
    print("Extracting brands...")
    for idx, row in df.iterrows():
        if idx == 0:  # Skip header row
            continue
        brand_name = row['BRAND CODE']
        brand_code = row['Unnamed: 1']

        if pd.notna(brand_name) and pd.notna(brand_code):
            brand_name = str(brand_name).strip()
            brand_code = str(brand_code).strip()
            if brand_name and brand_code and brand_name != 'Brand':
                data['brands'][brand_code] = brand_name

    # Extract countries (columns 3 and 4)
    print("Extracting countries...")
    for idx, row in df.iterrows():
        if idx == 0:  # Skip header row
            continue
        country_name = row['COUNTRY CODE']
        country_code = row['Unnamed: 4']

        if pd.notna(country_name) and pd.notna(country_code):
            country_name = str(country_name).strip()
            country_code = str(country_code).strip()
            if country_name and country_code and country_name != 'Country':
                data['countries'][country_code] = country_name

    # Extract asset types (columns 6 and 8 - column 8 has the codes)
    print("Extracting asset types...")
    for idx, row in df.iterrows():
        if idx == 0:  # Skip header row
            continue
        asset_name = row['ASSET TYPE']
        asset_code = row['Unnamed: 8']  # Changed from 7 to 8

        if pd.notna(asset_name) and pd.notna(asset_code):
            asset_name = str(asset_name).strip()
            asset_code = str(asset_code).strip()
            if asset_name and asset_code and asset_name != 'Asset':
                data['asset_types'][asset_code] = asset_name

    # Extract spot versions (column 10)
    print("Extracting spot versions...")
    for idx, row in df.iterrows():
        if idx == 0:  # Skip header row
            continue
        spot_version = row['SPOT VERSION']

        if pd.notna(spot_version):
            spot_version = str(spot_version).strip()
            if spot_version and spot_version != 'Spot Version' and spot_version not in data['spot_versions']:
                data['spot_versions'].append(spot_version)

    # Extract aspect ratios (column 13)
    print("Extracting aspect ratios...")
    for idx, row in df.iterrows():
        if idx == 0:  # Skip header row
            continue
        aspect_ratio = row['ASPECT RATIO']

        if pd.notna(aspect_ratio):
            aspect_ratio = str(aspect_ratio).strip()
            if aspect_ratio and aspect_ratio != 'Code' and aspect_ratio not in data['aspect_ratios']:
                data['aspect_ratios'].append(aspect_ratio)

    # Add examples from the image
    data['examples'] = [
        {
            'brand': 'Raffaello',
            'filename': 'RAF_GL_ME MOMENT_OLV_6S_1x1',
            'breakdown': {
                'brand_code': 'RAF',
                'country_code': 'GL',
                'subject_title': 'ME MOMENT',
                'asset_type': 'OLV',
                'spot_version': '',
                'seconds': '6S',
                'aspect_ratio': '1x1'
            }
        },
        {
            'brand': 'Kinder Gran Sorpresa',
            'filename': 'KGS_IT_IN THE AIR_TVC_MST_20S_16x9',
            'breakdown': {
                'brand_code': 'KGS',
                'country_code': 'IT',
                'subject_title': 'IN THE AIR',
                'asset_type': 'TVC',
                'spot_version': 'MST',
                'seconds': '20S',
                'aspect_ratio': '16x9'
            }
        },
        {
            'brand': 'Bueno Ice Cream',
            'filename': 'BIC_GL_HOW BUENO IT IS_OLV_6S_1x1',
            'breakdown': {
                'brand_code': 'BIC',
                'country_code': 'GL',
                'subject_title': 'HOW BUENO IT IS',
                'asset_type': 'OLV',
                'spot_version': '',
                'seconds': '6S',
                'aspect_ratio': '1x1'
            }
        }
    ]

    return data

if __name__ == '__main__':
    excel_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/Filenaming.xlsx'

    try:
        data = parse_excel(excel_path)

        # Save to JSON
        output_path = '/Users/daveporter/Python-Enviroments/Ferrero-naming-convention/backend/data.json'
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"\n✓ Successfully parsed Excel file")
        print(f"✓ Generated data.json with:")
        print(f"  - {len(data['brands'])} brands")
        print(f"  - {len(data['countries'])} countries")
        print(f"  - {len(data['asset_types'])} asset types")
        print(f"  - {len(data['aspect_ratios'])} aspect ratios")
        print(f"  - {len(data['examples'])} examples")

    except Exception as e:
        print(f"Error parsing Excel file: {e}", file=sys.stderr)
        sys.exit(1)