#!/usr/bin/env python3
"""Convert All_Regions_Specs.xlsx → specs.json for the L'Oréal Spec Tool."""
import json, uuid, re
from datetime import date
import openpyxl

XLSX = "/Users/phildore/Desktop/All_Regions_Specs.xlsx"
OUT  = "/Users/phildore/Documents/CLAUDE_PROJECTS/loreal-spec-tool/specs.json"

def clean(v):
    if v is None:
        return ""
    s = str(v).strip()
    return "" if s.lower() in ("none", "n/a", "-", "nan") else s

def split_file_types(raw):
    if not raw:
        return []
    parts = re.split(r"[,/\s]+", raw.upper())
    known = {"JPG","JPEG","PNG","TIFF","TIF","PSD","WEBP","SVG","ZIP","PDF","GIF","MP4","MOV","AVI","TEXT","SPREADSHEET","VIDEO"}
    out = []
    for p in parts:
        p = p.strip(".()")
        if p in known:
            out.append(p)
    return list(dict.fromkeys(out))  # deduplicate, preserve order

wb = openpyxl.load_workbook(XLSX)
ws = wb["Retailer Specs"]

rows = list(ws.iter_rows(values_only=True))
# Row 1 is a title row; row 2 has the actual column headers
headers = [str(h).strip() if h else "" for h in rows[1]]
print("Columns:", headers)

specs = []
today = date.today().isoformat()

for row in rows[2:]:
    if not any(row):
        continue
    d = dict(zip(headers, row))

    file_types_raw = clean(d.get("FILE TYPE", ""))
    file_types = split_file_types(file_types_raw)

    spec = {
        "id":              str(uuid.uuid4()),
        "country":         clean(d.get("COUNTRY", "")),
        "retailer":        clean(d.get("RETAILER", "")),
        "contentGrouping": clean(d.get("ECOM CONTENT GROUPING", "")),
        "division":        clean(d.get("DIVISION", "")),
        "format":          clean(d.get("FORMAT", "")),
        "dimensions":      clean(d.get("ASSET DIMENSION", "")),
        "maxWeight":       clean(d.get("ASSET WEIGHT", "")),
        "fileTypes":       file_types,
        "fileTypesRaw":    file_types_raw,
        "maxAssets":       clean(d.get("MAX NUMBER OF ASSETS", "")),
        "guidelines":      clean(d.get("RETAILER ASSET GUIDELINES", "")),
        "deliveryMethod":  clean(d.get("DELIVERY METHOD FOR SYNDICATION", "")),
        "deliveryDetail":  clean(d.get("DETAILED", "")),
        "handledBy":       clean(d.get("HANDLED BY (WIP)", "")),
        "namingConvention":clean(d.get("FILE NAMING CONVENTION FOR RETAILER", "")),
        "uploadLink":      clean(d.get("LINKS FOR UPLOAD", "")),
        "imageExample":    clean(d.get("IMAGE EXAMPLE", "")),
        "notes":           "",
        "updatedAt":       today,
    }
    specs.append(spec)

output = {
    "specs": specs,
    "meta": {
        "version":     "1.0",
        "lastUpdated": today,
        "totalSpecs":  len(specs),
    }
}

with open(OUT, "w", encoding="utf-8") as f:
    json.dump(output, f, ensure_ascii=False, indent=2)

print(f"Done — {len(specs)} specs written to {OUT}")

# Print sample
if specs:
    print("\nSample spec:")
    print(json.dumps(specs[0], indent=2, ensure_ascii=False))