#!/usr/bin/env python3 """Convert All_Regions_Specs.xlsx → specs.json for the L'Oréal Spec Tool.""" import json, uuid, re from datetime import date import openpyxl XLSX = "/Users/phildore/Desktop/All_Regions_Specs.xlsx" OUT = "/Users/phildore/Documents/CLAUDE_PROJECTS/loreal-spec-tool/specs.json" def clean(v): if v is None: return "" s = str(v).strip() return "" if s.lower() in ("none", "n/a", "-", "nan") else s def split_file_types(raw): if not raw: return [] parts = re.split(r"[,/\s]+", raw.upper()) known = {"JPG","JPEG","PNG","TIFF","TIF","PSD","WEBP","SVG","ZIP","PDF","GIF","MP4","MOV","AVI","TEXT","SPREADSHEET","VIDEO"} out = [] for p in parts: p = p.strip(".()") if p in known: out.append(p) return list(dict.fromkeys(out)) # deduplicate, preserve order wb = openpyxl.load_workbook(XLSX) ws = wb["Retailer Specs"] rows = list(ws.iter_rows(values_only=True)) # Row 1 is a title row; row 2 has the actual column headers headers = [str(h).strip() if h else "" for h in rows[1]] print("Columns:", headers) specs = [] today = date.today().isoformat() for row in rows[2:]: if not any(row): continue d = dict(zip(headers, row)) file_types_raw = clean(d.get("FILE TYPE", "")) file_types = split_file_types(file_types_raw) spec = { "id": str(uuid.uuid4()), "country": clean(d.get("COUNTRY", "")), "retailer": clean(d.get("RETAILER", "")), "contentGrouping": clean(d.get("ECOM CONTENT GROUPING", "")), "division": clean(d.get("DIVISION", "")), "format": clean(d.get("FORMAT", "")), "dimensions": clean(d.get("ASSET DIMENSION", "")), "maxWeight": clean(d.get("ASSET WEIGHT", "")), "fileTypes": file_types, "fileTypesRaw": file_types_raw, "maxAssets": clean(d.get("MAX NUMBER OF ASSETS", "")), "guidelines": clean(d.get("RETAILER ASSET GUIDELINES", "")), "deliveryMethod": clean(d.get("DELIVERY METHOD FOR SYNDICATION", "")), "deliveryDetail": clean(d.get("DETAILED", "")), "handledBy": clean(d.get("HANDLED BY (WIP)", "")), "namingConvention":clean(d.get("FILE NAMING CONVENTION FOR RETAILER", "")), "uploadLink": clean(d.get("LINKS FOR UPLOAD", "")), "imageExample": clean(d.get("IMAGE EXAMPLE", "")), "notes": "", "updatedAt": today, } specs.append(spec) output = { "specs": specs, "meta": { "version": "1.0", "lastUpdated": today, "totalSpecs": len(specs), } } with open(OUT, "w", encoding="utf-8") as f: json.dump(output, f, ensure_ascii=False, indent=2) print(f"Done — {len(specs)} specs written to {OUT}") # Print sample if specs: print("\nSample spec:") print(json.dumps(specs[0], indent=2, ensure_ascii=False))