""" HTML Site Generator for H&M EMS Product Data. Generates a self-contained HTML page with embedded CSS/JS that displays product data grouped by Article ID, with a language selector to switch between translation variants and an export function for tracking edits. """ import os import json import shutil from datetime import datetime # Language display name mapping LANGUAGE_DISPLAY_NAMES = { "ar-jo": "Arabic (Jordan)", "ar-ma": "Arabic (Morocco)", "bg-bg": "Bulgarian (Bulgaria)", "bs-ba": "Bosnian (Bosnia and Herzegovina)", "ca-es": "Catalan (Spain)", "cs-cz": "Czech (Czech Republic)", "da-dk": "Danish (Denmark)", "de-at": "German (Austria)", "de-ch": "German (Switzerland)", "de-de": "German (Germany)", "el-gr": "Greek (Greece)", "en-au": "English (Australia)", "en-ca": "English (Canada)", "en-cn": "English (China)", "en-gb": "English (Great Britain)", "en-hk": "English (Hong Kong)", "en-id": "English (Indonesia)", "en-ie": "English (Ireland)", "en-in": "English (India)", "en-kh": "English (Cambodia)", "en-my": "English (Malaysia)", "en-nz": "English (New Zealand)", "en-ph": "English (Philippines)", "en-pr": "English (Puerto Rico)", "en-sg": "English (Singapore)", "en-th": "English (Thailand)", "en-tw": "English (Taiwan)", "en-us": "English (United States)", "en-za": "English (South Africa)", "es-cl": "Spanish (Chile)", "es-co": "Spanish (Colombia)", "es-cr": "Spanish (Costa Rica)", "es-ec": "Spanish (Ecuador)", "es-es": "Spanish (Spain)", "es-gt": "Spanish (Guatemala)", "es-mx": "Spanish (Mexico)", "es-pe": "Spanish (Peru)", "es-uy": "Spanish (Uruguay)", "et-ee": "Estonian (Estonia)", "fi-fi": "Finnish (Finland)", "fr-be": "French (Belgium)", "fr-ca": "French (Canada)", "fr-ch": "French (Switzerland)", "fr-fr": "French (France)", "fr-lu": "French (Luxembourg)", "he-il": "Hebrew (Israel)", "hr-hr": "Croatian (Croatia)", "hu-hu": "Hungarian (Hungary)", "is-is": "Icelandic (Iceland)", "it-ch": "Italian (Switzerland)", "it-it": "Italian (Italy)", "ja-jp": "Japanese (Japan)", "ka-ge": "Georgian (Georgia)", "ko-kr": "Korean (South Korea)", "lt-lt": "Lithuanian (Lithuania)", "lv-lv": "Latvian (Latvia)", "mk-mk": "Macedonian (North Macedonia)", "nb-no": "Norwegian (Norway)", "nl-be": "Dutch (Belgium)", "nl-nl": "Dutch (Netherlands)", "pl-pl": "Polish (Poland)", "pt-pt": "Portuguese (Portugal)", "ro-ro": "Romanian (Romania)", "sk-sk": "Slovak (Slovakia)", "sl-si": "Slovenian (Slovenia)", "sq-xk": "Albanian (Kosovo)", "sr-rs": "Serbian (Serbia)", "sv-se": "Swedish (Sweden)", "tr-tr": "Turkish (Turkey)", "uk-ua": "Ukrainian (Ukraine)", "vi-vn": "Vietnamese (Vietnam)", "zh-cn": "Chinese (China)", "zh-hk": "Chinese (Hong Kong)", "zh-tw": "Chinese (Taiwan)", } def _get_campaign_prefix(json_filename): """Extract campaign prefix from JSON filename (text before first underscore).""" name = os.path.splitext(os.path.basename(json_filename))[0] return name.split("_")[0] def _get_main_image_filename(filename_field): """ Extract the main campaign image filename from the Filename field. Returns the first .tif filename converted to .jpg, or None. """ if not filename_field: return None filenames = [f.strip() for f in filename_field.split(",")] for fn in filenames: if fn.lower().endswith(".tif"): return os.path.splitext(fn)[0] + ".jpg" return None def _get_all_image_filenames(filename_field): """ Extract all campaign image filenames from the Filename field. Returns list of .tif filenames converted to .jpg. """ if not filename_field: return [] filenames = [f.strip() for f in filename_field.split(",")] results = [] for fn in filenames: if fn.lower().endswith(".tif"): results.append(os.path.splitext(fn)[0] + ".jpg") return results def _copy_campaign_images(json_data, output_dir, image_source_dir): """ Copy the first campaign image per unique Article id to output_dir/images/. Returns a dict mapping Article id -> relative image path (or None). """ images_dir = os.path.join(output_dir, "images") os.makedirs(images_dir, exist_ok=True) # Collect unique article ids and their image filenames article_images = {} for record in json_data: article_id = record.get("Article id", "") if article_id in article_images: continue img_name = _get_main_image_filename(record.get("Filename", "")) article_images[article_id] = img_name # Copy images image_map = {} for article_id, img_name in article_images.items(): if not img_name: image_map[article_id] = None continue src = os.path.join(image_source_dir, img_name) if os.path.isfile(src): dst = os.path.join(images_dir, img_name) if not os.path.isfile(dst): shutil.copy2(src, dst) image_map[article_id] = f"images/{img_name}" else: image_map[article_id] = None return image_map def generate_html_site(json_data, output_dir, json_filename, image_base_path): """ Generate an HTML site from processed JSON data. Args: json_data: List of dicts (the processed JSON records) output_dir: Path to write the HTML site json_filename: Original JSON filename (for campaign prefix) image_base_path: Base path for campaign images """ os.makedirs(output_dir, exist_ok=True) campaign_prefix = _get_campaign_prefix(json_filename) image_source_dir = os.path.join( image_base_path, campaign_prefix, "Automation_LR" ) # Copy images and get mapping image_map = _copy_campaign_images(json_data, output_dir, image_source_dir) # Derive campaign and season from first record campaign_name = json_data[0].get("Campaign", campaign_prefix) if json_data else campaign_prefix season = json_data[0].get("Season", "") if json_data else "" # Collect all languages (lowercased for consistent keying) languages = sorted( set(r.get("Language", "").lower() for r in json_data if r.get("Language")), ) # Build the language display names JSON for JS lang_display = {} for lang in languages: lang_display[lang] = LANGUAGE_DISPLAY_NAMES.get(lang, lang) # Serialise data for JS embedding js_data = json.dumps(json_data, ensure_ascii=False) js_image_map = json.dumps(image_map, ensure_ascii=False) js_lang_display = json.dumps(lang_display, ensure_ascii=False) generation_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") html = _build_html( campaign_name=campaign_name, season=season, json_filename=json_filename, js_data=js_data, js_image_map=js_image_map, js_lang_display=js_lang_display, languages=languages, lang_display=lang_display, generation_time=generation_time, ) output_path = os.path.join(output_dir, "index.html") with open(output_path, "w", encoding="utf-8") as f: f.write(html) print(f"HTML site generated: {output_path}") print(f" Campaign: {campaign_name} | Season: {season}") print(f" Languages: {len(languages)} | Records: {len(json_data)}") copied_count = sum(1 for v in image_map.values() if v is not None) print(f" Images copied: {copied_count}/{len(image_map)}") return output_path def _build_html( campaign_name, season, json_filename, js_data, js_image_map, js_lang_display, languages, lang_display, generation_time, ): """Build the complete self-contained HTML string.""" # Build language option tags lang_options = "" for lang in languages: if lang == "en-gb": continue # en-gb is always shown as the master column display = lang_display.get(lang, lang) lang_options += f' \n' # Pick a sensible default target language (first non-en-gb, or de-de if available) default_target = "" if "de-de" in languages: default_target = "de-de" elif "fr-fr" in languages: default_target = "fr-fr" else: for l in languages: if l != "en-gb": default_target = l break html = f""" H&M EMS - {campaign_name} | {season}
H&M
Campaign {campaign_name} Season {season}
Source: {os.path.basename(json_filename)}
Generated: {generation_time}
0 products · {len(languages)} languages · 0 edits · 0 approved
""" return html # ========== CLI entry point ========== if __name__ == "__main__": import sys if len(sys.argv) < 2: print("Usage: python html_generator.py [output_dir] [image_base_path]") print() print("Arguments:") print(" json_file Path to the processed JSON file") print(" output_dir Output directory (default: ./html_output/)") print(" image_base_path Base path for campaign images (default: standard Box path)") sys.exit(1) json_file = sys.argv[1] # Default image base path default_image_base = ( "/Users/pauljohns/Library/CloudStorage/Box-Box/" "H&M - Global Team/H_M_GLOBAL_TEAM/HM/COMPANY_ASSETS/" "CAMPAIGN_IMAGES/2025" ) # Determine output directory if len(sys.argv) >= 3: out_dir = sys.argv[2] else: prefix = _get_campaign_prefix(json_file) out_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "html_output", prefix) # Determine image base path img_base = sys.argv[3] if len(sys.argv) >= 4 else default_image_base # Load JSON with open(json_file, "r", encoding="utf-8") as f: data = json.load(f) print(f"Loaded {len(data)} records from {os.path.basename(json_file)}") generate_html_site(data, out_dir, json_file, img_base)