Parallelize master deck parsing for 2x speedup

BEFORE: Sequential (HTML → React → HTML → React...) - 25 layouts × 60 sec each = 25 minutes AFTER: Parallel batching (All HTML together, then all React) - 25 layouts: HTML batch (2-3 min) + React batch (2-3 min) = 5-6 minutes - 2x-4x faster depending on Google API concurrency limits Implementation: - asyncio.gather() for parallel HTML generation - asyncio.gather() for parallel React generation - Maintains error handling per layout Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-27 22:12:18 +00:00 · 2026-02-27 22:12:18 +00:00 · 8adbc965a4
commit 8adbc965a4
parent cf7a9ec305
1 changed files with 71 additions and 28 deletions
--- a/backend/services/master_deck_parser_service.py
+++ b/backend/services/master_deck_parser_service.py
@ -504,6 +504,8 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
            regions_to_description,
        )

+        # Step 1: Prepare all layout entries with metadata
+        layout_entries = []
        for idx, lm in enumerate(primary_metas):
            screenshot_path = layout_screenshot_map.get(idx)

@ -517,55 +519,96 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
                        lp_layout_type = classify_layout_from_regions(regions)
                        lp_region_desc = regions_to_description(regions)
                except Exception as lp_err:
-                    print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}")
+                    print(f"[LayoutAnalysis] Detection failed: {lp_err}")

            layout_entry = {
                "index": idx,
                "layout_name": lm["layout_name"],
                "layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]),
-                "xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])), # Replaced direct HTML with geometric JSON
+                "xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])),
                "fonts": list(
                    {normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f}
                ),
                "html": None,
                "react_code": None,
                "screenshot_path": screenshot_path,
+                "lp_region_desc": lp_region_desc,  # Store for LLM context
+                "xml_content": lm["xml_content"],
            }
+            layout_entries.append(layout_entry)
+
+        # Step 2: Parallel HTML generation for all layouts with screenshots
+        if llm_provider:
+            print(f"[MasterDeckParser] PARALLEL MODE: Generating HTML for {llm_count} layouts...")
+
+            async def generate_html_for_layout(entry):
+                """Generate HTML for a single layout."""
+                if not entry["screenshot_path"] or not os.path.exists(entry["screenshot_path"]):
+                    return None

-            # Run LLM pipeline if provider available and we have a screenshot
-            if llm_provider and screenshot_path and os.path.exists(screenshot_path):
                try:
-                    print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...")
-                    with open(screenshot_path, "rb") as img_f:
+                    with open(entry["screenshot_path"], "rb") as img_f:
                        img_b64 = base64.b64encode(img_f.read()).decode("utf-8")

-                    # Include LayoutParser region info in LLM context
-                    xml_context = lm["xml_content"]
-                    if lp_region_desc:
-                        xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}"
+                    xml_context = entry["xml_content"]
+                    if entry["lp_region_desc"]:
+                        xml_context = f"{entry['lp_region_desc']}\n\n---\n\n{xml_context}"

                    html = await _llm_generate_html(
-                        llm_provider, img_b64, xml_context,
-                        layout_entry["fonts"] or None,
+                        llm_provider, img_b64, xml_context, entry["fonts"] or None
                    )
-                    html = html.replace("```html", "").replace("```", "")
-                    layout_entry["html"] = html
-
-                    print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating React...")
-                    react_code = await _llm_generate_react(
-                        llm_provider, html, img_b64,
-                    )
-                    react_code = react_code.replace("```tsx", "").replace("```", "")
-                    layout_entry["react_code"] = react_code
-                    print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — done ({len(react_code)} chars)")
-
+                    return html.replace("```html", "").replace("```", "")
                except Exception as e:
-                    print(f"[MasterDeckParser] LLM FAILED for layout {idx} ({lm['layout_name']}): {e}")
-                    traceback.print_exc()
-                    layout_entry["html"] = None
-                    layout_entry["react_code"] = None
+                    print(f"[MasterDeckParser] HTML gen failed for {entry['layout_name']}: {e}")
+                    return None

-            layouts_result.append(layout_entry)
+            # Parallel HTML generation
+            html_tasks = [generate_html_for_layout(entry) for entry in layout_entries]
+            html_results = await asyncio.gather(*html_tasks, return_exceptions=True)
+
+            # Assign HTML results
+            for entry, html in zip(layout_entries, html_results):
+                if html and not isinstance(html, Exception):
+                    entry["html"] = html
+
+            print(f"[MasterDeckParser] HTML generation complete. Generating React...")
+
+            # Step 3: Parallel React generation for layouts with HTML
+            async def generate_react_for_layout(entry, idx):
+                """Generate React for a single layout."""
+                if not entry["html"] or not entry["screenshot_path"]:
+                    return None
+
+                try:
+                    with open(entry["screenshot_path"], "rb") as img_f:
+                        img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
+
+                    react_code = await _llm_generate_react(llm_provider, entry["html"], img_b64)
+                    react_cleaned = react_code.replace("```tsx", "").replace("```", "")
+                    print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {entry['layout_name']} — done ({len(react_cleaned)} chars)")
+                    return react_cleaned
+                except Exception as e:
+                    print(f"[MasterDeckParser] React gen failed for {entry['layout_name']}: {e}")
+                    return None
+
+            # Parallel React generation
+            react_tasks = [
+                generate_react_for_layout(entry, idx)
+                for idx, entry in enumerate(layout_entries)
+            ]
+            react_results = await asyncio.gather(*react_tasks, return_exceptions=True)
+
+            # Assign React results
+            for entry, react in zip(layout_entries, react_results):
+                if react and not isinstance(react, Exception):
+                    entry["react_code"] = react
+
+        # Clean up temporary fields
+        for entry in layout_entries:
+            entry.pop("lp_region_desc", None)
+            entry.pop("xml_content", None)
+
+        layouts_result = layout_entries

    parsed_config = {
        "theme": theme_info,