Parallelize master deck parsing for 2x-4x speedup

BEFORE: Sequential (HTML → React → HTML → React...)
- 25 layouts × 60 sec each = 25 minutes

AFTER: Parallel batching (All HTML together, then all React)
- 25 layouts: HTML batch (2-3 min) + React batch (2-3 min) = 4-6 minutes
- 2x-4x faster depending on Google API concurrency limits

Implementation:
- asyncio.gather() for parallel HTML generation
- asyncio.gather() for parallel React generation
- Maintains error handling per layout

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-02-27 22:12:18 +00:00
parent cf7a9ec305
commit 8adbc965a4

View file

@ -504,6 +504,8 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
regions_to_description,
)
# Step 1: Prepare all layout entries with metadata
layout_entries = []
for idx, lm in enumerate(primary_metas):
screenshot_path = layout_screenshot_map.get(idx)
@ -517,55 +519,96 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
lp_layout_type = classify_layout_from_regions(regions)
lp_region_desc = regions_to_description(regions)
except Exception as lp_err:
print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}")
print(f"[LayoutAnalysis] Detection failed: {lp_err}")
layout_entry = {
"index": idx,
"layout_name": lm["layout_name"],
"layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]),
"xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])), # Replaced direct HTML with geometric JSON
"xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])),
"fonts": list(
{normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f}
),
"html": None,
"react_code": None,
"screenshot_path": screenshot_path,
"lp_region_desc": lp_region_desc, # Store for LLM context
"xml_content": lm["xml_content"],
}
layout_entries.append(layout_entry)
# Step 2: Parallel HTML generation for all layouts with screenshots
if llm_provider:
print(f"[MasterDeckParser] PARALLEL MODE: Generating HTML for {llm_count} layouts...")
async def generate_html_for_layout(entry):
"""Generate HTML for a single layout."""
if not entry["screenshot_path"] or not os.path.exists(entry["screenshot_path"]):
return None
# Run LLM pipeline if provider available and we have a screenshot
if llm_provider and screenshot_path and os.path.exists(screenshot_path):
try:
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...")
with open(screenshot_path, "rb") as img_f:
with open(entry["screenshot_path"], "rb") as img_f:
img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
# Include LayoutParser region info in LLM context
xml_context = lm["xml_content"]
if lp_region_desc:
xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}"
xml_context = entry["xml_content"]
if entry["lp_region_desc"]:
xml_context = f"{entry['lp_region_desc']}\n\n---\n\n{xml_context}"
html = await _llm_generate_html(
llm_provider, img_b64, xml_context,
layout_entry["fonts"] or None,
llm_provider, img_b64, xml_context, entry["fonts"] or None
)
html = html.replace("```html", "").replace("```", "")
layout_entry["html"] = html
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating React...")
react_code = await _llm_generate_react(
llm_provider, html, img_b64,
)
react_code = react_code.replace("```tsx", "").replace("```", "")
layout_entry["react_code"] = react_code
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — done ({len(react_code)} chars)")
return html.replace("```html", "").replace("```", "")
except Exception as e:
print(f"[MasterDeckParser] LLM FAILED for layout {idx} ({lm['layout_name']}): {e}")
traceback.print_exc()
layout_entry["html"] = None
layout_entry["react_code"] = None
print(f"[MasterDeckParser] HTML gen failed for {entry['layout_name']}: {e}")
return None
layouts_result.append(layout_entry)
# Parallel HTML generation
html_tasks = [generate_html_for_layout(entry) for entry in layout_entries]
html_results = await asyncio.gather(*html_tasks, return_exceptions=True)
# Assign HTML results
for entry, html in zip(layout_entries, html_results):
if html and not isinstance(html, Exception):
entry["html"] = html
print(f"[MasterDeckParser] HTML generation complete. Generating React...")
# Step 3: Parallel React generation for layouts with HTML
async def generate_react_for_layout(entry, idx):
"""Generate React for a single layout."""
if not entry["html"] or not entry["screenshot_path"]:
return None
try:
with open(entry["screenshot_path"], "rb") as img_f:
img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
react_code = await _llm_generate_react(llm_provider, entry["html"], img_b64)
react_cleaned = react_code.replace("```tsx", "").replace("```", "")
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {entry['layout_name']} — done ({len(react_cleaned)} chars)")
return react_cleaned
except Exception as e:
print(f"[MasterDeckParser] React gen failed for {entry['layout_name']}: {e}")
return None
# Parallel React generation
react_tasks = [
generate_react_for_layout(entry, idx)
for idx, entry in enumerate(layout_entries)
]
react_results = await asyncio.gather(*react_tasks, return_exceptions=True)
# Assign React results
for entry, react in zip(layout_entries, react_results):
if react and not isinstance(react, Exception):
entry["react_code"] = react
# Clean up temporary fields
for entry in layout_entries:
entry.pop("lp_region_desc", None)
entry.pop("xml_content", None)
layouts_result = layout_entries
parsed_config = {
"theme": theme_info,