Parallelize master deck parsing for 2x speedup
BEFORE: Sequential (HTML → React → HTML → React...)
- 25 layouts × 60 sec each = 25 minutes

AFTER: Parallel batching (all HTML together, then all React)
- 25 layouts: HTML batch (2-3 min) + React batch (2-3 min) = 5-6 minutes
- 2x-4x faster, depending on Google API concurrency limits

Implementation:
- asyncio.gather() for parallel HTML generation
- asyncio.gather() for parallel React generation
- Maintains error handling per layout

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cf7a9ec305
commit
8adbc965a4
1 changed files with 71 additions and 28 deletions
|
|
@ -504,6 +504,8 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
|
|||
regions_to_description,
|
||||
)
|
||||
|
||||
# Step 1: Prepare all layout entries with metadata
|
||||
layout_entries = []
|
||||
for idx, lm in enumerate(primary_metas):
|
||||
screenshot_path = layout_screenshot_map.get(idx)
|
||||
|
||||
|
|
@ -517,55 +519,96 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
|
|||
lp_layout_type = classify_layout_from_regions(regions)
|
||||
lp_region_desc = regions_to_description(regions)
|
||||
except Exception as lp_err:
|
||||
print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}")
|
||||
print(f"[LayoutAnalysis] Detection failed: {lp_err}")
|
||||
|
||||
layout_entry = {
|
||||
"index": idx,
|
||||
"layout_name": lm["layout_name"],
|
||||
"layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]),
|
||||
"xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])), # Replaced direct HTML with geometric JSON
|
||||
"xml_snippet": format_geometry_for_llm(extract_geometry_from_oxml(lm["xml_content"])),
|
||||
"fonts": list(
|
||||
{normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f}
|
||||
),
|
||||
"html": None,
|
||||
"react_code": None,
|
||||
"screenshot_path": screenshot_path,
|
||||
"lp_region_desc": lp_region_desc, # Store for LLM context
|
||||
"xml_content": lm["xml_content"],
|
||||
}
|
||||
layout_entries.append(layout_entry)
|
||||
|
||||
# Step 2: Parallel HTML generation for all layouts with screenshots
|
||||
if llm_provider:
|
||||
print(f"[MasterDeckParser] PARALLEL MODE: Generating HTML for {llm_count} layouts...")
|
||||
|
||||
async def generate_html_for_layout(entry):
|
||||
"""Generate HTML for a single layout."""
|
||||
if not entry["screenshot_path"] or not os.path.exists(entry["screenshot_path"]):
|
||||
return None
|
||||
|
||||
# Run LLM pipeline if provider available and we have a screenshot
|
||||
if llm_provider and screenshot_path and os.path.exists(screenshot_path):
|
||||
try:
|
||||
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...")
|
||||
with open(screenshot_path, "rb") as img_f:
|
||||
with open(entry["screenshot_path"], "rb") as img_f:
|
||||
img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
|
||||
|
||||
# Include LayoutParser region info in LLM context
|
||||
xml_context = lm["xml_content"]
|
||||
if lp_region_desc:
|
||||
xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}"
|
||||
xml_context = entry["xml_content"]
|
||||
if entry["lp_region_desc"]:
|
||||
xml_context = f"{entry['lp_region_desc']}\n\n---\n\n{xml_context}"
|
||||
|
||||
html = await _llm_generate_html(
|
||||
llm_provider, img_b64, xml_context,
|
||||
layout_entry["fonts"] or None,
|
||||
llm_provider, img_b64, xml_context, entry["fonts"] or None
|
||||
)
|
||||
html = html.replace("```html", "").replace("```", "")
|
||||
layout_entry["html"] = html
|
||||
|
||||
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating React...")
|
||||
react_code = await _llm_generate_react(
|
||||
llm_provider, html, img_b64,
|
||||
)
|
||||
react_code = react_code.replace("```tsx", "").replace("```", "")
|
||||
layout_entry["react_code"] = react_code
|
||||
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — done ({len(react_code)} chars)")
|
||||
|
||||
return html.replace("```html", "").replace("```", "")
|
||||
except Exception as e:
|
||||
print(f"[MasterDeckParser] LLM FAILED for layout {idx} ({lm['layout_name']}): {e}")
|
||||
traceback.print_exc()
|
||||
layout_entry["html"] = None
|
||||
layout_entry["react_code"] = None
|
||||
print(f"[MasterDeckParser] HTML gen failed for {entry['layout_name']}: {e}")
|
||||
return None
|
||||
|
||||
layouts_result.append(layout_entry)
|
||||
# Parallel HTML generation
|
||||
html_tasks = [generate_html_for_layout(entry) for entry in layout_entries]
|
||||
html_results = await asyncio.gather(*html_tasks, return_exceptions=True)
|
||||
|
||||
# Assign HTML results
|
||||
for entry, html in zip(layout_entries, html_results):
|
||||
if html and not isinstance(html, Exception):
|
||||
entry["html"] = html
|
||||
|
||||
print(f"[MasterDeckParser] HTML generation complete. Generating React...")
|
||||
|
||||
# Step 3: Parallel React generation for layouts with HTML
|
||||
async def generate_react_for_layout(entry, idx):
|
||||
"""Generate React for a single layout."""
|
||||
if not entry["html"] or not entry["screenshot_path"]:
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(entry["screenshot_path"], "rb") as img_f:
|
||||
img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
|
||||
|
||||
react_code = await _llm_generate_react(llm_provider, entry["html"], img_b64)
|
||||
react_cleaned = react_code.replace("```tsx", "").replace("```", "")
|
||||
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {entry['layout_name']} — done ({len(react_cleaned)} chars)")
|
||||
return react_cleaned
|
||||
except Exception as e:
|
||||
print(f"[MasterDeckParser] React gen failed for {entry['layout_name']}: {e}")
|
||||
return None
|
||||
|
||||
# Parallel React generation
|
||||
react_tasks = [
|
||||
generate_react_for_layout(entry, idx)
|
||||
for idx, entry in enumerate(layout_entries)
|
||||
]
|
||||
react_results = await asyncio.gather(*react_tasks, return_exceptions=True)
|
||||
|
||||
# Assign React results
|
||||
for entry, react in zip(layout_entries, react_results):
|
||||
if react and not isinstance(react, Exception):
|
||||
entry["react_code"] = react
|
||||
|
||||
# Clean up temporary fields
|
||||
for entry in layout_entries:
|
||||
entry.pop("lp_region_desc", None)
|
||||
entry.pop("xml_content", None)
|
||||
|
||||
layouts_result = layout_entries
|
||||
|
||||
parsed_config = {
|
||||
"theme": theme_info,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue