Fix layout-to-screenshot mapping in master deck parser

In layouts mode, screenshots were matched to layouts by array index (0, 1, 2, ...), which broke when the PPTX had more slideLayouts than actual slides. The parser now builds an explicit mapping from each slideLayout filename to the first slide that uses it, so each layout gets the correct screenshot; a layout that no slide uses gets no screenshot.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 14:46:10 +00:00 · 2026-02-27 14:46:10 +00:00 · 71ebbf3626
commit 71ebbf3626
parent e8295d6e71
1 changed files with 68 additions and 4 deletions
--- a/backend/services/master_deck_parser_service.py
+++ b/backend/services/master_deck_parser_service.py
@ -53,6 +53,51 @@ LAYOUT_TYPE_HINTS = {
 }


+def _build_layout_to_slide_map(pptx_path: str, temp_dir: str) -> dict:
+    """Map each slideLayout filename to the first slide index that uses it.
+
+    Extracts the PPTX into ``<temp_dir>/pptx_extract`` (skipped when that
+    directory already exists), orders ``ppt/slides/slide<N>.xml`` by N, and
+    reads each slide's ``_rels/slide<N>.xml.rels`` for its slideLayout target.
+    This lets the caller pick the right screenshot for each layout in
+    'layouts' mode.
+
+    Args:
+        pptx_path: Path to the .pptx archive.
+        temp_dir: Working directory that holds the extraction folder.
+
+    Returns:
+        e.g. ``{"slideLayout2.xml": 0, "slideLayout5.xml": 3, ...}``. Values
+        are 0-based positions in the slide list sorted by filename number.
+        Layouts used by no slide are absent; ``{}`` when the archive has no
+        ``ppt/slides`` directory.
+
+    NOTE(review): the index follows the slide *filename* number. This assumes
+    the screenshots list is ordered the same way; presentation order comes
+    from ``presentation.xml`` and can differ after slides are reordered.
+    Confirm against the screenshot renderer.
+    """
+    extract_dir = os.path.join(temp_dir, "pptx_extract")
+    if not os.path.exists(extract_dir):
+        with zipfile.ZipFile(pptx_path, "r") as zf:
+            zf.extractall(extract_dir)
+
+    slides_dir = os.path.join(extract_dir, "ppt", "slides")
+    if not os.path.isdir(slides_dir):
+        return {}
+
+    # Keep only slide<N>.xml. Any other "slide*.xml" name (e.g. a stray
+    # "slideBackup.xml") used to crash the int() sort key with ValueError.
+    numbered = []
+    for name in os.listdir(slides_dir):
+        if not (name.startswith("slide") and name.endswith(".xml")):
+            continue
+        stem = name[len("slide"):-len(".xml")]
+        if stem.isdecimal():
+            numbered.append((int(stem), name))
+    numbered.sort()
+
+    layout_to_slide: dict = {}  # slideLayout filename -> first slide index
+
+    for slide_idx, (_, slide_file) in enumerate(numbered):
+        rels_path = os.path.join(slides_dir, "_rels", slide_file + ".rels")
+        if not os.path.exists(rels_path):
+            continue
+        try:
+            # Parse from the path so the XML parser detects the encoding
+            # itself instead of assuming UTF-8 text.
+            rels_root = ET.parse(rels_path).getroot()
+        except (ET.ParseError, OSError, ValueError) as exc:
+            # Best effort: skip this slide, but report it instead of hiding it.
+            print(f"[MasterDeckParser] Skipping unreadable rels {rels_path}: {exc}")
+            continue
+
+        # iter() walks every element in document order regardless of
+        # namespace, so a separate namespaced findall() is not needed.
+        for rel in rels_root.iter():
+            target = rel.get("Target", "")
+            if "slideLayout" in target:
+                # setdefault keeps only the FIRST slide seen for this layout.
+                layout_to_slide.setdefault(target.split("/")[-1], slide_idx)
+                break
+
+    return layout_to_slide
+
+
 def _extract_slide_layout_xmls(pptx_path: str, temp_dir: str) -> List[dict]:
    """Extract slide layout XMLs from ppt/slideLayouts/ and return metadata."""
    extract_dir = os.path.join(temp_dir, "pptx_extract")
@ -471,12 +516,16 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
        slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)

        # 2. Choose primary source based on parse_mode
+        #    Build layout→slide map so we can match screenshots to layouts
+        layout_to_slide_map = _build_layout_to_slide_map(pptx_path, temp_dir)
+        print(f"[MasterDeckParser] Layout→slide mapping: {layout_to_slide_map}")
+
        if parse_mode == "layouts":
-            # Legacy mode: use all slideLayout XMLs from ppt/slideLayouts/
+            # Unique slideLayout XMLs from ppt/slideLayouts/
            primary_metas = _extract_slide_layout_xmls(pptx_path, temp_dir)
            print(f"[MasterDeckParser] Mode=layouts: {len(primary_metas)} slideLayouts")
        else:
-            # Default "slides" mode: use actual slides with layout name resolution
+            # "slides" mode: actual slides with layout name resolution
            primary_metas = _extract_slides_with_layout_info(pptx_path, temp_dir)
            print(f"[MasterDeckParser] Mode=slides: {len(primary_metas)} actual slides")

@ -525,7 +574,22 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
        # 6. Process each item through LLM pipeline
        llm_provider = _detect_llm_provider()
        layouts_result = []
-        llm_count = min(len(primary_metas), len(screenshots))
+
+        # Build per-layout screenshot mapping
+        # In "slides" mode: screenshots[idx] maps directly (1:1)
+        # In "layouts" mode: use layout_to_slide_map to find the right screenshot
+        layout_screenshot_map: dict = {}  # layout index → screenshot path
+        if parse_mode == "layouts":
+            for idx, lm in enumerate(primary_metas):
+                layout_filename = lm.get("filename", "")
+                slide_idx = layout_to_slide_map.get(layout_filename)
+                if slide_idx is not None and slide_idx < len(screenshots):
+                    layout_screenshot_map[idx] = screenshots[slide_idx]
+        else:
+            for idx in range(min(len(primary_metas), len(screenshots))):
+                layout_screenshot_map[idx] = screenshots[idx]
+
+        llm_count = len(layout_screenshot_map)
        print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}")
        print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM")

@ -537,7 +601,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
        )

        for idx, lm in enumerate(primary_metas):
-            screenshot_path = screenshots[idx] if idx < len(screenshots) else None
+            screenshot_path = layout_screenshot_map.get(idx)

            # Try LayoutParser classification if a screenshot is available
            lp_layout_type = None