From 71ebbf36264f7c6dc823164f3e3a326508a08066 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko
Date: Fri, 27 Feb 2026 14:46:10 +0000
Subject: [PATCH] Fix layout-to-screenshot mapping in master deck parser

In layouts mode, screenshots were matched by array index (0,1,2...)
which broke when PPTX had more slideLayouts than actual slides.
Now builds an explicit mapping from slideLayout filename to the first
slide that uses it, so each layout gets the correct screenshot.

Co-Authored-By: Claude Opus 4.6
---
 .../services/master_deck_parser_service.py | 82 +++++++++++++++++--
 1 file changed, 78 insertions(+), 4 deletions(-)

diff --git a/backend/services/master_deck_parser_service.py b/backend/services/master_deck_parser_service.py
index ab8b1f2..7fc1dd9 100644
--- a/backend/services/master_deck_parser_service.py
+++ b/backend/services/master_deck_parser_service.py
@@ -53,6 +53,61 @@ LAYOUT_TYPE_HINTS = {
 }


+def _build_layout_to_slide_map(pptx_path: str, temp_dir: str) -> dict:
+    """Build mapping from slideLayout filename → first slide index that uses it.
+
+    Returns e.g. {"slideLayout2.xml": 0, "slideLayout5.xml": 3, ...}
+    This lets us pick the right screenshot for each layout in 'layouts' mode.
+
+    Slide indices are 0-based positions in the numerically sorted list of
+    ppt/slides/slide<N>.xml files. Slides whose .rels file is missing or
+    unparseable are skipped instead of aborting the whole mapping.
+
+    NOTE(review): this assumes screenshots follow slide<N>.xml numeric order
+    rather than the sldIdLst order in presentation.xml — confirm against the
+    screenshot renderer.
+    """
+    extract_dir = os.path.join(temp_dir, "pptx_extract")
+    if not os.path.exists(extract_dir):
+        with zipfile.ZipFile(pptx_path, "r") as zf:
+            zf.extractall(extract_dir)
+
+    slides_dir = os.path.join(extract_dir, "ppt", "slides")
+    if not os.path.isdir(slides_dir):
+        return {}
+
+    # Keep only slide<N>.xml: any other "slide*.xml" name would make the
+    # numeric sort key raise ValueError.
+    numbered_slides = []
+    for name in os.listdir(slides_dir):
+        if name.startswith("slide") and name.endswith(".xml"):
+            number = name[len("slide"):-len(".xml")]
+            if number.isdecimal():
+                numbered_slides.append((int(number), name))
+    numbered_slides.sort()
+
+    layout_to_slide: dict = {}  # slideLayout filename → first slide index
+
+    for slide_idx, (_, slide_file) in enumerate(numbered_slides):
+        rels_path = os.path.join(slides_dir, "_rels", slide_file + ".rels")
+        try:
+            rels_root = ET.parse(rels_path).getroot()
+        except FileNotFoundError:
+            continue
+        except (OSError, ET.ParseError) as exc:
+            print(f"[MasterDeckParser] Skipping {slide_file}.rels: {exc}")
+            continue
+        # iter() visits every element, namespaced or not, in document order.
+        for rel in rels_root.iter():
+            target = rel.get("Target", "")
+            if "slideLayout" in target:
+                # Only store the FIRST slide that uses this layout
+                layout_to_slide.setdefault(target.split("/")[-1], slide_idx)
+                break
+
+    return layout_to_slide
+
+
 def _extract_slide_layout_xmls(pptx_path: str, temp_dir: str) -> List[dict]:
     """Extract slide layout XMLs from ppt/slideLayouts/ and return metadata."""
     extract_dir = os.path.join(temp_dir, "pptx_extract")
@@ -471,12 +526,16 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
     slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)

     # 2. Choose primary source based on parse_mode
+    # Build layout→slide map so we can match screenshots to layouts
+    layout_to_slide_map = _build_layout_to_slide_map(pptx_path, temp_dir)
+    print(f"[MasterDeckParser] Layout→slide mapping: {layout_to_slide_map}")
+
     if parse_mode == "layouts":
-        # Legacy mode: use all slideLayout XMLs from ppt/slideLayouts/
+        # Unique slideLayout XMLs from ppt/slideLayouts/
         primary_metas = _extract_slide_layout_xmls(pptx_path, temp_dir)
         print(f"[MasterDeckParser] Mode=layouts: {len(primary_metas)} slideLayouts")
     else:
-        # Default "slides" mode: use actual slides with layout name resolution
+        # "slides" mode: actual slides with layout name resolution
         primary_metas = _extract_slides_with_layout_info(pptx_path, temp_dir)
         print(f"[MasterDeckParser] Mode=slides: {len(primary_metas)} actual slides")

@@ -525,7 +584,22 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
     # 6. Process each item through LLM pipeline
     llm_provider = _detect_llm_provider()
     layouts_result = []
-    llm_count = min(len(primary_metas), len(screenshots))
+
+    # Build per-layout screenshot mapping
+    # In "slides" mode: screenshots[idx] maps directly (1:1)
+    # In "layouts" mode: use layout_to_slide_map to find the right screenshot
+    layout_screenshot_map: dict = {}  # layout index → screenshot path
+    if parse_mode == "layouts":
+        for idx, lm in enumerate(primary_metas):
+            layout_filename = lm.get("filename", "")
+            slide_idx = layout_to_slide_map.get(layout_filename)
+            if slide_idx is not None and slide_idx < len(screenshots):
+                layout_screenshot_map[idx] = screenshots[slide_idx]
+    else:
+        for idx in range(min(len(primary_metas), len(screenshots))):
+            layout_screenshot_map[idx] = screenshots[idx]
+
+    llm_count = len(layout_screenshot_map)

     print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}")
     print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM")
@@ -537,7 +611,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
         )

     for idx, lm in enumerate(primary_metas):
-        screenshot_path = screenshots[idx] if idx < len(screenshots) else None
+        screenshot_path = layout_screenshot_map.get(idx)

         # Try LayoutParser classification if a screenshot is available
         lp_layout_type = None