def _build_layout_to_slide_map(pptx_path: str, temp_dir: str) -> dict:
    """Map each slideLayout filename to the index of the first slide using it.

    The deck is unzipped into ``<temp_dir>/pptx_extract`` (skipped when that
    directory already exists), then every ``ppt/slides/_rels/slideN.xml.rels``
    file is scanned for its slideLayout relationship.

    Args:
        pptx_path: Path to the .pptx archive.
        temp_dir: Working directory that holds (or will hold) the extraction.

    Returns:
        A dict such as ``{"slideLayout2.xml": 0, "slideLayout5.xml": 3}``.
        Indices are 0-based positions in the numerically sorted list of
        ``slideN.xml`` files. Empty when the deck has no ``ppt/slides`` folder.

    Raises:
        zipfile.BadZipFile / OSError: if the archive itself cannot be opened
            or extracted. Per-slide problems are logged and skipped instead.
    """
    extract_dir = os.path.join(temp_dir, "pptx_extract")
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(pptx_path, "r") as zf:
            zf.extractall(extract_dir)

    slides_dir = os.path.join(extract_dir, "ppt", "slides")
    if not os.path.isdir(slides_dir):
        return {}

    # Keep only "slide<digits>.xml". Anything else (e.g. "slideBackup.xml")
    # has no slide number and must not crash the numeric sort.
    prefix, suffix = "slide", ".xml"
    numbered_slides = []
    for name in os.listdir(slides_dir):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        number = name[len(prefix):-len(suffix)]
        if number.isdecimal():
            numbered_slides.append((int(number), name))
    numbered_slides.sort()

    # NOTE(review): indices follow slideN.xml numbering, which is assumed to
    # match the order of the rendered screenshots. The presentation order
    # proper is defined by sldIdLst in presentation.xml -- confirm they agree.
    layout_to_slide: dict = {}  # slideLayout filename -> first slide index

    for slide_idx, (_, slide_file) in enumerate(numbered_slides):
        rels_path = os.path.join(slides_dir, "_rels", slide_file + ".rels")
        try:
            # Parsing from the path lets the XML parser honour the encoding
            # declared in the file rather than assuming UTF-8 text.
            rels_root = ET.parse(rels_path).getroot()
        except FileNotFoundError:
            # A slide without a rels part simply contributes no mapping.
            continue
        except (OSError, ET.ParseError) as exc:
            print(f"[MasterDeckParser] Skipping unreadable rels for {slide_file}: {exc}")
            continue

        for rel in rels_root.iter():
            tag = rel.tag
            # Compare the local name so the lookup works with or without
            # the package-relationships namespace.
            if not isinstance(tag, str) or tag.rsplit("}", 1)[-1] != "Relationship":
                continue

            rel_type = rel.get("Type", "")
            target = rel.get("Target", "")
            # Prefer the relationship Type: a hyperlink whose URL merely
            # contains "slideLayout" must not be mistaken for the layout.
            if rel_type:
                is_layout = rel_type.endswith("/slideLayout")
            else:
                is_layout = "slideLayout" in target
            if not is_layout:
                continue

            layout_file = target.split("/")[-1]
            if layout_file:
                # Only the FIRST slide that uses a layout is recorded.
                layout_to_slide.setdefault(layout_file, slide_idx)
            break  # a slide references exactly one layout

    return layout_to_slide
XMLs from ppt/slideLayouts/ and return metadata.""" extract_dir = os.path.join(temp_dir, "pptx_extract") @@ -471,12 +516,16 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: slide_xmls = _extract_slide_xmls(pptx_path, temp_dir) # 2. Choose primary source based on parse_mode + # Build layout→slide map so we can match screenshots to layouts + layout_to_slide_map = _build_layout_to_slide_map(pptx_path, temp_dir) + print(f"[MasterDeckParser] Layout→slide mapping: {layout_to_slide_map}") + if parse_mode == "layouts": - # Legacy mode: use all slideLayout XMLs from ppt/slideLayouts/ + # Unique slideLayout XMLs from ppt/slideLayouts/ primary_metas = _extract_slide_layout_xmls(pptx_path, temp_dir) print(f"[MasterDeckParser] Mode=layouts: {len(primary_metas)} slideLayouts") else: - # Default "slides" mode: use actual slides with layout name resolution + # "slides" mode: actual slides with layout name resolution primary_metas = _extract_slides_with_layout_info(pptx_path, temp_dir) print(f"[MasterDeckParser] Mode=slides: {len(primary_metas)} actual slides") @@ -525,7 +574,22 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: # 6. 
Process each item through LLM pipeline llm_provider = _detect_llm_provider() layouts_result = [] - llm_count = min(len(primary_metas), len(screenshots)) + + # Build per-layout screenshot mapping + # In "slides" mode: screenshots[idx] maps directly (1:1) + # In "layouts" mode: use layout_to_slide_map to find the right screenshot + layout_screenshot_map: dict = {} # layout index → screenshot path + if parse_mode == "layouts": + for idx, lm in enumerate(primary_metas): + layout_filename = lm.get("filename", "") + slide_idx = layout_to_slide_map.get(layout_filename) + if slide_idx is not None and slide_idx < len(screenshots): + layout_screenshot_map[idx] = screenshots[slide_idx] + else: + for idx in range(min(len(primary_metas), len(screenshots))): + layout_screenshot_map[idx] = screenshots[idx] + + llm_count = len(layout_screenshot_map) print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}") print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM") @@ -537,7 +601,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: ) for idx, lm in enumerate(primary_metas): - screenshot_path = screenshots[idx] if idx < len(screenshots) else None + screenshot_path = layout_screenshot_map.get(idx) # Try LayoutParser classification if a screenshot is available lp_layout_type = None