Fix layout-to-screenshot mapping in master deck parser

In layouts mode, screenshots were matched by array index (0, 1, 2, ...),
which broke when the PPTX had more slideLayouts than actual slides.
Now builds an explicit mapping from slideLayout filename to the
first slide that uses it, so each layout gets the correct screenshot.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-02-27 14:46:10 +00:00
parent e8295d6e71
commit 71ebbf3626

View file

@ -53,6 +53,51 @@ LAYOUT_TYPE_HINTS = {
}
def _build_layout_to_slide_map(pptx_path: str, temp_dir: str) -> dict:
    """Build mapping from slideLayout filename → first slide index that uses it.

    The PPTX archive is extracted into ``<temp_dir>/pptx_extract`` unless that
    directory already exists. Every ``ppt/slides/slideN.xml`` is then visited
    in ascending ``N`` order, and its ``_rels/slideN.xml.rels`` file is scanned
    for the first element whose ``Target`` attribute contains ``"slideLayout"``.

    Returns e.g. {"slideLayout2.xml": 0, "slideLayout5.xml": 3, ...}
    This lets us pick the right screenshot for each layout in 'layouts' mode.

    Args:
        pptx_path: Path to the .pptx file; only opened when extraction is needed.
        temp_dir: Working directory that holds (or will hold) ``pptx_extract``.

    Returns:
        Dict of layout filename → 0-based position of the first slide that
        references it. Empty when there is no ``ppt/slides`` directory.

    Raises:
        zipfile.BadZipFile / OSError: if the archive has to be extracted and
            cannot be opened or read.
    """
    extract_dir = os.path.join(temp_dir, "pptx_extract")
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(pptx_path, "r") as zf:
            zf.extractall(extract_dir)

    slides_dir = os.path.join(extract_dir, "ppt", "slides")
    if not os.path.isdir(slides_dir):
        return {}

    # Collect (number, filename) pairs. Files that merely look like slides
    # (e.g. "slide_notes.xml") are skipped instead of crashing the int() parse.
    numbered_slides = []
    for name in os.listdir(slides_dir):
        if not (name.startswith("slide") and name.endswith(".xml")):
            continue
        try:
            number = int(name[len("slide"):-len(".xml")])
        except ValueError:
            continue
        numbered_slides.append((number, name))
    # Numeric sort so that slide10.xml comes after slide2.xml.
    numbered_slides.sort()

    # NOTE(review): slide_idx is the position in filename-number order. This
    # presumably matches the order of the screenshots list; a deck whose
    # presentation.xml orders slides differently would not — confirm.
    rels_dir = os.path.join(slides_dir, "_rels")
    layout_to_slide: dict = {}  # slideLayout filename → first slide index
    for slide_idx, (_, slide_file) in enumerate(numbered_slides):
        rels_path = os.path.join(rels_dir, slide_file + ".rels")
        if not os.path.exists(rels_path):
            continue
        try:
            # ET.parse honours the encoding declared in the XML prolog.
            rels_root = ET.parse(rels_path).getroot()
        except (ET.ParseError, OSError) as exc:
            # Best effort: one unreadable .rels file must not abort the mapping,
            # but it should not disappear silently either.
            print(f"[MasterDeckParser] Skipping unreadable rels file {rels_path}: {exc}")
            continue
        # iter() walks every element regardless of XML namespace, so it finds
        # Relationship nodes whether or not the file declares the OPC namespace.
        for rel in rels_root.iter():
            target = rel.get("Target", "")
            if "slideLayout" in target:
                # Only store the FIRST slide that uses this layout.
                layout_to_slide.setdefault(target.split("/")[-1], slide_idx)
                break
    return layout_to_slide
def _extract_slide_layout_xmls(pptx_path: str, temp_dir: str) -> List[dict]:
"""Extract slide layout XMLs from ppt/slideLayouts/ and return metadata."""
extract_dir = os.path.join(temp_dir, "pptx_extract")
@ -471,12 +516,16 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
# 2. Choose primary source based on parse_mode
# Build layout→slide map so we can match screenshots to layouts
layout_to_slide_map = _build_layout_to_slide_map(pptx_path, temp_dir)
print(f"[MasterDeckParser] Layout→slide mapping: {layout_to_slide_map}")
if parse_mode == "layouts":
# Legacy mode: use all slideLayout XMLs from ppt/slideLayouts/
# Unique slideLayout XMLs from ppt/slideLayouts/
primary_metas = _extract_slide_layout_xmls(pptx_path, temp_dir)
print(f"[MasterDeckParser] Mode=layouts: {len(primary_metas)} slideLayouts")
else:
# Default "slides" mode: use actual slides with layout name resolution
# "slides" mode: actual slides with layout name resolution
primary_metas = _extract_slides_with_layout_info(pptx_path, temp_dir)
print(f"[MasterDeckParser] Mode=slides: {len(primary_metas)} actual slides")
@ -525,7 +574,22 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
# 6. Process each item through LLM pipeline
llm_provider = _detect_llm_provider()
layouts_result = []
llm_count = min(len(primary_metas), len(screenshots))
# Build per-layout screenshot mapping
# In "slides" mode: screenshots[idx] maps directly (1:1)
# In "layouts" mode: use layout_to_slide_map to find the right screenshot
layout_screenshot_map: dict = {} # layout index → screenshot path
if parse_mode == "layouts":
for idx, lm in enumerate(primary_metas):
layout_filename = lm.get("filename", "")
slide_idx = layout_to_slide_map.get(layout_filename)
if slide_idx is not None and slide_idx < len(screenshots):
layout_screenshot_map[idx] = screenshots[slide_idx]
else:
for idx in range(min(len(primary_metas), len(screenshots))):
layout_screenshot_map[idx] = screenshots[idx]
llm_count = len(layout_screenshot_map)
print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}")
print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM")
@ -537,7 +601,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
)
for idx, lm in enumerate(primary_metas):
screenshot_path = screenshots[idx] if idx < len(screenshots) else None
screenshot_path = layout_screenshot_map.get(idx)
# Try LayoutParser classification if a screenshot is available
lp_layout_type = None