- Step 10: Extended file upload for Excel/CSV/images/URLs (openpyxl, trafilatura) - Step 11: Content intelligence service with rule-based + LLM classification - Step 12: Slide mapping engine mapping content blocks to master deck layouts - Step 13: Chart data extractor, native PPTX chart service (bar/line/pie/gantt/waterfall), ChartDataEditor skeleton Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
305 lines
12 KiB
Python
305 lines
12 KiB
Python
"""Slide Mapping Engine: map classified content blocks to master deck layouts."""
|
|
import logging
from typing import Dict, List, Optional

from pydantic import BaseModel

from models.content_models import ClassifiedContent, ContentBlock, ContentBlockType
from models.llm_message import LLMSystemMessage, LLMUserMessage
from services.llm_client import LLMClient
from utils.llm_provider import get_model
|
|
|
|
|
|
class SlideMapping(BaseModel):
    """Plan for a single slide: which content blocks it carries and which layout it uses."""

    # Positions (into the classified content's block list) of the blocks
    # placed on this slide; empty for slides that carry no content block.
    content_block_indices: List[int]

    # Identifier of the chosen layout, kept as a string.
    layout_id: str

    # Display name of the chosen layout.
    layout_name: str

    # Kind of slide, e.g. "title_slide", "section_header", "content",
    # or the value of the content block's type.
    slide_type: str

    # Short text describing what the slide is about.
    content_summary: str

    # Attachment identifiers associated with the slide; empty by default.
    attachment_ids: List[str] = []
|
|
|
|
|
|
# Preferred layout types for each kind of content block, most specific first.
# The strings are matched against the lower-cased ``layout_type`` of the
# layouts (as stored in MasterDeckModel.layouts[].layout_type); the first
# type that exists in the deck wins.
_BLOCK_TO_LAYOUT_TYPE: Dict[ContentBlockType, List[str]] = {
    ContentBlockType.metric: [
        "metrics",
        "kpi",
        "data",
        "chart",
        "content",
    ],
    ContentBlockType.quote: [
        "quote",
        "testimonial",
        "content",
    ],
    ContentBlockType.table: [
        "table",
        "chart",
        "data",
        "content",
    ],
    ContentBlockType.timeline: [
        "timeline",
        "process",
        "content",
    ],
    ContentBlockType.comparison: [
        "comparison",
        "two_column",
        "content",
    ],
    ContentBlockType.list_items: [
        "content",
        "bullet",
        "list",
    ],
    ContentBlockType.narrative: [
        "content",
        "text",
        "description",
    ],
    ContentBlockType.image_reference: [
        "picture",
        "image",
        "content",
    ],
    ContentBlockType.call_to_action: [
        "content",
        "title_slide",
    ],
}
|
|
|
|
|
|
class SlideMappingEngine:
    """Plan a deck by assigning classified content blocks to master deck layouts.

    The rule-based pass produces a title slide, an optional agenda slide,
    one slide per content block (merging blocks when there are more blocks
    than slides) and filler slides up to the requested count. When user
    instructions are supplied, an LLM may then override individual layout
    choices.
    """

    async def map(
        self,
        classified_content: ClassifiedContent,
        layouts: List[dict],
        n_slides: int,
        instructions: Optional[str] = None,
    ) -> List[SlideMapping]:
        """Map classified content blocks to master deck layouts.

        Args:
            classified_content: Output from ContentIntelligenceService.classify()
            layouts: MasterDeckModel.layouts list — each dict has layout_name,
                layout_type, index, etc.
            n_slides: Target number of slides
            instructions: Optional user instructions; when given, the mapping
                is passed through an LLM refinement step.

        Returns:
            Ordered list of SlideMapping. The title slide is always present,
            even when ``n_slides`` is smaller than 1.
        """
        if not layouts:
            return self._fallback_mapping(classified_content, n_slides)

        # Build layout index by type for fast lookup
        layout_by_type: Dict[str, List[dict]] = {}
        for layout in layouts:
            layout_type = (layout.get("layout_type") or "custom").lower()
            layout_by_type.setdefault(layout_type, []).append(layout)

        blocks = classified_content.blocks

        # 1. Always start with a title slide
        title_layout = self._find_layout(layout_by_type, ["title_slide", "title"], layouts)
        mappings: List[SlideMapping] = [
            self._structural_slide(
                title_layout,
                "title_slide",
                "Title",
                classified_content.title or "Presentation Title",
            )
        ]

        # 2. If many blocks, add agenda/section header
        if len(blocks) > 5:
            section_layout = self._find_layout(
                layout_by_type, ["section_header", "section", "content"], layouts
            )
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the agenda is deterministic and follows the document (a set
            # would yield an arbitrary order that can change between runs).
            sections = list(
                dict.fromkeys(b.source_section for b in blocks if b.source_section)
            )
            mappings.append(
                self._structural_slide(
                    section_layout,
                    "section_header",
                    "Agenda",
                    "Agenda: " + ", ".join(sections[:6]),
                )
            )

        # 3. Map each content block to a layout
        remaining_slots = n_slides - len(mappings)
        mappings.extend(
            self._assign_blocks_to_slides(blocks, layout_by_type, layouts, remaining_slots)
        )

        # 4. If we have more slides than content, add transitional slides
        if len(mappings) < n_slides:
            # The layout choice does not depend on the loop, so resolve it once.
            filler_layout = self._find_layout(layout_by_type, ["content", "blank"], layouts)
            while len(mappings) < n_slides:
                mappings.append(
                    self._structural_slide(
                        filler_layout, "content", "Content", "Additional content"
                    )
                )

        # 5. Trim if over target
        if len(mappings) > n_slides:
            # Keep title + agenda, trim lowest-priority from the rest
            fixed = mappings[:2] if len(mappings) > 2 else mappings[:1]
            rest = mappings[len(fixed):]
            # Clamp at zero: a negative bound would slice from the end instead
            # of dropping everything.
            rest = rest[: max(n_slides - len(fixed), 0)]
            mappings = fixed + rest

        # 6. Optional LLM refinement for ambiguous mappings
        if instructions:
            mappings = await self._llm_refine(
                mappings, classified_content, layouts, instructions, n_slides
            )

        return mappings

    def _find_layout(
        self,
        layout_by_type: Dict[str, List[dict]],
        preferred_types: List[str],
        all_layouts: List[dict],
    ) -> dict:
        """Find best matching layout by type preference, fallback to first layout.

        Args:
            layout_by_type: Layouts grouped by lower-cased layout type.
            preferred_types: Layout types to try, in order of preference.
            all_layouts: Every available layout; its first entry is used when
                no preferred type is present.

        Returns:
            The first layout of the first preferred type that exists, else the
            first of ``all_layouts``, else a synthetic default layout dict.
        """
        for layout_type in preferred_types:
            candidates = layout_by_type.get(layout_type)
            if candidates:
                return candidates[0]
        if all_layouts:
            return all_layouts[0]
        return {"index": 0, "layout_name": "Default", "layout_type": "content"}

    @staticmethod
    def _structural_slide(
        layout: dict, slide_type: str, default_name: str, summary: str
    ) -> SlideMapping:
        """Build a slide that carries no content block (title, agenda, filler)."""
        return SlideMapping(
            content_block_indices=[],
            layout_id=str(layout.get("index", 0)),
            layout_name=layout.get("layout_name", default_name),
            slide_type=slide_type,
            content_summary=summary,
        )

    def _block_slide(
        self,
        block_index: int,
        block: ContentBlock,
        layout_by_type: Dict[str, List[dict]],
        all_layouts: List[dict],
    ) -> SlideMapping:
        """Build the slide for one content block using its preferred layout types."""
        preferred = _BLOCK_TO_LAYOUT_TYPE.get(block.type, ["content"])
        layout = self._find_layout(layout_by_type, preferred, all_layouts)
        return SlideMapping(
            content_block_indices=[block_index],
            layout_id=str(layout.get("index", 0)),
            layout_name=layout.get("layout_name", "Content"),
            slide_type=block.type.value,
            content_summary=block.raw_text[:120],
        )

    def _assign_blocks_to_slides(
        self,
        blocks: List[ContentBlock],
        layout_by_type: Dict[str, List[dict]],
        all_layouts: List[dict],
        max_slides: int,
    ) -> List[SlideMapping]:
        """Assign content blocks to slides, respecting max_slides constraint.

        Args:
            blocks: Content blocks in document order.
            layout_by_type: Layouts grouped by lower-cased layout type.
            all_layouts: Every available layout (fallback source).
            max_slides: Maximum number of slides to produce.

        Returns:
            At most ``max_slides`` mappings ordered by the earliest block each
            one carries; empty when ``max_slides`` is not positive.
        """
        if max_slides <= 0:
            return []

        if len(blocks) <= max_slides:
            # One block per slide
            return [
                self._block_slide(i, block, layout_by_type, all_layouts)
                for i, block in enumerate(blocks)
            ]

        # More blocks than slides: the highest-priority blocks each get a
        # slide of their own, the rest are merged onto those slides.
        ranked = sorted(enumerate(blocks), key=lambda pair: -pair[1].priority)
        primary, overflow = ranked[:max_slides], ranked[max_slides:]

        mappings = [
            self._block_slide(idx, block, layout_by_type, all_layouts)
            for idx, block in primary
        ]

        # Spread overflow blocks round-robin across the existing slides, in
        # priority order, so no single slide absorbs all the extra content.
        for i, (idx, _block) in enumerate(overflow):
            mappings[i % len(mappings)].content_block_indices.append(idx)

        # Re-sort mappings by original block order
        mappings.sort(key=lambda m: min(m.content_block_indices, default=len(blocks)))
        return mappings

    @staticmethod
    def _layout_key(layout: dict, position: int) -> str:
        """Return the identifier shown to the LLM for a layout.

        This is the layout's own ``index`` value, or its list position when
        the layout has no ``index``.
        """
        index = layout.get("index")
        return str(position if index is None else index)

    async def _llm_refine(
        self,
        mappings: List[SlideMapping],
        content: ClassifiedContent,
        layouts: List[dict],
        instructions: str,
        n_slides: int,
    ) -> List[SlideMapping]:
        """Use LLM to refine layout assignments based on user instructions.

        Best-effort: if the LLM call fails or returns something unusable, the
        failure is logged and the incoming mapping is returned unchanged.

        Args:
            mappings: Current mapping; entries are updated in place.
            content: Classified content (currently unused).
            layouts: Available layouts offered to the LLM.
            instructions: Free-text user instructions.
            n_slides: Target slide count (currently unused).

        Returns:
            The same list, with ``layout_id`` / ``layout_name`` replaced on the
            slides the LLM asked to change.
        """
        client = LLMClient()
        model = get_model()

        # Resolve answers through the same identifier that is printed in the
        # prompt. Treating the answer as a list position picks the wrong
        # layout whenever a layout's "index" differs from its position.
        layout_by_key: Dict[str, dict] = {}
        layout_lines: List[str] = []
        for position, layout in enumerate(layouts):
            key = self._layout_key(layout, position)
            layout_by_key.setdefault(key, layout)
            layout_lines.append(
                f"- Index {key}: {layout.get('layout_name')} (type: {layout.get('layout_type')})"
            )
        layout_info = "\n".join(layout_lines)

        current_mapping = "\n".join(
            f"Slide {i + 1}: [{m.slide_type}] {m.content_summary[:80]} → layout '{m.layout_name}'"
            for i, m in enumerate(mappings)
        )

        messages = [
            LLMSystemMessage(
                content="You refine slide-to-layout mappings for presentations. "
                "Given the current mapping and user instructions, suggest layout changes. "
                "Return a JSON with 'changes' array of {slide_index: int, new_layout_index: int} objects. "
                "slide_index is the slide number shown in the current mapping (starting at 1) "
                "and new_layout_index is the Index of one of the available layouts. "
                "Only include slides that need changing. Return empty array if no changes needed."
            ),
            LLMUserMessage(
                content=f"Available layouts:\n{layout_info}\n\n"
                f"Current mapping:\n{current_mapping}\n\n"
                f"User instructions: {instructions}"
            ),
        ]

        schema = {
            "type": "object",
            "properties": {
                "changes": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "slide_index": {"type": "integer"},
                            "new_layout_index": {"type": "integer"},
                        },
                        "required": ["slide_index", "new_layout_index"],
                    },
                }
            },
            "required": ["changes"],
        }

        try:
            result = await client.generate_structured(
                model=model, messages=messages, response_format=schema
            )
            changes = result.get("changes", [])
        except Exception:
            # Refinement is optional: keep the rule-based mapping, but leave a
            # trace so the failure is not invisible.
            logging.getLogger(__name__).warning(
                "LLM layout refinement failed; keeping rule-based mapping",
                exc_info=True,
            )
            return mappings

        if not isinstance(changes, list):
            return mappings

        for change in changes:
            if not isinstance(change, dict):
                continue
            slide_number = change.get("slide_index")
            layout_index = change.get("new_layout_index")
            if not isinstance(slide_number, int) or not isinstance(layout_index, int):
                continue

            # The prompt numbers slides from 1; list positions start at 0.
            position = slide_number - 1
            layout = layout_by_key.get(str(layout_index))
            if layout is None or not 0 <= position < len(mappings):
                continue

            mappings[position].layout_id = str(layout_index)
            mappings[position].layout_name = layout.get("layout_name", "Content")

        return mappings

    def _fallback_mapping(
        self, content: ClassifiedContent, n_slides: int
    ) -> List[SlideMapping]:
        """Fallback when no master deck layouts are available.

        Args:
            content: Classified content whose blocks are mapped one per slide.
            n_slides: Target number of slides, including the title slide.

        Returns:
            A title slide followed by one slide for each of the first
            ``n_slides - 1`` blocks, all using layout id "0". Blocks beyond
            that are left out.
        """
        mappings = [
            SlideMapping(
                content_block_indices=[],
                layout_id="0",
                layout_name="Title",
                slide_type="title_slide",
                content_summary=content.title or "Presentation",
            )
        ]

        # Clamp at zero: with n_slides == 0 a bare ``[: n_slides - 1]`` would
        # be ``[:-1]`` and return every block except the last.
        content_slots = max(n_slides - 1, 0)
        for i, block in enumerate(content.blocks[:content_slots]):
            mappings.append(
                SlideMapping(
                    content_block_indices=[i],
                    layout_id="0",
                    layout_name="Content",
                    slide_type=block.type.value,
                    content_summary=block.raw_text[:120],
                )
            )

        return mappings
|