Based on PPTAgent (EMNLP 2025) and DocPres research findings:
1. Brief summarization (summarize_brief.py)
- For content >800 chars: single LLM call extracts {overview, sections[{title,
key_points, data_points}]} before outline generation
- Prevents "lost middle" context loss in long documents
- BriefStructure.to_outline_context() formats sections for outline prompt
- BriefStructure.get_section_text(idx) returns targeted excerpt per slide
2. Section attribution in SlideOutlineModel
- Added source_section_idx: Optional[int] field
- LLM sets this during outline generation to map each slide → brief section
- Used to pass targeted section text to per-slide content generation
instead of full brief (reduces hallucination, improves accuracy)
3. Narrative continuity in slide content generation
- prev_slide_title passed to each content generation call
- Injected in user prompt: "ensure this slide continues naturally from..."
- Batch-safe: titles are collected from each completed batch before the next batch starts
4. Source section text in content generation
- source_section_text parameter added to get_slide_content_from_type_and_outline
- Injected as "Source Material for This Slide" in user prompt
- Only data points present in the excerpt should be used
5. Richer layout catalog
- PresentationLayoutModel.to_catalog_string() added
- Includes field names + maxLength constraints alongside layout descriptions
- Helps LLM make informed layout choices based on content type
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
66 lines
2.5 KiB
Python
66 lines
2.5 KiB
Python
from typing import List, Optional
|
|
from fastapi import HTTPException
|
|
from pydantic import BaseModel, Field
|
|
|
|
from models.presentation_structure_model import PresentationStructureModel
|
|
|
|
|
|
class SlideLayoutModel(BaseModel):
    """Describe one slide layout: an identifier, optional labels, and a schema.

    Only ``id`` and ``json_schema`` are required; ``name`` and
    ``description`` default to ``None`` when omitted.
    """

    # Identifier compared against in PresentationLayoutModel.get_slide_layout_index.
    id: str
    # Optional display name; the layout-listing helpers fall back to the
    # schema's "title" entry when this is empty.
    name: Optional[str] = Field(default=None)
    # Optional free-text description of the layout.
    description: Optional[str] = Field(default=None)
    # Schema dictionary for the layout's content; its "title" and
    # "properties" keys are read when layouts are rendered to text.
    json_schema: dict
|
|
|
|
|
|
class PresentationLayoutModel(BaseModel):
    """A named collection of slide layouts with helpers to render it as text.

    The text helpers (``to_string`` and ``to_catalog_string``) produce
    Markdown-style listings of the layouts, indexed by their position in
    ``slides``.
    """

    # Name of this layout collection.
    name: str
    # Defaults to False.
    # NOTE(review): presumably signals that slides must be used in the given
    # order -- nothing in this class reads it; confirm against callers.
    ordered: bool = Field(default=False)
    # The available slide layouts; positions in this list are the indices
    # reported by the helpers below.
    slides: List[SlideLayoutModel]

    def get_slide_layout_index(self, slide_layout_id: str) -> int:
        """Return the position of the first layout whose ``id`` matches.

        Args:
            slide_layout_id: Layout identifier to look up.

        Returns:
            Zero-based index into ``self.slides``.

        Raises:
            HTTPException: With status 404 when no layout has that id.
        """
        for index, slide in enumerate(self.slides):
            if slide.id == slide_layout_id:
                return index
        raise HTTPException(
            status_code=404, detail=f"Slide layout {slide_layout_id} not found"
        )

    def to_presentation_structure(self) -> PresentationStructureModel:
        """Build a structure that references every layout once, in list order.

        Returns:
            A ``PresentationStructureModel`` whose ``slides`` is
            ``[0, 1, ..., len(self.slides) - 1]``.
        """
        return PresentationStructureModel(slides=list(range(len(self.slides))))

    def to_string(self) -> str:
        """Render a short listing (index, name, description) of all layouts.

        The name falls back to the schema's ``"title"`` entry when the layout
        has no name of its own.

        Returns:
            The listing as a single string.
        """
        parts = ["## Presentation Layout\n\n"]
        for index, slide in enumerate(self.slides):
            parts.append(f"### Slide Layout: {index}: \n")
            parts.append(
                f"- Name: {slide.name or slide.json_schema.get('title')} \n"
            )
            parts.append(f"- Description: {slide.description} \n\n")
        # Join once instead of growing a string with += on every iteration.
        return "".join(parts)

    def to_catalog_string(self) -> str:
        """Render a richer layout catalog for LLM layout-selection prompts.

        For each layout this lists its index and name, its description (when
        present), and the property names found in the schema's
        ``"properties"`` mapping. A property is annotated with ``(max N)``
        when its schema carries a truthy ``maxLength`` or, failing that,
        ``maxItems``. Properties whose name starts with ``"__"`` are skipped.

        Returns:
            The catalog as a newline-joined string.
        """
        lines = ["## Available Slide Layouts\n"]
        for index, slide in enumerate(self.slides):
            name = slide.name or slide.json_schema.get("title", f"Layout {index}")
            lines.append(f"### Layout {index}: {name}")
            if slide.description:
                lines.append(f"Purpose: {slide.description}")

            # json_schema is an untyped dict, so "properties" may be missing,
            # null, or otherwise not a mapping; treat all of those as empty.
            props = slide.json_schema.get("properties")
            if not isinstance(props, dict):
                props = {}

            fields = []
            for field_name, field_def in props.items():
                if field_name.startswith("__"):
                    continue  # skip internal fields
                max_len = None
                # A property schema is not guaranteed to be a dict (JSON
                # Schema allows boolean schemas); only dicts carry limits.
                if isinstance(field_def, dict):
                    max_len = field_def.get("maxLength") or field_def.get("maxItems")
                constraint = f" (max {max_len})" if max_len else ""
                fields.append(f"{field_name}{constraint}")

            if fields:
                lines.append(f"Fields: {', '.join(fields)}")
            lines.append("")
        return "\n".join(lines)
|