From f73291285d978b5a0afe5ca8c4227f22dfe40d01 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko
Date: Thu, 19 Mar 2026 20:22:22 +0000
Subject: [PATCH] Improve presentation pipeline: brief summarization + section attribution + narrative continuity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on PPTAgent (EMNLP 2025) and DocPres research findings:

1. Brief summarization (summarize_brief.py)
   - For content >800 chars: single LLM call extracts
     {overview, sections[{title, key_points, data_points}]} before outline generation
   - Prevents "lost middle" context loss in long documents
   - BriefStructure.to_outline_context() formats sections for outline prompt
   - BriefStructure.get_section_text(idx) returns targeted excerpt per slide

2. Section attribution in SlideOutlineModel
   - Added source_section_idx: Optional[int] field
   - LLM sets this during outline generation to map each slide → brief section
   - Used to pass targeted section text to per-slide content generation
     instead of full brief (reduces hallucination, improves accuracy)

3. Narrative continuity in slide content generation
   - prev_slide_title passed to each content generation call
   - Injected in user prompt: "ensure this slide continues naturally from..."
   - Batch-safe: titles collected from completed batch before next starts

4. Source section text in content generation
   - source_section_text parameter added to get_slide_content_from_type_and_outline
   - Injected as "Source Material for This Slide" in user prompt
   - Only data points present in the excerpt should be used

5. Richer layout catalog
   - PresentationLayoutModel.to_catalog_string() added
   - Includes field names + maxLength constraints alongside layout descriptions
   - Helps LLM make informed layout choices based on content type

Co-Authored-By: Claude Sonnet 4.6
---
 backend/api/v1/ppt/endpoints/presentation.py  | 50 +++++++--
 backend/models/presentation_layout.py         | 27 +++++
 backend/models/presentation_outline_model.py  |  6 +-
 .../generate_presentation_outlines.py         | 23 +++-
 .../utils/llm_calls/generate_slide_content.py | 34 +++++-
 backend/utils/llm_calls/summarize_brief.py    | 106 ++++++++++++++++++
 6 files changed, 231 insertions(+), 15 deletions(-)
 create mode 100644 backend/utils/llm_calls/summarize_brief.py

diff --git a/backend/api/v1/ppt/endpoints/presentation.py b/backend/api/v1/ppt/endpoints/presentation.py
index 0084a9c..c1f97b8 100644
--- a/backend/api/v1/ppt/endpoints/presentation.py
+++ b/backend/api/v1/ppt/endpoints/presentation.py
@@ -40,6 +40,7 @@ from services.image_generation_service import ImageGenerationService
 from utils.dict_utils import deep_update
 from utils.export_utils import export_presentation
 from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
+from utils.llm_calls.summarize_brief import summarize_brief
 from models.sql.slide import SlideModel
 from models.sse_response import SSECompleteResponse, SSEErrorResponse, SSEResponse
 
@@ -578,6 +579,13 @@ async def generate_presentation_handler(
                 if documents:
                     additional_context = "\n\n".join(documents)
 
+            # Pre-process long content into structured sections to prevent LLM
+            # "lost middle" problem and enable per-slide section attribution.
+            full_content = request.content or ""
+            if additional_context:
+                full_content = f"{full_content}\n\n{additional_context}"
+            brief_structure = await summarize_brief(full_content)
+
             # Finding number of slides to generate by considering table of contents
             n_slides_to_generate = request.n_slides
             if request.include_table_of_contents:
@@ -604,6 +612,7 @@ async def generate_presentation_handler(
                 request.instructions,
                 request.include_title_slide,
                 request.web_search,
+                brief_structure=brief_structure,
             ):
 
                 if isinstance(chunk, HTTPException):
@@ -740,6 +749,9 @@ async def generate_presentation_handler(
         slide_layout_indices = presentation_structure.slides
         slide_layouts = [layout_model.slides[idx] for idx in slide_layout_indices]
 
+        # Build a title lookup from already-generated slides for narrative continuity
+        generated_titles: List[Optional[str]] = []
+
         # Schedule slide content generation and asset fetching in batches of 10
         batch_size = 10
         for start in range(0, len(slide_layouts), batch_size):
@@ -748,19 +760,37 @@ async def generate_presentation_handler(
             print(f"Generating slides from {start} to {end}")
 
             # Generate contents for this batch concurrently
-            content_tasks = [
-                get_slide_content_from_type_and_outline(
-                    slide_layouts[i],
-                    presentation_outlines.slides[i],
-                    request.language,
-                    request.tone.value,
-                    request.verbosity.value,
-                    request.instructions,
+            content_tasks = []
+            for i in range(start, end):
+                outline = presentation_outlines.slides[i]
+
+                # Narrative continuity: a batch runs concurrently, so a title is only known when slide i-1 was in an earlier batch
+                prev_title = generated_titles[i - 1] if i > 0 and i - 1 < len(generated_titles) else None
+
+                # Section attribution: pass targeted brief excerpt when available
+                source_text = None
+                if brief_structure is not None and outline.source_section_idx is not None:
+                    source_text = brief_structure.get_section_text(outline.source_section_idx)
+
+                content_tasks.append(
+                    get_slide_content_from_type_and_outline(
+                        slide_layouts[i],
+                        outline,
+                        request.language,
+                        request.tone.value,
+                        request.verbosity.value,
+                        request.instructions,
+                        prev_slide_title=prev_title,
+                        source_section_text=source_text,
+                    )
                 )
-                for i in range(start, end)
-            ]
+
             batch_contents: List[dict] = await asyncio.gather(*content_tasks)
 
+            # Record titles for next batch's narrative continuity
+            for content_dict in batch_contents:
+                generated_titles.append(content_dict.get("title"))
+
             # Build slides for this batch
             batch_slides: List[SlideModel] = []
             for offset, slide_content in enumerate(batch_contents):
diff --git a/backend/models/presentation_layout.py b/backend/models/presentation_layout.py
index 784e41f..293b362 100644
--- a/backend/models/presentation_layout.py
+++ b/backend/models/presentation_layout.py
@@ -37,3 +37,30 @@ class PresentationLayoutModel(BaseModel):
             message += f"- Name: {slide.name or slide.json_schema.get('title')} \n"
             message += f"- Description: {slide.description} \n\n"
         return message
+
+    def to_catalog_string(self) -> str:
+        """Richer layout catalog for LLM layout-selection prompts.
+
+        Includes placeholder field names and their character limits so the LLM
+        can make more informed layout choices based on content type.
+        """
+        lines = ["## Available Slide Layouts\n"]
+        for index, slide in enumerate(self.slides):
+            name = slide.name or slide.json_schema.get("title", f"Layout {index}")
+            lines.append(f"### Layout {index}: {name}")
+            if slide.description:
+                lines.append(f"Purpose: {slide.description}")
+
+            # Extract field names + constraints from json_schema
+            props = slide.json_schema.get("properties", {})
+            fields = []
+            for field_name, field_def in props.items():
+                if field_name.startswith("__"):
+                    continue  # skip internal fields
+                max_len = field_def.get("maxLength") or field_def.get("maxItems")
+                constraint = f" (max {max_len})" if max_len else ""
+                fields.append(f"{field_name}{constraint}")
+            if fields:
+                lines.append(f"Fields: {', '.join(fields)}")
+            lines.append("")
+        return "\n".join(lines)
diff --git a/backend/models/presentation_outline_model.py b/backend/models/presentation_outline_model.py
index 01a3b2b..29af008 100644
--- a/backend/models/presentation_outline_model.py
+++ b/backend/models/presentation_outline_model.py
@@ -1,9 +1,13 @@
-from typing import List
+from typing import List, Optional
 from pydantic import BaseModel
 
 
 class SlideOutlineModel(BaseModel):
     content: str
+    # Index into BriefStructure.sections — set during outline generation when a
+    # structured brief is available. Used to pass targeted section text to the
+    # per-slide content generation call instead of the full brief.
+    source_section_idx: Optional[int] = None
 
 
 class PresentationOutlineModel(BaseModel):
diff --git a/backend/utils/llm_calls/generate_presentation_outlines.py b/backend/utils/llm_calls/generate_presentation_outlines.py
index 570a4a3..b1bb2d1 100644
--- a/backend/utils/llm_calls/generate_presentation_outlines.py
+++ b/backend/utils/llm_calls/generate_presentation_outlines.py
@@ -1,6 +1,6 @@
 import asyncio
 from datetime import datetime
-from typing import Optional
+from typing import Optional, TYPE_CHECKING
 
 from fastapi import HTTPException
 from models.llm_message import LLMSystemMessage, LLMUserMessage
@@ -10,6 +10,9 @@ from utils.get_dynamic_models import get_presentation_outline_model_with_n_slide
 from utils.llm_client_error_handler import handle_llm_client_exceptions
 from utils.llm_provider import get_model
 
+if TYPE_CHECKING:
+    from utils.llm_calls.summarize_brief import BriefStructure
+
 
 def get_system_prompt(
     tone: Optional[str] = None,
@@ -76,11 +79,24 @@ def get_user_prompt(
     language: str,
     additional_context: Optional[str] = None,
     content_summary: Optional[str] = None,
+    brief_structure: Optional["BriefStructure"] = None,
 ):
     summary_section = ""
     if content_summary:
         summary_section = f"- Content Analysis Summary: {content_summary}"
 
+    brief_section = ""
+    if brief_structure is not None and brief_structure.sections:
+        brief_section = f"""
+## Structured Brief (pre-extracted sections — use these as the authoritative source)
+{brief_structure.to_outline_context()}
+
+For each slide in your output, set source_section_idx to the 0-based index of the
+section above that the slide primarily draws from. This enables targeted content
+retrieval later. Set source_section_idx to 0 for title/intro slides and
+{len(brief_structure.sections) - 1} for conclusion/closing slides.
+"""
+
     return f"""
         **Input:**
         - User provided content: {content or "Create presentation"}
@@ -89,6 +105,7 @@ def get_user_prompt(
         - Current Date and Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
         - Additional Information: {additional_context or ""}
         {summary_section}
+        {brief_section}
     """
 
 
@@ -104,6 +121,7 @@ def get_messages(
     brand_context: Optional[str] = None,
     available_layouts: Optional[str] = None,
     content_summary: Optional[str] = None,
+    brief_structure: Optional["BriefStructure"] = None,
 ):
     return [
         LLMSystemMessage(
@@ -115,6 +133,7 @@ def get_messages(
         LLMUserMessage(
             content=get_user_prompt(
                 content, n_slides, language, additional_context, content_summary,
+                brief_structure,
             ),
         ),
     ]
@@ -133,6 +152,7 @@ async def generate_ppt_outline(
     brand_context: Optional[str] = None,
     available_layouts: Optional[str] = None,
     content_summary: Optional[str] = None,
+    brief_structure: Optional["BriefStructure"] = None,
 ):
     model = get_model()
     response_model = get_presentation_outline_model_with_n_slides(n_slides)
@@ -154,6 +174,7 @@ async def generate_ppt_outline(
                 brand_context,
                 available_layouts,
                 content_summary,
+                brief_structure,
             ),
             response_model.model_json_schema(),
             strict=True,
diff --git a/backend/utils/llm_calls/generate_slide_content.py b/backend/utils/llm_calls/generate_slide_content.py
index 764dab7..ff27419 100644
--- a/backend/utils/llm_calls/generate_slide_content.py
+++ b/backend/utils/llm_calls/generate_slide_content.py
@@ -96,7 +96,28 @@ def get_system_prompt(
     """
 
 
-def get_user_prompt(outline: str, language: str):
+def get_user_prompt(
+    outline: str,
+    language: str,
+    prev_slide_title: Optional[str] = None,
+    source_section_text: Optional[str] = None,
+):
+    prev_slide_section = ""
+    if prev_slide_title:
+        prev_slide_section = f"""
+## Previous Slide
+"{prev_slide_title}" — ensure this slide continues naturally from it.
+"""
+
+    source_section = ""
+    if source_section_text:
+        source_section = f"""
+## Source Material for This Slide
+Use the following excerpt from the brief as the primary source of facts for this slide.
+Do NOT invent data points not present here.
+{source_section_text}
+"""
+
     return f"""
         ## Current Date and Time
         {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
@@ -106,7 +127,8 @@ def get_user_prompt(outline: str, language: str):
 
         ## Slide Content Language
         {language}
-
+        {prev_slide_section}
+        {source_section}
         ## Slide Outline
         {outline}
     """
@@ -120,6 +142,8 @@ def get_messages(
     instructions: Optional[str] = None,
     brand_context: Optional[str] = None,
     attachment_context: Optional[str] = None,
+    prev_slide_title: Optional[str] = None,
+    source_section_text: Optional[str] = None,
 ):
 
     return [
@@ -129,7 +153,7 @@ def get_messages(
             ),
         ),
         LLMUserMessage(
-            content=get_user_prompt(outline, language),
+            content=get_user_prompt(outline, language, prev_slide_title, source_section_text),
         ),
     ]
 
@@ -143,6 +167,8 @@ async def get_slide_content_from_type_and_outline(
     instructions: Optional[str] = None,
     brand_context: Optional[str] = None,
     attachment_context: Optional[str] = None,
+    prev_slide_title: Optional[str] = None,
+    source_section_text: Optional[str] = None,
 ):
     client = LLMClient()
     model = get_model()
@@ -217,6 +243,8 @@ async def get_slide_content_from_type_and_outline(
                 instructions,
                 brand_context,
                 attachment_context,
+                prev_slide_title,
+                source_section_text,
             ),
             response_format=response_schema,
             strict=False,
diff --git a/backend/utils/llm_calls/summarize_brief.py b/backend/utils/llm_calls/summarize_brief.py
new file mode 100644
index 0000000..568ae52
--- /dev/null
+++ b/backend/utils/llm_calls/summarize_brief.py
@@ -0,0 +1,106 @@
+"""Brief pre-processing: extract structured sections from long content.
+
+For documents longer than SUMMARIZE_THRESHOLD chars, a single LLM call extracts
+a structured {overview, sections[]} object. This prevents the "lost middle"
+problem where LLMs miss facts buried in the middle of a long document when the
+raw brief is passed directly to outline generation.
+
+The structured BriefStructure is then:
+  - Passed to outline generation as richer context
+  - Used for section attribution: each slide outline records which section
+    it draws from so the per-slide content call gets a targeted excerpt
+    instead of the full brief (reducing hallucination).
+"""
+from __future__ import annotations
+
+from typing import List, Optional
+
+from models.llm_message import LLMSystemMessage, LLMUserMessage
+from pydantic import BaseModel
+from services.llm_client import LLMClient
+from utils.llm_provider import get_model
+
+SUMMARIZE_THRESHOLD = 800  # chars — below this, summarisation adds no value
+
+
+class BriefSection(BaseModel):
+    title: str
+    key_points: List[str]
+    data_points: List[str] = []  # explicit numbers/stats to preserve verbatim
+
+
+class BriefStructure(BaseModel):
+    overview: str
+    sections: List[BriefSection]
+
+    def to_outline_context(self) -> str:
+        """Render for injection into the outline prompt; labels are 0-based to match source_section_idx."""
+        lines = [f"## Brief Overview\n{self.overview}\n"]
+        for i, sec in enumerate(self.sections):
+            lines.append(f"### Section {i}: {sec.title}")
+            for pt in sec.key_points:
+                lines.append(f"- {pt}")
+            if sec.data_points:
+                lines.append("**Key data:**")
+                for dp in sec.data_points:
+                    lines.append(f"- {dp}")
+            lines.append("")
+        return "\n".join(lines)
+
+    def get_section_text(self, idx: int) -> Optional[str]:
+        """Return the full text of section[idx] for per-slide context injection."""
+        if idx < 0 or idx >= len(self.sections):
+            return None
+        sec = self.sections[idx]
+        lines = [f"**{sec.title}**"]
+        lines.extend(f"- {pt}" for pt in sec.key_points)
+        if sec.data_points:
+            lines.append("Key data:")
+            lines.extend(f"- {dp}" for dp in sec.data_points)
+        return "\n".join(lines)
+
+
+_SYSTEM = """\
+You are a document analyst. Extract structured sections from the provided brief.
+
+Rules:
+- overview: 1-2 sentence summary of the whole document
+- sections: each logical section/topic becomes one entry
+- key_points: 2-5 concise bullet points per section — rephrase for clarity but do NOT invent facts
+- data_points: copy numbers, percentages, statistics verbatim from the source
+- Produce between 3 and 10 sections; merge very short sections
+- Output valid JSON only, no markdown fences
+"""
+
+_RESPONSE_SCHEMA = BriefStructure.model_json_schema()
+
+
+async def summarize_brief(content: str) -> Optional[BriefStructure]:
+    """Extract structured sections from a brief.
+
+    Returns None for missing or short content (below SUMMARIZE_THRESHOLD) since
+    the raw content is already compact enough to pass directly.
+    """
+    if not content or len(content) < SUMMARIZE_THRESHOLD:
+        return None
+
+    client = LLMClient()
+    model = get_model()
+
+    messages = [
+        LLMSystemMessage(content=_SYSTEM),
+        LLMUserMessage(content=f"Extract sections from this brief:\n\n{content}"),
+    ]
+
+    try:
+        response = await client.generate_structured(
+            model=model,
+            messages=messages,
+            response_format=_RESPONSE_SCHEMA,
+            strict=False,
+        )
+        return BriefStructure(**response)
+    except Exception as e:
+        # Non-fatal — fall back to raw content if summarisation fails
+        print(f"[summarize_brief] Failed ({e}), using raw content")
+        return None