Merge pull request #210 from presenton/feat/presentation-slide-speaker-notes

feat/presentation slide speaker notes
This commit is contained in:
Saurav Niraula 2025-08-12 16:58:55 +05:45 committed by GitHub
commit 936258bfe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 101 additions and 13 deletions

View file

@ -221,6 +221,7 @@ async def stream_presentation(
layout_group=layout.name,
layout=slide_layout.id,
index=i,
speaker_note=slide_content.get("__speaker_note__", ""),
content=slide_content,
)
slides.append(slide)

View file

@ -14,7 +14,6 @@ from utils.llm_calls.edit_slide_html import get_edited_slide_html
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
from utils.process_slides import process_old_and_new_slides_and_fetch_assets
from utils.randomizers import get_random_uuid
from utils.schema_utils import remove_fields_from_schema
SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])
@ -59,6 +58,7 @@ async def edit_slide(
sql_session.add(slide)
slide.content = edited_slide_content
slide.layout = slide_layout.id
slide.speaker_note = edited_slide_content.get("__speaker_note__", "")
sql_session.add_all(new_assets)
await sql_session.commit()

View file

@ -156,6 +156,7 @@ class PptxConnectorModel(PptxShapeModel):
class PptxSlideModel(BaseModel):
background: Optional[PptxFillModel] = None
note: Optional[str] = None
shapes: List[
PptxTextBoxModel
| PptxAutoShapeBoxModel

View file

@ -12,6 +12,7 @@ class SlideModel(SQLModel, table=True):
index: int
content: dict = Field(sa_column=Column(JSON))
html_content: Optional[str]
speaker_note: str
properties: Optional[dict] = Field(sa_column=Column(JSON))
def get_new_slide(self, presentation_id: str, content: Optional[dict] = None):
@ -21,6 +22,7 @@ class SlideModel(SQLModel, table=True):
layout_group=self.layout_group,
layout=self.layout,
index=self.index,
speaker_note=self.speaker_note,
content=content or self.content,
properties=self.properties,
)

View file

@ -147,6 +147,9 @@ class PptxPresentationCreator:
if slide_model.background:
self.apply_fill_to_shape(slide.background, slide_model.background)
if slide_model.note:
slide.notes_slide.notes_text_frame.text = slide_model.note
for shape_model in slide_model.shapes:
model_type = type(shape_model)

View file

@ -3,10 +3,11 @@ from models.presentation_layout import SlideLayoutModel
from models.sql.slide import SlideModel
from services.llm_client import LLMClient
from utils.llm_provider import get_model
from utils.schema_utils import remove_fields_from_schema
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
system_prompt = """
Edit Slide data based on provided prompt, follow mentioned steps and notes and provide structured output.
Edit Slide data and speaker note based on provided prompt, follow mentioned steps and notes and provide structured output.
# Notes
- Provide output in language mentioned in **Input**.
@ -14,6 +15,8 @@ system_prompt = """
- Do not change **Image prompts** and **Icon queries** if not asked for in prompt.
- Generate **Image prompts** and **Icon queries** if asked to generate or change in prompt.
- Make sure to follow language guidelines.
- Speaker note should be normal text, not markdown.
- Speaker note should be simple, clear, concise and to the point.
**Go through all notes and steps and make sure they are followed, including mentioned constraints**
"""
@ -61,6 +64,18 @@ async def get_edited_slide_content(
response_schema = remove_fields_from_schema(
slide_layout.json_schema, ["__image_url__", "__icon_url__"]
)
response_schema = add_field_in_schema(
response_schema,
{
"__speaker_note__": {
"type": "string",
"minLength": 100,
"maxLength": 250,
"description": "Speaker note for the slide",
}
},
True,
)
client = LLMClient()
response = await client.generate_structured(

View file

@ -3,7 +3,7 @@ from models.presentation_layout import SlideLayoutModel
from models.presentation_outline_model import SlideOutlineModel
from services.llm_client import LLMClient
from utils.llm_provider import get_model
from utils.schema_utils import remove_fields_from_schema
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
system_prompt = """
Generate structured slide based on provided outline, follow mentioned steps and notes and provide structured output.
@ -11,6 +11,7 @@ system_prompt = """
# Steps
1. Analyze the outline.
2. Generate structured slide based on the outline.
3. Generate speaker note that is simple, clear, concise and to the point.
# Notes
- Slide body should not use words like "This slide", "This presentation".
@ -19,6 +20,7 @@ system_prompt = """
- Provide query to search icon on "__icon_query__" property.
- Only use markdown to highlight important points.
- Make sure to follow language guidelines.
- Speaker note should be normal text, not markdown.
**Strictly follow the max and min character limit for every property in the slide.**
"""
@ -57,6 +59,18 @@ async def get_slide_content_from_type_and_outline(
response_schema = remove_fields_from_schema(
slide_layout.json_schema, ["__image_url__", "__icon_url__"]
)
response_schema = add_field_in_schema(
response_schema,
{
"__speaker_note__": {
"type": "string",
"minLength": 100,
"maxLength": 250,
"description": "Speaker note for the slide",
}
},
True,
)
response = await client.generate_structured(
model=model,

View file

@ -45,6 +45,48 @@ def remove_fields_from_schema(schema: dict, fields_to_remove: List[str]):
return schema
def add_field_in_schema(schema: dict, field: dict, required: bool = False) -> dict:
    """Return a copy of ``schema`` with one root-level property added or replaced.

    The input ``schema`` and ``field`` are never mutated, and the returned
    schema shares no nested objects with either of them.

    Args:
        schema: JSON-schema-like dict. Its root ``properties`` mapping is
            created when missing or when it is not a dict.
        field: Single-entry dict of the form ``{name: schema_dict}``.
        required: When True, ``name`` is appended to the root ``required``
            list if not already present. When False, ``name`` is removed
            from that list.

    Returns:
        The updated deep copy of ``schema``. The ``required`` key is dropped
        entirely when the resulting list is empty.

    Raises:
        ValueError: If ``field`` is not a dict with exactly one entry.
        TypeError: If the field name is not a string or the field schema is
            not a dict.
    """
    if not isinstance(field, dict) or len(field) != 1:
        raise ValueError(
            "`field` must be a dict with exactly one entry: {name: schema_dict}"
        )

    field_name, field_schema = next(iter(field.items()))
    if not isinstance(field_name, str):
        raise TypeError("Field name must be a string")
    if not isinstance(field_schema, dict):
        raise TypeError("Field schema must be a dictionary")

    updated_schema: dict = deepcopy(schema)

    root_properties = updated_schema.get("properties")
    if not isinstance(root_properties, dict):
        root_properties = {}
        updated_schema["properties"] = root_properties

    # Copy the field schema as well: the base schema is already deep-copied,
    # so inserting the caller's dict by reference would let later in-place
    # edits of the returned schema leak back into the caller's object.
    root_properties[field_name] = deepcopy(field_schema)

    # Update root-level required based on the flag
    existing_required = updated_schema.get("required")
    if not isinstance(existing_required, list):
        existing_required = []

    if required:
        if field_name not in existing_required:
            existing_required.append(field_name)
    else:
        existing_required = [name for name in existing_required if name != field_name]

    if existing_required:
        updated_schema["required"] = existing_required
    else:
        # An empty (or previously malformed) `required` entry is removed.
        updated_schema.pop("required", None)

    return updated_schema
# From OpenAI
def ensure_strict_json_schema(
json_schema: object,

View file

@ -115,7 +115,7 @@ const PresentationPage = ({ presentation_id }: { presentation_id: string }) => {
presentationData.slides &&
presentationData.slides.length > 0 &&
presentationData.slides.map((slide: any, index: number) => (
<div key={index} className="w-full">
<div key={index} className="w-full" data-speaker-note={slide.speaker_note}>
{renderSlideContent(slide, true)}
</div>
))}

View file

@ -31,9 +31,9 @@ export async function GET(request: NextRequest) {
[browser, page] = await getBrowserAndPage(id);
const screenshotsDir = getScreenshotsDir();
const slides = await getSlides(page);
const { slides, speakerNotes } = await getSlidesAndSpeakerNotes(page);
const slides_attributes = await getSlidesAttributes(slides, screenshotsDir);
await postProcessSlidesAttributes(slides_attributes, screenshotsDir);
await postProcessSlidesAttributes(slides_attributes, screenshotsDir, speakerNotes);
const slides_pptx_models = convertElementAttributesToPptxSlides(slides_attributes);
const presentation_pptx_model: PptxPresentationModel = {
slides: slides_pptx_models,
@ -100,8 +100,8 @@ function getScreenshotsDir() {
return screenshotsDir;
}
async function postProcessSlidesAttributes(slidesAttributes: SlideAttributesResult[], screenshotsDir: string) {
for (const slideAttributes of slidesAttributes) {
async function postProcessSlidesAttributes(slidesAttributes: SlideAttributesResult[], screenshotsDir: string, speakerNotes: string[]) {
for (const [index, slideAttributes] of slidesAttributes.entries()) {
for (const element of slideAttributes.elements) {
if (element.should_screenshot) {
const screenshotPath = await screenshotElement(element, screenshotsDir);
@ -111,6 +111,7 @@ async function postProcessSlidesAttributes(slidesAttributes: SlideAttributesResu
element.element = undefined;
}
}
slideAttributes.speakerNote = speakerNotes[index];
}
}
@ -190,15 +191,15 @@ async function getSlidesAttributes(slides: ElementHandle<Element>[], screenshots
const slideAttributes = await Promise.all(
slides.map((slide) => getAllChildElementsAttributes({ element: slide, screenshotsDir }))
);
return slideAttributes;
}
async function getSlides(page: Page) {
async function getSlidesAndSpeakerNotes(page: Page) {
const slides_wrapper = await getSlidesWrapper(page);
const speakerNotes = await getSpeakerNotes(slides_wrapper);
const slides = await slides_wrapper.$$(":scope > div > div");
return slides;
return { slides, speakerNotes };
}
async function getSlidesWrapper(page: Page): Promise<ElementHandle<Element>> {
@ -209,6 +210,12 @@ async function getSlidesWrapper(page: Page): Promise<ElementHandle<Element>> {
return slides_wrapper;
}
/**
 * Reads the `data-speaker-note` attribute of every element under the slides
 * wrapper that carries one.
 *
 * @param slides_wrapper Handle to the element whose descendants are searched.
 * @returns One string per matching element, in document order. A missing or
 *          empty attribute value is reported as an empty string.
 */
async function getSpeakerNotes(slides_wrapper: ElementHandle<Element>) {
  return await slides_wrapper.evaluate((wrapper) => {
    const notes: string[] = [];
    // querySelectorAll yields matches in document order, so the collected
    // notes keep the order in which the annotated elements appear.
    wrapper.querySelectorAll('[data-speaker-note]').forEach((annotated) => {
      notes.push(annotated.getAttribute('data-speaker-note') || "");
    });
    return notes;
  });
}
async function getAllChildElementsAttributes({ element, rootRect = null, depth = 0, inheritedFont, inheritedBackground, inheritedBorderRadius, inheritedZIndex, inheritedOpacity, screenshotsDir }: GetAllChildElementsAttributesArgs): Promise<SlideAttributesResult> {
if (!rootRect) {
const rootAttributes = await getElementAttributes(element);

View file

@ -78,4 +78,5 @@ export interface ElementAttributes {
/**
 * Attributes collected for a single slide.
 */
export interface SlideAttributesResult {
/** Attributes of the elements found on the slide. */
elements: ElementAttributes[];
/** Background colour of the slide, when one is set. */
backgroundColor?: string;
/** Speaker note text for the slide; absent when no note was attached. */
speakerNote?: string;
}

View file

@ -327,6 +327,7 @@ export interface PptxConnectorModel extends PptxShapeModel {
/**
 * Description of one slide: an optional background fill, the shapes placed
 * on it, and an optional note.
 */
export interface PptxSlideModel {
/** Fill for the slide background, when one is set. */
background?: PptxFillModel;
/** Shapes on the slide: text boxes, auto shapes, connectors and pictures. */
shapes: (PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel)[];
/** Note text for the slide — presumably the speaker note; confirm against the producer of this model. */
note?: string;
}
export interface PptxPresentationModel {

View file

@ -61,7 +61,8 @@ export function convertElementAttributesToPptxSlides(
}).filter(Boolean);
const slide: PptxSlideModel = {
shapes: shapes as (PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel)[]
shapes: shapes as (PptxTextBoxModel | PptxAutoShapeBoxModel | PptxConnectorModel | PptxPictureBoxModel)[],
note: slideAttributes.speakerNote
};
if (slideAttributes.backgroundColor) {