From 5ec4144f9fcca3daefa4652d70f8a41ff9e21707 Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Thu, 28 Aug 2025 20:35:27 +0545 Subject: [PATCH] feat: adds content and instruction on UI endpoints --- .../fastapi/api/v1/ppt/endpoints/outlines.py | 6 +- .../api/v1/ppt/endpoints/presentation.py | 21 ++++-- .../models/generate_presentation_request.py | 5 +- .../models/presentation_with_slides.py | 3 +- servers/fastapi/models/sql/presentation.py | 8 +- .../generate_presentation_outlines.py | 75 ++++++++++++------- .../generate_presentation_structure.py | 11 ++- .../utils/llm_calls/generate_slide_content.py | 62 ++++++++------- 8 files changed, 126 insertions(+), 65 deletions(-) diff --git a/servers/fastapi/api/v1/ppt/endpoints/outlines.py b/servers/fastapi/api/v1/ppt/endpoints/outlines.py index 5f485b7c..dc7f813d 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/outlines.py +++ b/servers/fastapi/api/v1/ppt/endpoints/outlines.py @@ -49,7 +49,10 @@ async def stream_outlines( slides=[chunk.to_slide_outline() for chunk in chunks] ) except Exception as e: - print(e) + raise HTTPException( + status_code=400, + detail="Failed to generate presentation outlines. Please try again.", + ) else: additional_context = "\n\n".join(documents) @@ -60,6 +63,7 @@ async def stream_outlines( presentation.n_slides, presentation.language, additional_context, + presentation.instruction, ): # Give control to the event loop await asyncio.sleep(0) diff --git a/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/servers/fastapi/api/v1/ppt/endpoints/presentation.py index 440f7c89..da7c749b 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -108,20 +108,22 @@ async def get_all_presentations(sql_session: AsyncSession = Depends(get_async_se @PRESENTATION_ROUTER.post("/create", response_model=PresentationModel) async def create_presentation( - prompt: Annotated[str, Body()], + content: Annotated[str, Body()], n_slides: Annotated[int, Body()], language: Annotated[str, Body()], file_paths: Annotated[Optional[List[str]], Body()] = None, + instruction: Annotated[Optional[str], Body()] = None, sql_session: AsyncSession = Depends(get_async_session), ): presentation_id = get_random_uuid() presentation = PresentationModel( id=presentation_id, - prompt=prompt, + content=content, n_slides=n_slides, language=language, file_paths=file_paths, + instruction=instruction, ) sql_session.add(presentation) @@ -157,6 +159,7 @@ async def prepare_presentation( await generate_presentation_structure( presentation_outline=presentation_outline_model, presentation_layout=layout, + instruction=presentation.instruction, ) ) @@ -216,7 +219,10 @@ async def stream_presentation( slide_layout = layout.slides[slide_layout_index] slide_content = await get_slide_content_from_type_and_outline( - slide_layout, outline.slides[i], presentation.language + slide_layout, + outline.slides[i], + presentation.language, + presentation.instruction, ) slide = SlideModel( @@ -341,10 +347,11 @@ async def generate_presentation_api( if not presentation_outlines: presentation_outlines_text = "" async for chunk in generate_ppt_outline( - request.prompt, + request.content, request.n_slides, request.language, additional_context, + request.instruction, ): presentation_outlines_text += chunk @@ -376,6 +383,7 @@ async def generate_presentation_api( await generate_presentation_structure( presentation_outlines, layout_model, + request.instruction, ) ) @@ -391,12 +399,13 @@ async def generate_presentation_api( # 6. Create PresentationModel presentation = PresentationModel( id=presentation_id, - prompt=request.prompt, + content=request.content, n_slides=request.n_slides, language=request.language, outlines=presentation_outlines.model_dump(), layout=layout_model.model_dump(), structure=presentation_structure.model_dump(), + instruction=request.instruction, ) image_generation_service = ImageGenerationService(get_images_directory()) @@ -409,7 +418,7 @@ async def generate_presentation_api( slide_layout = layout_model.slides[slide_layout_index] print(f"Generating content for slide {i} with layout {slide_layout.id}") slide_content = await get_slide_content_from_type_and_outline( - slide_layout, outlines[i], request.language + slide_layout, outlines[i], request.language, request.instruction ) slide = SlideModel( presentation=presentation_id, diff --git a/servers/fastapi/models/generate_presentation_request.py b/servers/fastapi/models/generate_presentation_request.py index a38f6a2e..d210c4a7 100644 --- a/servers/fastapi/models/generate_presentation_request.py +++ b/servers/fastapi/models/generate_presentation_request.py @@ -3,7 +3,10 @@ from pydantic import BaseModel, Field class GeneratePresentationRequest(BaseModel): - prompt: str = Field(..., description="The prompt for generating the presentation") + content: str = Field(..., description="The content for generating the presentation") + instruction: Optional[str] = Field( + default=None, description="The instruction for generating the presentation" + ) n_slides: int = Field(default=8, description="Number of slides to generate") language: str = Field( default="English", description="Language for the presentation" diff --git a/servers/fastapi/models/presentation_with_slides.py b/servers/fastapi/models/presentation_with_slides.py index 08bc6fdb..c6edde76 100644 --- a/servers/fastapi/models/presentation_with_slides.py +++ b/servers/fastapi/models/presentation_with_slides.py @@ -12,7 +12,7 @@ from models.sql.slide import SlideModel class PresentationWithSlides(BaseModel): id: str - prompt: str + content: str n_slides: int language: str title: Optional[str] = None @@ -21,6 +21,7 @@ class PresentationWithSlides(BaseModel): updated_at: datetime layout: Optional[PresentationLayoutModel] structure: Optional[PresentationStructureModel] + instruction: Optional[str] = None slides: List[SlideModel] def to_presentation_model(self) -> PresentationModel: diff --git a/servers/fastapi/models/sql/presentation.py b/servers/fastapi/models/sql/presentation.py index c0b65c64..e565d9a0 100644 --- a/servers/fastapi/models/sql/presentation.py +++ b/servers/fastapi/models/sql/presentation.py @@ -1,6 +1,6 @@ from datetime import datetime from typing import List, Optional -from sqlalchemy import JSON, Column, DateTime +from sqlalchemy import JSON, Column, DateTime, String from sqlmodel import Field, SQLModel from models.presentation_layout import PresentationLayoutModel @@ -11,7 +11,7 @@ from utils.randomizers import get_random_uuid class PresentationModel(SQLModel, table=True): id: str = Field(primary_key=True) - prompt: str + content: str n_slides: int language: str title: Optional[str] = None @@ -21,11 +21,12 @@ class PresentationModel(SQLModel, table=True): updated_at: datetime = Field(sa_column=Column(DateTime, default=datetime.now)) layout: Optional[dict] = Field(sa_column=Column(JSON), default=None) structure: Optional[dict] = Field(sa_column=Column(JSON), default=None) + instruction: Optional[str] = Field(sa_column=Column(String), default=None) def get_new_presentation(self): return PresentationModel( id=get_random_uuid(), - prompt=self.prompt, + content=self.content, n_slides=self.n_slides, language=self.language, title=self.title, @@ -33,6 +34,7 @@ class PresentationModel(SQLModel, table=True): outlines=self.outlines, layout=self.layout, structure=self.structure, + instruction=self.instruction, ) def get_presentation_outline(self): diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 9212edc1..7c57e734 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -7,47 +7,64 @@ from services.llm_client import LLMClient from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides from utils.llm_provider import get_model -system_prompt = """ - You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content. - Try to use available tools for better results. - - - Provide content for each slide in markdown format. - - Make sure that flow of the presentation is logical and consistent. - - Place greater emphasis on numerical data. - - If Additional Information is provided, divide it into slides. - - Make sure no images are provided in the content. - - Make sure that content follows language guidelines. -""" - - -def get_user_prompt(prompt: str, n_slides: int, language: str, content: str): +def get_system_prompt(instruction: Optional[str] = None): return f""" - **Input:** - - Prompt: {prompt} - - Output Language: {language} - - Number of Slides: {n_slides} - - Current Date and Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} - - Additional Information: {content} + You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content. + + Try to use available tools for better results. + + - Provide content for each slide in markdown format. + - Make sure that flow of the presentation is logical and consistent. + - Place greater emphasis on numerical data. + - If Additional Information is provided, divide it into slides. + - Make sure no images are provided in the content. + - Make sure that content follows language guidelines. + + {"# User Instruction:" if instruction else ""} + {instruction or ""} """ -def get_messages(prompt: str, n_slides: int, language: str, content: str): +def get_user_prompt( + content: str, + n_slides: int, + language: str, + additional_context: Optional[str] = None, +): + return f""" + **Input:** + - User provided content: {content} + - Output Language: {language} + - Number of Slides: {n_slides} + - Current Date and Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} + - Additional Information: {additional_context or ""} + """ + + +def get_messages( + content: str, + n_slides: int, + language: str, + additional_context: Optional[str] = None, + instruction: Optional[str] = None, +): return [ LLMSystemMessage( - content=system_prompt, + content=get_system_prompt(instruction), ), LLMUserMessage( - content=get_user_prompt(prompt, n_slides, language, content), + content=get_user_prompt(content, n_slides, language, additional_context), ), ] async def generate_ppt_outline( - prompt: Optional[str], + content: str, n_slides: int, language: Optional[str] = None, - content: Optional[str] = None, + additional_context: Optional[str] = None, + instruction: Optional[str] = None, ): model = get_model() response_model = get_presentation_outline_model_with_n_slides(n_slides) @@ -56,7 +73,13 @@ async def generate_ppt_outline( async for chunk in client.stream_structured( model, - get_messages(prompt, n_slides, language, content), + get_messages( + content, + n_slides, + language, + additional_context, + instruction, + ), response_model.model_json_schema(), strict=True, tools=[SearchWebTool] if client.enable_web_grounding() else None, diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_structure.py b/servers/fastapi/utils/llm_calls/generate_presentation_structure.py index 1bfc0cd0..42a398fc 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_structure.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_structure.py @@ -1,3 +1,4 @@ +from typing import Optional from models.llm_message import LLMSystemMessage, LLMUserMessage from models.presentation_layout import PresentationLayoutModel from models.presentation_outline_model import PresentationOutlineModel @@ -8,7 +9,10 @@ from models.presentation_structure_model import PresentationStructureModel def get_messages( - presentation_layout: PresentationLayoutModel, n_slides: int, data: str + presentation_layout: PresentationLayoutModel, + n_slides: int, + data: str, + instruction: Optional[str] = None, ): return [ LLMSystemMessage( @@ -43,6 +47,9 @@ def get_messages( **Trust your design instincts. Focus on creating the most effective presentation for the content and audience.** + {"# User Instruction:" if instruction else ""} + {instruction or ""} + Select layout index for each of the {n_slides} slides based on what will best serve the presentation's goals. """, ), @@ -57,6 +64,7 @@ def get_messages( async def generate_presentation_structure( presentation_outline: PresentationOutlineModel, presentation_layout: PresentationLayoutModel, + instruction: Optional[str] = None, ) -> PresentationStructureModel: client = LLMClient() @@ -71,6 +79,7 @@ async def generate_presentation_structure( presentation_layout, len(presentation_outline.slides), presentation_outline.to_string(), + instruction, ), response_format=response_model.model_json_schema(), strict=True, diff --git a/servers/fastapi/utils/llm_calls/generate_slide_content.py b/servers/fastapi/utils/llm_calls/generate_slide_content.py index 33bdd993..ce51baf4 100644 --- a/servers/fastapi/utils/llm_calls/generate_slide_content.py +++ b/servers/fastapi/utils/llm_calls/generate_slide_content.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Optional from models.llm_message import LLMSystemMessage, LLMUserMessage from models.presentation_layout import SlideLayoutModel from models.presentation_outline_model import SlideOutlineModel @@ -6,32 +7,37 @@ from services.llm_client import LLMClient from utils.llm_provider import get_model from utils.schema_utils import add_field_in_schema, remove_fields_from_schema -system_prompt = """ - Generate structured slide based on provided outline, follow mentioned steps and notes and provide structured output. - # Steps - 1. Analyze the outline. - 2. Generate structured slide based on the outline. - 3. Generate speaker note that is simple, clear, concise and to the point. +def get_system_prompt(instruction: Optional[str] = None): + return f""" + Generate structured slide based on provided outline, follow mentioned steps and notes and provide structured output. - # Notes - - Slide body should not use words like "This slide", "This presentation". - - Rephrase the slide body to make it flow naturally. - - Only use markdown to highlight important points. - - Make sure to follow language guidelines. - - Speaker note should be normal text, not markdown. - - Strictly follow the max and min character limit for every property in the slide. - - Never ever go over the max character limit. Limit your narration to make sure you never go over the max character limit. - - Number of items should not be more than max number of items specified in slide schema. If you have to put multiple points then merge them to obey max numebr of items. + # Steps + 1. Analyze the outline. + 2. Generate structured slide based on the outline. + 3. Generate speaker note that is simple, clear, concise and to the point. - # Image and Icon Output Format - image: { - __image_prompt__: string, - } - icon: { - __icon_query__: string, - } -""" + # Notes + - Slide body should not use words like "This slide", "This presentation". + - Rephrase the slide body to make it flow naturally. + - Only use markdown to highlight important points. + - Make sure to follow language guidelines. + - Speaker note should be normal text, not markdown. + - Strictly follow the max and min character limit for every property in the slide. + - Never ever go over the max character limit. Limit your narration to make sure you never go over the max character limit. + - Number of items should not be more than max number of items specified in slide schema. If you have to put multiple points then merge them to obey max numebr of items. + + # Image and Icon Output Format + image: {{ + __image_prompt__: string, + }} + icon: {{ + __icon_query__: string, + }} + + {"# User Instruction:" if instruction else ""} + {instruction or ""} + """ def get_user_prompt(outline: str, language: str): @@ -50,11 +56,11 @@ def get_user_prompt(outline: str, language: str): """ -def get_messages(outline: str, language: str): +def get_messages(outline: str, language: str, instruction: Optional[str] = None): return [ LLMSystemMessage( - content=system_prompt, + content=get_system_prompt(instruction), ), LLMUserMessage( content=get_user_prompt(outline, language), @@ -63,7 +69,10 @@ def get_messages(outline: str, language: str): async def get_slide_content_from_type_and_outline( - slide_layout: SlideLayoutModel, outline: SlideOutlineModel, language: str + slide_layout: SlideLayoutModel, + outline: SlideOutlineModel, + language: str, + instruction: Optional[str] = None, ): client = LLMClient() model = get_model() @@ -89,6 +98,7 @@ async def get_slide_content_from_type_and_outline( messages=get_messages( outline.content, language, + instruction, ), response_format=response_schema, strict=False,