diff --git a/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/servers/fastapi/api/v1/ppt/endpoints/presentation.py index 0c0f4565..6ae10f8b 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -40,7 +40,10 @@ from utils.llm_calls.generate_presentation_structure import ( from utils.llm_calls.generate_slide_content import ( get_slide_content_from_type_and_outline, ) -from utils.process_slides import process_slide_and_fetch_assets +from utils.process_slides import ( + process_slide_add_placeholder_assets, + process_slide_and_fetch_assets, +) from utils.randomizers import get_random_uuid @@ -226,6 +229,9 @@ async def stream_presentation( ) slides.append(slide) + # This will mutate slide and add placeholder assets + process_slide_add_placeholder_assets(slide) + # This will mutate slide async_assets_generation_tasks.append( process_slide_and_fetch_assets( diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 9f330a3c..9212edc1 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -1,7 +1,8 @@ +from datetime import datetime from typing import Optional from models.llm_message import LLMSystemMessage, LLMUserMessage -from models.llm_tools import GetCurrentDatetimeTool, SearchWebTool +from models.llm_tools import SearchWebTool from services.llm_client import LLMClient from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides from utils.llm_provider import get_model @@ -26,6 +27,7 @@ def get_user_prompt(prompt: str, n_slides: int, language: str, content: str): - Prompt: {prompt} - Output Language: {language} - Number of Slides: {n_slides} + - Current Date and Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} - Additional Information: {content} """ @@ -52,13 +54,11 @@ async 
def generate_ppt_outline( client = LLMClient() - tools = [SearchWebTool, GetCurrentDatetimeTool] - async for chunk in client.stream_structured( model, get_messages(prompt, n_slides, language, content), response_model.model_json_schema(), strict=True, - tools=tools if client.enable_web_grounding() else None, + tools=[SearchWebTool] if client.enable_web_grounding() else None, ): yield chunk diff --git a/servers/fastapi/utils/llm_calls/generate_slide_content.py b/servers/fastapi/utils/llm_calls/generate_slide_content.py index e8f695a0..33bdd993 100644 --- a/servers/fastapi/utils/llm_calls/generate_slide_content.py +++ b/servers/fastapi/utils/llm_calls/generate_slide_content.py @@ -1,3 +1,4 @@ +from datetime import datetime from models.llm_message import LLMSystemMessage, LLMUserMessage from models.presentation_layout import SlideLayoutModel from models.presentation_outline_model import SlideOutlineModel @@ -16,17 +17,28 @@ system_prompt = """ # Notes - Slide body should not use words like "This slide", "This presentation". - Rephrase the slide body to make it flow naturally. - - Provide prompt to generate image on "__image_prompt__" property. - - Provide query to search icon on "__icon_query__" property. - Only use markdown to highlight important points. - Make sure to follow language guidelines. - Speaker note should be normal text, not markdown. - **Strictly follow the max and min character limit for every property in the slide.** + - Strictly follow the max and min character limit for every property in the slide. + - Never ever go over the max character limit. Limit your narration to make sure you never go over the max character limit. + - Number of items should not be more than max number of items specified in slide schema. If you have to put multiple points then merge them to obey max number of items. 
+ + # Image and Icon Output Format + image: { + __image_prompt__: string, + } + icon: { + __icon_query__: string, + } """ def get_user_prompt(outline: str, language: str): return f""" + ## Current Date and Time + {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} + ## Icon Query And Image Prompt Language English diff --git a/servers/fastapi/utils/process_slides.py b/servers/fastapi/utils/process_slides.py index 81a0a6b5..91c2d829 100644 --- a/servers/fastapi/utils/process_slides.py +++ b/servers/fastapi/utils/process_slides.py @@ -170,3 +170,19 @@ async def process_old_and_new_slides_and_fetch_assets( set_dict_at_path(new_slide_content, new_icon_dict_paths[i], new_icon_dict) return new_assets + + +def process_slide_add_placeholder_assets(slide: SlideModel): + + image_paths = get_dict_paths_with_key(slide.content, "__image_prompt__") + icon_paths = get_dict_paths_with_key(slide.content, "__icon_query__") + + for image_path in image_paths: + image_dict = get_dict_at_path(slide.content, image_path) + image_dict["__image_url__"] = "/static/images/placeholder.jpg" + set_dict_at_path(slide.content, image_path, image_dict) + + for icon_path in icon_paths: + icon_dict = get_dict_at_path(slide.content, icon_path) + icon_dict["__icon_url__"] = "/static/icons/placeholder.png" + set_dict_at_path(slide.content, icon_path, icon_dict)