feat(fastapi): adds endpoint to edit slide layout using its html

This commit is contained in:
sauravniraula 2025-07-25 03:31:10 +05:45
parent 4d08a05f09
commit af00557fd5
No known key found for this signature in database
GPG key ID: 60FCC1B5A5E83326
4 changed files with 130 additions and 7 deletions

View file

@ -1,10 +1,11 @@
from typing import Annotated
from typing import Annotated, Optional
from fastapi import APIRouter, Body, HTTPException
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
from services.database import get_sql_session
from utils.llm_calls.edit_slide import get_edited_slide_content
from utils.llm_calls.edit_slide_html import get_edited_slide_html
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
from utils.process_slides import process_old_and_new_slides_and_fetch_assets
from utils.randomizers import get_random_uuid
@ -14,10 +15,7 @@ SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])
@SLIDE_ROUTER.post("/edit")
async def edit_slide(
id: Annotated[str, Body()],
prompt: Annotated[str, Body()]
):
async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()]):
with get_sql_session() as sql_session:
slide = sql_session.get(SlideModel, id)
@ -53,3 +51,33 @@ async def edit_slide(
sql_session.refresh(slide)
return slide
# POST /slide/edit-html: have the LLM rewrite a slide's HTML and persist it.
# The request's `html` is used when supplied, otherwise the slide's stored
# html_content. Responds 404 when the slide does not exist and 400 when there
# is no HTML to work on.
@SLIDE_ROUTER.post("/edit-html", response_model=SlideModel)
async def edit_slide_html(
    id: Annotated[str, Body()],
    prompt: Annotated[str, Body()],
    html: Annotated[Optional[str], Body()] = None,
):
    # Look the slide up in its own short-lived session.
    with get_sql_session() as lookup_session:
        slide = lookup_session.get(SlideModel, id)
        if not slide:
            raise HTTPException(status_code=404, detail="Slide not found")

    # Caller-supplied HTML wins; an empty/missing value falls back to storage.
    source_html = html if html else slide.html_content
    if not source_html:
        raise HTTPException(status_code=400, detail="No HTML to edit")

    updated_html = await get_edited_slide_html(prompt, source_html)

    # Every edit gets a fresh unique id so that the Next.js side can track
    # slide updates.
    slide.id = get_random_uuid()

    # Persist the edited slide in a second session and reload it.
    with get_sql_session() as write_session:
        write_session.add(slide)
        slide.html_content = updated_html
        write_session.commit()
        write_session.refresh(slide)

    return slide

View file

@ -1,3 +1,4 @@
from typing import Optional
from sqlmodel import SQLModel, Field, Column, JSON
from utils.randomizers import get_random_uuid
@ -10,3 +11,4 @@ class SlideModel(SQLModel, table=True):
layout: str
index: int
content: dict = Field(sa_column=Column(JSON))
html_content: Optional[str]

View file

@ -7,8 +7,8 @@ from models.sql.slide import SlideModel
from google.genai.types import GenerateContentConfig
from utils.llm_provider import (
get_google_llm_client,
get_large_model,
get_llm_client,
get_small_model,
is_google_selected,
)
from utils.schema_utils import remove_fields_from_schema
@ -58,7 +58,7 @@ async def get_edited_slide_content(
slide: SlideModel,
language: Optional[str] = None,
):
model = get_small_model()
model = get_large_model()
response_schema = remove_fields_from_schema(
slide_layout.json_schema, ["__image_url__", "__icon_url__"]
)

View file

@ -0,0 +1,93 @@
import asyncio
from typing import Optional
from google.genai.types import GenerateContentConfig
from utils.llm_provider import (
get_google_llm_client,
get_large_model,
is_google_selected,
get_llm_client,
)
# System prompt for HTML slide editing. get_edited_slide_html passes it as the
# Google `system_instruction` and as the "system" chat message.
# NOTE(review): the "Error Handling" section invites the model to add
# suggestions or notes next to the HTML — confirm that whatever parses the
# reply tolerates that extra prose.
system_prompt = """
You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality.
Guidelines:
1. **Preserve Structure**: Maintain the overall HTML structure, including essential containers, classes, and IDs
2. **Content Updates**: Modify text, images, lists, and other content elements as requested
3. **Style Consistency**: Keep existing CSS classes and styling unless specifically asked to change them
4. **Responsive Design**: Ensure modifications work across different screen sizes
5. **Accessibility**: Maintain proper semantic HTML and accessibility attributes
6. **Clean Output**: Return only the modified HTML without explanations unless errors occur
Common Edit Types:
- Text content changes (headings, paragraphs, lists)
- Image updates (src, alt text, captions)
- Layout modifications (adding/removing sections)
- Style adjustments (colors, fonts, spacing via classes)
- Interactive elements (buttons, links, forms)
Error Handling:
- If the HTML structure is invalid, fix it while making requested changes
- If a request would break functionality, suggest an alternative approach
- For unclear prompts, make reasonable assumptions and note any ambiguities
Output Format:
Return the complete modified HTML. If the original HTML contains <style> or <script> tags, preserve them unless specifically asked to modify.
"""
def get_user_prompt(prompt: str, html: str):
    """Build the user message asking the model to apply *prompt* to *html*.

    The edit request comes first, followed by the current HTML inside a fenced
    ``html`` code block. The returned text begins and ends with a newline.
    """
    message_lines = [
        "",
        "Please edit the following slide HTML based on this prompt:",
        f"**Edit Request:** {prompt}",
        "**Current HTML:**",
        "```html",
        f"{html}",
        "```",
        "Return the modified HTML with your changes applied.",
        "",
    ]
    return "\n".join(message_lines)
async def get_edited_slide_html(prompt: str, html: str):
    """Ask the configured LLM to edit *html* according to *prompt*.

    The Google client is used when ``is_google_selected()`` is true; otherwise
    the chat-completions client is used. Both receive ``system_prompt`` and the
    message built by ``get_user_prompt``.

    Returns the markup extracted from the model's reply, or the unmodified
    *html* when the reply is empty or nothing can be extracted from it.
    """
    model = get_large_model()

    if is_google_selected():
        google_client = get_google_llm_client()
        # generate_content is handed to a worker thread via asyncio.to_thread.
        generation = await asyncio.to_thread(
            google_client.models.generate_content,
            model=model,
            contents=[get_user_prompt(prompt, html)],
            config=GenerateContentConfig(
                system_instruction=system_prompt,
                response_mime_type="text/plain",
            ),
        )
        reply_text = generation.text
    else:
        chat_client = get_llm_client()
        completion = await chat_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": get_user_prompt(prompt, html)},
            ],
        )
        reply_text = completion.choices[0].message.content

    # Fall back to the caller's HTML whenever the reply yields no markup.
    if reply_text:
        extracted_html = extract_html_from_response(reply_text)
        if extracted_html:
            return extracted_html
    return html
def extract_html_from_response(response_text: str) -> Optional[str]:
    """Pull the HTML markup out of a raw LLM reply.

    The reply may wrap the markup in a Markdown code fence and may carry prose
    before or after it. The contents of the first fenced block are tried
    first, so that a closing fence or trailing commentary containing ">" does
    not end up in the result; if that yields no markup, the whole reply is
    scanned instead.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The substring from the first "<" to the last ">" of the chosen
        candidate, or ``None`` when no such span exists.
    """
    candidates = []

    fence_open = response_text.find("```")
    if fence_open != -1:
        # The fenced body starts after the fence line, which may carry a
        # language tag such as "html".
        body_start = response_text.find("\n", fence_open)
        if body_start != -1:
            fence_close = response_text.find("```", body_start)
            if fence_close == -1:
                # Unterminated fence: treat the rest of the reply as the body.
                fence_close = len(response_text)
            candidates.append(response_text[body_start + 1 : fence_close])

    # Always keep the unfenced reply as a fallback candidate.
    candidates.append(response_text)

    for candidate in candidates:
        start_index = candidate.find("<")
        end_index = candidate.rfind(">")
        if start_index != -1 and end_index > start_index:
            return candidate[start_index : end_index + 1]
    return None