feat(fastapi): adds endpoint to edit slide layout using its html

2025-07-25 03:31:10 +05:45 · 2025-07-25 03:31:10 +05:45 · af00557fd5
commit af00557fd5
parent 4d08a05f09
4 changed files with 130 additions and 7 deletions
--- a/servers/fastapi/api/v1/ppt/endpoints/slide.py
+++ b/servers/fastapi/api/v1/ppt/endpoints/slide.py
@ -1,10 +1,11 @@
-from typing import Annotated
+from typing import Annotated, Optional
 from fastapi import APIRouter, Body, HTTPException

 from models.sql.presentation import PresentationModel
 from models.sql.slide import SlideModel
 from services.database import get_sql_session
 from utils.llm_calls.edit_slide import get_edited_slide_content
+from utils.llm_calls.edit_slide_html import get_edited_slide_html
 from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
 from utils.process_slides import process_old_and_new_slides_and_fetch_assets
 from utils.randomizers import get_random_uuid
@ -14,10 +15,7 @@ SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])


@SLIDE_ROUTER.post("/edit")
-async def edit_slide(
-    id: Annotated[str, Body()],
-    prompt: Annotated[str, Body()]
-):
+async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()]):

    with get_sql_session() as sql_session:
        slide = sql_session.get(SlideModel, id)
@ -53,3 +51,33 @@ async def edit_slide(
        sql_session.refresh(slide)

    return slide
+
+
+# POST /slide/edit-html: rewrite a slide's HTML with the LLM and persist it.
+# Body fields: `id` (slide to edit), `prompt` (edit instruction) and an optional
+# `html` override; when `html` is empty the slide's stored html_content is used.
+# Responds 404 when the slide does not exist and 400 when there is no HTML to edit.
+@SLIDE_ROUTER.post("/edit-html", response_model=SlideModel)
+async def edit_slide_html(
+    id: Annotated[str, Body()],
+    prompt: Annotated[str, Body()],
+    html: Annotated[Optional[str], Body()] = None,
+):
+    # Look the slide up in its own session; that session is closed again
+    # before the LLM call below is awaited.
+    with get_sql_session() as lookup_session:
+        slide = lookup_session.get(SlideModel, id)
+        if not slide:
+            raise HTTPException(status_code=404, detail="Slide not found")
+
+    # HTML supplied in the request wins over what is stored on the slide.
+    source_html = html if html else slide.html_content
+    if not source_html:
+        raise HTTPException(status_code=400, detail="No HTML to edit")
+
+    updated_html = await get_edited_slide_html(prompt, source_html)
+
+    # Every edit gets a fresh unique id so that the Next.js side can track
+    # slide updates.
+    slide.id = get_random_uuid()
+
+    # Persist the new id and HTML in a second session, then reload the row
+    # so the returned object reflects what was stored.
+    with get_sql_session() as persist_session:
+        persist_session.add(slide)
+        slide.html_content = updated_html
+        persist_session.commit()
+        persist_session.refresh(slide)
+
+    return slide
--- a/servers/fastapi/models/sql/slide.py
+++ b/servers/fastapi/models/sql/slide.py
@ -1,3 +1,4 @@
+from typing import Optional
 from sqlmodel import SQLModel, Field, Column, JSON

 from utils.randomizers import get_random_uuid
@ -10,3 +11,4 @@ class SlideModel(SQLModel, table=True):
    layout: str
    index: int
    content: dict = Field(sa_column=Column(JSON))
+    html_content: Optional[str]
--- a/servers/fastapi/utils/llm_calls/edit_slide.py
+++ b/servers/fastapi/utils/llm_calls/edit_slide.py
@ -7,8 +7,8 @@ from models.sql.slide import SlideModel
 from google.genai.types import GenerateContentConfig
 from utils.llm_provider import (
    get_google_llm_client,
+    get_large_model,
    get_llm_client,
-    get_small_model,
    is_google_selected,
 )
 from utils.schema_utils import remove_fields_from_schema
@ -58,7 +58,7 @@ async def get_edited_slide_content(
    slide: SlideModel,
    language: Optional[str] = None,
 ):
-    model = get_small_model()
+    model = get_large_model()
    response_schema = remove_fields_from_schema(
        slide_layout.json_schema, ["__image_url__", "__icon_url__"]
    )
--- a/servers/fastapi/utils/llm_calls/edit_slide_html.py
+++ b/servers/fastapi/utils/llm_calls/edit_slide_html.py
@ -0,0 +1,93 @@
+import asyncio
+from typing import Optional
+from google.genai.types import GenerateContentConfig
+from utils.llm_provider import (
+    get_google_llm_client,
+    get_large_model,
+    is_google_selected,
+    get_llm_client,
+)
+
+# System instruction for HTML slide editing. get_edited_slide_html passes it as
+# the Gemini `system_instruction` and as the "system" chat message on the
+# non-Google path.
+# NOTE(review): the "Error Handling" section invites the model to suggest
+# alternatives and note ambiguities, while get_edited_slide_html only returns
+# what extract_html_from_response pulls out of the reply, so such notes are not
+# surfaced to the caller as notes. Confirm this section is intended.
+system_prompt = """
+    You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality.
+
+    Guidelines:
+    1. **Preserve Structure**: Maintain the overall HTML structure, including essential containers, classes, and IDs
+    2. **Content Updates**: Modify text, images, lists, and other content elements as requested
+    3. **Style Consistency**: Keep existing CSS classes and styling unless specifically asked to change them
+    4. **Responsive Design**: Ensure modifications work across different screen sizes
+    5. **Accessibility**: Maintain proper semantic HTML and accessibility attributes
+    6. **Clean Output**: Return only the modified HTML without explanations unless errors occur
+
+    Common Edit Types:
+    - Text content changes (headings, paragraphs, lists)
+    - Image updates (src, alt text, captions)
+    - Layout modifications (adding/removing sections)
+    - Style adjustments (colors, fonts, spacing via classes)
+    - Interactive elements (buttons, links, forms)
+
+    Error Handling:
+    - If the HTML structure is invalid, fix it while making requested changes
+    - If a request would break functionality, suggest an alternative approach
+    - For unclear prompts, make reasonable assumptions and note any ambiguities
+
+    Output Format:
+    Return the complete modified HTML. If the original HTML contains <style> or <script> tags, preserve them unless specifically asked to modify.
+"""
+
+
+def get_user_prompt(prompt: str, html: str):
+    """Build the user-role message for an HTML edit request.
+
+    Args:
+        prompt: The edit instruction; interpolated verbatim.
+        html: The current slide HTML; interpolated verbatim inside an
+            html code fence.
+
+    Returns:
+        The formatted message, including the template's own leading newline
+        and indentation.
+    """
+    user_message = f"""
+        Please edit the following slide HTML based on this prompt:
+
+        **Edit Request:** {prompt}
+
+        **Current HTML:**
+        ```html
+        {html}
+        ```
+
+        Return the modified HTML with your changes applied.
+    """
+    return user_message
+
+
+async def get_edited_slide_html(prompt: str, html: str):
+    """Ask the configured LLM to apply ``prompt`` to ``html``.
+
+    The Google client is used when ``is_google_selected()`` is true; otherwise
+    the chat-completions client from ``get_llm_client()`` is used. Both paths
+    use the model returned by ``get_large_model()``.
+
+    Args:
+        prompt: The edit instruction.
+        html: The slide HTML to edit.
+
+    Returns:
+        The HTML extracted from the model's reply, or the original ``html``
+        when the reply is empty or nothing could be extracted from it.
+    """
+    model = get_large_model()
+    user_message = get_user_prompt(prompt, html)
+
+    if is_google_selected():
+        google_client = get_google_llm_client()
+        # generate_content is handed to asyncio.to_thread, so it runs in a
+        # worker thread and is awaited from here.
+        google_reply = await asyncio.to_thread(
+            google_client.models.generate_content,
+            model=model,
+            contents=[user_message],
+            config=GenerateContentConfig(
+                system_instruction=system_prompt,
+                response_mime_type="text/plain",
+            ),
+        )
+        reply_text = google_reply.text
+    else:
+        chat_client = get_llm_client()
+        chat_reply = await chat_client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_message},
+            ],
+        )
+        reply_text = chat_reply.choices[0].message.content
+
+    # An empty reply and an unextractable reply both fall back to the input.
+    extracted = extract_html_from_response(reply_text) if reply_text else None
+    return extracted or html
+
+
+def extract_html_from_response(response_text: str) -> Optional[str]:
+    """Pull the HTML out of a raw LLM reply.
+
+    If the reply contains a Markdown code fence whose body contains ``<``,
+    only the first such fenced body is searched, so commentary around the
+    fence (for example "changed a -> b") cannot end up in the result.
+    Otherwise the whole reply is searched. The result runs from the first
+    ``<`` to the last ``>`` of the searched text.
+
+    Args:
+        response_text: The raw text returned by the model.
+
+    Returns:
+        The HTML substring, or ``None`` when no ``<...>`` span is found.
+    """
+    import re  # local import keeps this function self-contained
+
+    # Opening fence with optional language tag, lazily matched body, and a
+    # closing fence that sits on its own line (optionally indented).
+    fence_pattern = r"```[A-Za-z]*[ \t]*\r?\n(.*?)\r?\n[ \t]*```"
+
+    searched_text = response_text
+    for fenced in re.finditer(fence_pattern, response_text, re.DOTALL):
+        if "<" in fenced.group(1):
+            searched_text = fenced.group(1)
+            break
+
+    start_index = searched_text.find("<")
+    end_index = searched_text.rfind(">")
+
+    # end_index > start_index already rules out a missing ">" (-1).
+    if start_index != -1 and end_index > start_index:
+        return searched_text[start_index : end_index + 1]
+
+    return None