From df0d4851b495402f388473b636fbda64675399c2 Mon Sep 17 00:00:00 2001
From: sauravniraula <developmentsaurav@gmail.com>
Date: Thu, 24 Jul 2025 22:18:30 +0545
Subject: [PATCH 1/4] docs(readme): adds image generation providers to feature list

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index b183a7ce..42727476 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@
 * ✅ **API Presentation Generation** — Host as API to generate presentations over requests
 * ✅ **Ollama Support** — Run open-source models locally with Ollama integration
 * ✅ **OpenAI API Compatibility** — Use any OpenAI-compatible API endpoint with your own models
+* ✅ **Versatile Image Generation** — Choose from DALL-E 3, Gemini Flash, Pexels, or Pixabay for your visuals
 * ✅ **Runs Locally** — All code runs on your device
 * ✅ **Privacy-First** — No tracking, no data stored by us
 * ✅ **Flexible** — Generate presentations from prompts or outlines
@@ -74,7 +75,7 @@ You may want to directly provide your API KEYS as environment variables and keep
 You can also set the following environment variables to customize the image generation provider and API keys:
 
 - **IMAGE_PROVIDER=[pexels/pixabay/gemini_flash/dall-e-3]**: Select the image provider of your choice.
-  - Defaults to **dall-e-3** for OpenAI models and **gemini_flash** for Google models if not set.
+  - If not set, defaults to **dall-e-3** for OpenAI models and **gemini_flash** for Google models.
 - **PEXELS_API_KEY=[Your Pexels API Key]**: Required if using **pexels** as the image provider.
 - **PIXABAY_API_KEY=[Your Pixabay API Key]**: Required if using **pixabay** as the image provider.
 - **GOOGLE_API_KEY=[Your Google API Key]**: Required if using **gemini_flash** as the image provider.

From 30ae6ebff20a8c377dc14d8843b5a935cb4bc814 Mon Sep 17 00:00:00 2001
From: sauravniraula <developmentsaurav@gmail.com>
Date: Fri, 25 Jul 2025 02:19:12 +0545
Subject: [PATCH 2/4] chore(nextjs): wraps slide layouts with 16:9 box

---
 servers/nextjs/app/layout-preview/[slug]/page.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/servers/nextjs/app/layout-preview/[slug]/page.tsx b/servers/nextjs/app/layout-preview/[slug]/page.tsx
index 00dee152..fd093d1f 100644
--- a/servers/nextjs/app/layout-preview/[slug]/page.tsx
+++ b/servers/nextjs/app/layout-preview/[slug]/page.tsx
@@ -100,7 +100,7 @@ const GroupLayoutPreview = () => {
                                 </div>
 
                                 {/* Layout Content */}
-                                <div className="bg-gray-50">
+                                <div className="bg-gray-50 aspect-video max-w-[1280px] w-full">
                                     <LayoutComponent data={sampleData} />
                                 </div>
                             </Card>

From 4d08a05f09f8ecba90970ea3ae8961c4ddc07c52 Mon Sep 17 00:00:00 2001
From: sauravniraula <developmentsaurav@gmail.com>
Date: Fri, 25 Jul 2025 02:43:34 +0545
Subject: [PATCH 3/4] fix(fastapi): changes google genai stream to create with
 stream=True to solve json decode error

---
 .../utils/llm_calls/generate_presentation_outlines.py    | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py
index b35d84f5..90be3f9e 100644
--- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py
+++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py
@@ -87,14 +87,13 @@ async def generate_ppt_outline(
 
     if not is_google_selected():
         client = get_llm_client()
-        async with client.beta.chat.completions.stream(
+        async for response in await client.chat.completions.create(
             model=model,
             messages=get_prompt_template(prompt, n_slides, language, content),
+            stream=True,
             response_format=response_model,
-        ) as stream:
-            async for event in stream:
-                if isinstance(event, ContentDeltaEvent):
-                    yield event.delta
+        ):
+            yield response.choices[0].delta
 
     else:
         client = get_google_llm_client()

From af00557fd5401edb2d52a13cf9b7cf708f3c4648 Mon Sep 17 00:00:00 2001
From: sauravniraula <developmentsaurav@gmail.com>
Date: Fri, 25 Jul 2025 03:31:10 +0545
Subject: [PATCH 4/4] feat(fastapi): adds endpoint to edit slide layout using
 its html

---
 servers/fastapi/api/v1/ppt/endpoints/slide.py | 38 +++++++-
 servers/fastapi/models/sql/slide.py           |  2 +
 servers/fastapi/utils/llm_calls/edit_slide.py |  4 +-
 .../utils/llm_calls/edit_slide_html.py        | 93 +++++++++++++++++++
 4 files changed, 130 insertions(+), 7 deletions(-)
 create mode 100644 servers/fastapi/utils/llm_calls/edit_slide_html.py

diff --git a/servers/fastapi/api/v1/ppt/endpoints/slide.py b/servers/fastapi/api/v1/ppt/endpoints/slide.py
index a6f9ee9a..c473fc03 100644
--- a/servers/fastapi/api/v1/ppt/endpoints/slide.py
+++ b/servers/fastapi/api/v1/ppt/endpoints/slide.py
@@ -1,10 +1,11 @@
-from typing import Annotated
+from typing import Annotated, Optional
 from fastapi import APIRouter, Body, HTTPException
 
 from models.sql.presentation import PresentationModel
 from models.sql.slide import SlideModel
 from services.database import get_sql_session
 from utils.llm_calls.edit_slide import get_edited_slide_content
+from utils.llm_calls.edit_slide_html import get_edited_slide_html
 from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
 from utils.process_slides import process_old_and_new_slides_and_fetch_assets
 from utils.randomizers import get_random_uuid
@@ -14,10 +15,7 @@ SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])
 
 
 @SLIDE_ROUTER.post("/edit")
-async def edit_slide(
-    id: Annotated[str, Body()],
-    prompt: Annotated[str, Body()]
-):
+async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()]):
 
     with get_sql_session() as sql_session:
         slide = sql_session.get(SlideModel, id)
@@ -53,3 +51,33 @@ async def edit_slide(
         sql_session.refresh(slide)
 
     return slide
+
+
+@SLIDE_ROUTER.post("/edit-html", response_model=SlideModel)
+async def edit_slide_html(
+    id: Annotated[str, Body()],
+    prompt: Annotated[str, Body()],
+    html: Annotated[Optional[str], Body()] = None,
+):
+    with get_sql_session() as sql_session:
+        slide = sql_session.get(SlideModel, id)
+        if not slide:
+            raise HTTPException(status_code=404, detail="Slide not found")
+
+    html_to_edit = html or slide.html_content
+    if not html_to_edit:
+        raise HTTPException(status_code=400, detail="No HTML to edit")
+
+    edited_slide_html = await get_edited_slide_html(prompt, html_to_edit)
+
+    # Always assign a new unique id to the slide
+    # This is to ensure that the nextjs can track slide updates
+    slide.id = get_random_uuid()
+
+    with get_sql_session() as sql_session:
+        sql_session.add(slide)
+        slide.html_content = edited_slide_html
+        sql_session.commit()
+        sql_session.refresh(slide)
+
+    return slide
diff --git a/servers/fastapi/models/sql/slide.py b/servers/fastapi/models/sql/slide.py
index 2195350f..76ad01c5 100644
--- a/servers/fastapi/models/sql/slide.py
+++ b/servers/fastapi/models/sql/slide.py
@@ -1,3 +1,4 @@
+from typing import Optional
 from sqlmodel import SQLModel, Field, Column, JSON
 
 from utils.randomizers import get_random_uuid
@@ -10,3 +11,4 @@ class SlideModel(SQLModel, table=True):
     layout: str
     index: int
     content: dict = Field(sa_column=Column(JSON))
+    html_content: Optional[str]
diff --git a/servers/fastapi/utils/llm_calls/edit_slide.py b/servers/fastapi/utils/llm_calls/edit_slide.py
index 17d7e4a6..20c87c53 100644
--- a/servers/fastapi/utils/llm_calls/edit_slide.py
+++ b/servers/fastapi/utils/llm_calls/edit_slide.py
@@ -7,8 +7,8 @@ from models.sql.slide import SlideModel
 from google.genai.types import GenerateContentConfig
 from utils.llm_provider import (
     get_google_llm_client,
+    get_large_model,
     get_llm_client,
-    get_small_model,
     is_google_selected,
 )
 from utils.schema_utils import remove_fields_from_schema
@@ -58,7 +58,7 @@ async def get_edited_slide_content(
     slide: SlideModel,
     language: Optional[str] = None,
 ):
-    model = get_small_model()
+    model = get_large_model()
     response_schema = remove_fields_from_schema(
         slide_layout.json_schema, ["__image_url__", "__icon_url__"]
     )
diff --git a/servers/fastapi/utils/llm_calls/edit_slide_html.py b/servers/fastapi/utils/llm_calls/edit_slide_html.py
new file mode 100644
index 00000000..b20f3cf7
--- /dev/null
+++ b/servers/fastapi/utils/llm_calls/edit_slide_html.py
@@ -0,0 +1,93 @@
+import asyncio
+from typing import Optional
+from google.genai.types import GenerateContentConfig
+from utils.llm_provider import (
+    get_google_llm_client,
+    get_large_model,
+    is_google_selected,
+    get_llm_client,
+)
+
+system_prompt = """
+    You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality.
+
+    Guidelines:
+    1. **Preserve Structure**: Maintain the overall HTML structure, including essential containers, classes, and IDs
+    2. **Content Updates**: Modify text, images, lists, and other content elements as requested
+    3. **Style Consistency**: Keep existing CSS classes and styling unless specifically asked to change them
+    4. **Responsive Design**: Ensure modifications work across different screen sizes
+    5. **Accessibility**: Maintain proper semantic HTML and accessibility attributes
+    6. **Clean Output**: Return only the modified HTML without explanations unless errors occur
+
+    Common Edit Types:
+    - Text content changes (headings, paragraphs, lists)
+    - Image updates (src, alt text, captions)
+    - Layout modifications (adding/removing sections)
+    - Style adjustments (colors, fonts, spacing via classes)
+    - Interactive elements (buttons, links, forms)
+
+    Error Handling:
+    - If the HTML structure is invalid, fix it while making requested changes
+    - If a request would break functionality, suggest an alternative approach
+    - For unclear prompts, make reasonable assumptions and note any ambiguities
+
+    Output Format:
+    Return the complete modified HTML. If the original HTML contains <style> or <script> tags, preserve them unless specifically asked to modify.
+"""
+
+
+def get_user_prompt(prompt: str, html: str):
+    return f"""
+        Please edit the following slide HTML based on this prompt:
+
+        **Edit Request:** {prompt}
+
+        **Current HTML:**
+        ```html
+        {html}
+        ```
+
+        Return the modified HTML with your changes applied.
+    """
+
+
+async def get_edited_slide_html(prompt: str, html: str):
+    model = get_large_model()
+    llm_response = None
+    if is_google_selected():
+        client = get_google_llm_client()
+        response = await asyncio.to_thread(
+            client.models.generate_content,
+            model=model,
+            contents=[get_user_prompt(prompt, html)],
+            config=GenerateContentConfig(
+                system_instruction=system_prompt,
+                response_mime_type="text/plain",
+            ),
+        )
+        llm_response = response.text
+    else:
+        client = get_llm_client()
+        response = await client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": get_user_prompt(prompt, html)},
+            ],
+        )
+        llm_response = response.choices[0].message.content
+
+    if not llm_response:
+        return html
+
+    return extract_html_from_response(llm_response) or html
+
+
+def extract_html_from_response(response_text: str) -> Optional[str]:
+    start_index = response_text.find("<")
+    end_index = response_text.rfind(">")
+
+    if start_index != -1 and end_index != -1 and end_index > start_index:
+        return response_text[start_index : end_index + 1]
+
+    return None