Merge branch 'feat/custom_schema_and_layout' of github.com:presenton/presenton into feat/custom_schema_and_layout
This commit is contained in:
commit
4707ab276d
7 changed files with 137 additions and 14 deletions
|
|
@ -29,6 +29,7 @@
|
|||
* ✅ **API Presentation Generation** — Host as API to generate presentations over requests
|
||||
* ✅ **Ollama Support** — Run open-source models locally with Ollama integration
|
||||
* ✅ **OpenAI API Compatibility** — Use any OpenAI-compatible API endpoint with your own models
|
||||
* ✅ **Versatile Image Generation** — Choose from DALL-E 3, Gemini Flash, Pexels, or Pixabay for your visuals
|
||||
* ✅ **Runs Locally** — All code runs on your device
|
||||
* ✅ **Privacy-First** — No tracking, no data stored by us
|
||||
* ✅ **Flexible** — Generate presentations from prompts or outlines
|
||||
|
|
@ -74,7 +75,7 @@ You may want to directly provide your API KEYS as environment variables and keep
|
|||
You can also set the following environment variables to customize the image generation provider and API keys:
|
||||
|
||||
- **IMAGE_PROVIDER=[pexels/pixabay/gemini_flash/dall-e-3]**: Select the image provider of your choice.
|
||||
- Defaults to **dall-e-3** for OpenAI models and **gemini_flash** for Google models if not set.
|
||||
- Defaults to **dall-e-3** for OpenAI models, **gemini_flash** for Google models if not set.
|
||||
- **PEXELS_API_KEY=[Your Pexels API Key]**: Required if using **pexels** as the image provider.
|
||||
- **PIXABAY_API_KEY=[Your Pixabay API Key]**: Required if using **pixabay** as the image provider.
|
||||
- **GOOGLE_API_KEY=[Your Google API Key]**: Required if using **gemini_flash** as the image provider.
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
from typing import Annotated
|
||||
from typing import Annotated, Optional
|
||||
from fastapi import APIRouter, Body, HTTPException
|
||||
|
||||
from models.sql.presentation import PresentationModel
|
||||
from models.sql.slide import SlideModel
|
||||
from services.database import get_sql_session
|
||||
from utils.llm_calls.edit_slide import get_edited_slide_content
|
||||
from utils.llm_calls.edit_slide_html import get_edited_slide_html
|
||||
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
|
||||
from utils.process_slides import process_old_and_new_slides_and_fetch_assets
|
||||
from utils.randomizers import get_random_uuid
|
||||
|
|
@ -14,10 +15,7 @@ SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])
|
|||
|
||||
|
||||
@SLIDE_ROUTER.post("/edit")
|
||||
async def edit_slide(
|
||||
id: Annotated[str, Body()],
|
||||
prompt: Annotated[str, Body()]
|
||||
):
|
||||
async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()]):
|
||||
|
||||
with get_sql_session() as sql_session:
|
||||
slide = sql_session.get(SlideModel, id)
|
||||
|
|
@ -53,3 +51,33 @@ async def edit_slide(
|
|||
sql_session.refresh(slide)
|
||||
|
||||
return slide
|
||||
|
||||
|
||||
@SLIDE_ROUTER.post("/edit-html", response_model=SlideModel)
async def edit_slide_html(
    id: Annotated[str, Body()],
    prompt: Annotated[str, Body()],
    html: Annotated[Optional[str], Body()] = None,
):
    """Rewrite a slide's HTML according to a natural-language prompt.

    Args:
        id: Primary key of the slide to edit.
        prompt: The edit instruction forwarded to the LLM.
        html: Optional HTML to edit; when omitted (or empty) the slide's
            stored ``html_content`` is used instead.

    Returns:
        The persisted slide, carrying a fresh id and the edited HTML.

    Raises:
        HTTPException: 404 when no slide has the given id, 400 when there
            is neither request HTML nor stored HTML to work on.
    """
    with get_sql_session() as session:
        slide = session.get(SlideModel, id)
        if slide is None:
            raise HTTPException(status_code=404, detail="Slide not found")

    source_html = html if html else slide.html_content
    if not source_html:
        raise HTTPException(status_code=400, detail="No HTML to edit")

    updated_html = await get_edited_slide_html(prompt, source_html)

    # The slide gets a brand-new id on every edit so that the Next.js
    # frontend can tell that the slide has been updated.
    slide.id = get_random_uuid()
    slide.html_content = updated_html

    with get_sql_session() as session:
        session.add(slide)
        session.commit()
        session.refresh(slide)

    return slide
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from typing import Optional
|
||||
from sqlmodel import SQLModel, Field, Column, JSON
|
||||
|
||||
from utils.randomizers import get_random_uuid
|
||||
|
|
@ -10,3 +11,4 @@ class SlideModel(SQLModel, table=True):
|
|||
layout: str
|
||||
index: int
|
||||
content: dict = Field(sa_column=Column(JSON))
|
||||
html_content: Optional[str]
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ from models.sql.slide import SlideModel
|
|||
from google.genai.types import GenerateContentConfig
|
||||
from utils.llm_provider import (
|
||||
get_google_llm_client,
|
||||
get_large_model,
|
||||
get_llm_client,
|
||||
get_small_model,
|
||||
is_google_selected,
|
||||
)
|
||||
from utils.schema_utils import remove_fields_from_schema
|
||||
|
|
@ -58,7 +58,7 @@ async def get_edited_slide_content(
|
|||
slide: SlideModel,
|
||||
language: Optional[str] = None,
|
||||
):
|
||||
model = get_small_model()
|
||||
model = get_large_model()
|
||||
response_schema = remove_fields_from_schema(
|
||||
slide_layout.json_schema, ["__image_url__", "__icon_url__"]
|
||||
)
|
||||
|
|
|
|||
93
servers/fastapi/utils/llm_calls/edit_slide_html.py
Normal file
93
servers/fastapi/utils/llm_calls/edit_slide_html.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import asyncio
|
||||
from typing import Optional
|
||||
from google.genai.types import GenerateContentConfig
|
||||
from utils.llm_provider import (
|
||||
get_google_llm_client,
|
||||
get_large_model,
|
||||
is_google_selected,
|
||||
get_llm_client,
|
||||
)
|
||||
|
||||
system_prompt = """
|
||||
You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality.
|
||||
|
||||
Guidelines:
|
||||
1. **Preserve Structure**: Maintain the overall HTML structure, including essential containers, classes, and IDs
|
||||
2. **Content Updates**: Modify text, images, lists, and other content elements as requested
|
||||
3. **Style Consistency**: Keep existing CSS classes and styling unless specifically asked to change them
|
||||
4. **Responsive Design**: Ensure modifications work across different screen sizes
|
||||
5. **Accessibility**: Maintain proper semantic HTML and accessibility attributes
|
||||
6. **Clean Output**: Return only the modified HTML without explanations unless errors occur
|
||||
|
||||
Common Edit Types:
|
||||
- Text content changes (headings, paragraphs, lists)
|
||||
- Image updates (src, alt text, captions)
|
||||
- Layout modifications (adding/removing sections)
|
||||
- Style adjustments (colors, fonts, spacing via classes)
|
||||
- Interactive elements (buttons, links, forms)
|
||||
|
||||
Error Handling:
|
||||
- If the HTML structure is invalid, fix it while making requested changes
|
||||
- If a request would break functionality, suggest an alternative approach
|
||||
- For unclear prompts, make reasonable assumptions and note any ambiguities
|
||||
|
||||
Output Format:
|
||||
Return the complete modified HTML. If the original HTML contains <style> or <script> tags, preserve them unless specifically asked to modify.
|
||||
"""
|
||||
|
||||
|
||||
def get_user_prompt(prompt: str, html: str) -> str:
    """Build the user message that asks the model to edit a slide's HTML.

    Args:
        prompt: The edit request, inserted verbatim after "Edit Request:".
        html: The current slide HTML, inserted verbatim inside an
            ```html fenced block.

    Returns:
        The formatted message text.
    """
    # Both values are interpolated as-is; no escaping is applied.
    return f"""
Please edit the following slide HTML based on this prompt:

**Edit Request:** {prompt}

**Current HTML:**
```html
{html}
```

Return the modified HTML with your changes applied.
"""
|
||||
|
||||
|
||||
async def get_edited_slide_html(prompt: str, html: str):
    """Ask the configured LLM to apply ``prompt`` to ``html``.

    The large model is used with whichever provider is selected: the Google
    client (its blocking call is pushed onto a worker thread) or the
    chat-completions client otherwise.

    Args:
        prompt: The edit instruction.
        html: The slide HTML to be edited.

    Returns:
        The HTML extracted from the model's reply, or the unchanged ``html``
        when the reply is empty or contains no extractable markup.
    """
    model_name = get_large_model()
    user_message = get_user_prompt(prompt, html)

    if is_google_selected():
        google_client = get_google_llm_client()
        generation_config = GenerateContentConfig(
            system_instruction=system_prompt,
            response_mime_type="text/plain",
        )
        # generate_content is synchronous; run it off the event loop.
        result = await asyncio.to_thread(
            google_client.models.generate_content,
            model=model_name,
            contents=[user_message],
            config=generation_config,
        )
        raw_text = result.text
    else:
        chat_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]
        completion = await get_llm_client().chat.completions.create(
            model=model_name,
            messages=chat_messages,
        )
        raw_text = completion.choices[0].message.content

    # Fall back to the caller's HTML whenever nothing usable came back.
    edited_html = extract_html_from_response(raw_text) if raw_text else None
    return edited_html or html
|
||||
|
||||
|
||||
def _outermost_markup(text: str) -> Optional[str]:
    """Return the span from the first ``<`` to the last ``>`` in ``text``."""
    start_index = text.find("<")
    end_index = text.rfind(">")
    if start_index != -1 and end_index > start_index:
        return text[start_index : end_index + 1]
    return None


def extract_html_from_response(response_text: str) -> Optional[str]:
    """Pull the HTML payload out of a raw LLM reply.

    Models often wrap the HTML in a Markdown code fence and surround it with
    commentary. If that commentary contains ``<`` or ``>`` (for example
    "changed the <h1> tag" or "4 -> 8"), slicing the whole reply from the
    first ``<`` to the last ``>`` drags the prose and the fence markers into
    the result. To avoid that, the first fenced block that contains markup
    is used when one exists; otherwise the whole reply is sliced as before.

    Args:
        response_text: The raw text returned by the model.

    Returns:
        The extracted HTML, or ``None`` when the reply is empty or contains
        no ``<...>`` span.
    """
    # Local import so the block does not depend on the file's import section.
    import re

    if not response_text:
        return None

    # ```lang\n ... ``` -- the info string after the fence is optional.
    for fenced_block in re.finditer(
        r"```[^\n`]*\n(.*?)```", response_text, re.DOTALL
    ):
        html = _outermost_markup(fenced_block.group(1))
        if html is not None:
            return html

    # No usable fenced block (or an unterminated fence): fall back to the
    # outermost <...> span of the entire reply.
    return _outermost_markup(response_text)
|
||||
|
|
@ -87,14 +87,13 @@ async def generate_ppt_outline(
|
|||
|
||||
if not is_google_selected():
|
||||
client = get_llm_client()
|
||||
async with client.beta.chat.completions.stream(
|
||||
async for response in await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=get_prompt_template(prompt, n_slides, language, content),
|
||||
stream=True,
|
||||
response_format=response_model,
|
||||
) as stream:
|
||||
async for event in stream:
|
||||
if isinstance(event, ContentDeltaEvent):
|
||||
yield event.delta
|
||||
):
|
||||
yield response.choices[0].delta
|
||||
|
||||
else:
|
||||
client = get_google_llm_client()
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ const GroupLayoutPreview = () => {
|
|||
</div>
|
||||
|
||||
{/* Layout Content */}
|
||||
<div className="bg-gray-50">
|
||||
<div className="bg-gray-50 aspect-video max-w-[1280px] w-full">
|
||||
<LayoutComponent data={sampleData} />
|
||||
</div>
|
||||
</Card>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue