diff --git a/README.md b/README.md index b183a7ce..42727476 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ * ✅ **API Presentation Generation** — Host as API to generate presentations over requests * ✅ **Ollama Support** — Run open-source models locally with Ollama integration * ✅ **OpenAI API Compatibility** — Use any OpenAI-compatible API endpoint with your own models +* ✅ **Versatile Image Generation** — Choose from DALL-E 3, Gemini Flash, Pexels, or Pixabay for your visuals * ✅ **Runs Locally** — All code runs on your device * ✅ **Privacy-First** — No tracking, no data stored by us * ✅ **Flexible** — Generate presentations from prompts or outlines @@ -74,7 +75,7 @@ You may want to directly provide your API KEYS as environment variables and keep You can also set the following environment variables to customize the image generation provider and API keys: - **IMAGE_PROVIDER=[pexels/pixabay/gemini_flash/dall-e-3]**: Select the image provider of your choice. - - Defaults to **dall-e-3** for OpenAI models and **gemini_flash** for Google models if not set. + - Defaults to **dall-e-3** for OpenAI models and to **gemini_flash** for Google models when **IMAGE_PROVIDER** is not set. - **PEXELS_API_KEY=[Your Pexels API Key]**: Required if using **pexels** as the image provider. - **PIXABAY_API_KEY=[Your Pixabay API Key]**: Required if using **pixabay** as the image provider. - **GOOGLE_API_KEY=[Your Google API Key]**: Required if using **gemini_flash** as the image provider. 
@PRESENTATION_ROUTER.post("/generate")
async def generate_presentation_api(
    data: Annotated[GeneratePresentationRequest, Body()],
):
    """Generate a presentation end-to-end and export it as PPTX or PDF.

    The endpoint saves any uploaded documents, summarises them, generates
    slide outlines, resolves the requested layout, builds the slide
    structure, persists the presentation and its slides, and finally exports
    the result through the HTTP endpoints served on ``http://localhost``.

    Args:
        data: Request body with the prompt, slide count, language, layout
            name, optional documents and export format.

    Returns:
        PresentationPathAndEditPath: the presentation id, the exported file
        path and the path of the editor page for this presentation.

    Raises:
        HTTPException: 500 when the generated outline is not valid JSON, when
            the layout contains no slides, or when either export call
            answers with a non-200 status. Errors raised by
            ``get_layout_by_name`` propagate unchanged.
    """
    presentation_id = str(uuid.uuid4())
    print("**" * 40)
    print(f"Generating presentation with ID: {presentation_id}")
    print(f"Received Body as JSON: {data.model_dump_json(indent=2)}")

    # 1. Save uploaded files
    file_paths = []
    if data.documents:
        temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
        for position, upload in enumerate(data.documents):
            # The file name comes from the client: keep only its last path
            # component so "../x" or an absolute path cannot escape temp_dir.
            raw_name = (upload.filename or "").replace("\\", "/")
            safe_name = os.path.basename(raw_name)
            if safe_name in ("", ".", ".."):
                safe_name = f"document_{position}"
            file_path = os.path.join(temp_dir, safe_name)
            with open(file_path, "wb") as f:
                f.write(await upload.read())
            file_paths.append(file_path)

    # 2. Create Presentation Summary (if documents are provided)
    summary = None
    if file_paths:
        temp_dir = TEMP_FILE_SERVICE.create_temp_dir(presentation_id)
        documents_loader = DocumentsLoader(file_paths=file_paths)
        await documents_loader.load_documents(temp_dir)
        summary = await generate_document_summary(documents_loader.documents)

    # 3. Generate Outlines
    outline_chunks = []
    async for chunk in generate_ppt_outline(
        data.prompt,
        data.n_slides,
        data.language,
        summary,
    ):
        outline_chunks.append(chunk)
    presentation_content_text = "".join(outline_chunks)

    try:
        presentation_content_json = json.loads(presentation_content_text)
    except json.JSONDecodeError as e:
        # The outline is model output; report a clear error instead of an
        # unhandled decode failure.
        print(f"Failed to parse generated outline: {e}")
        raise HTTPException(
            status_code=500, detail="Failed to parse generated presentation outline"
        ) from e
    presentation_content = PresentationOutlineModel(**presentation_content_json)
    outlines = presentation_content.slides[: data.n_slides]
    total_outlines = len(outlines)

    print("-" * 40)
    print("Generated Presentation Content:", presentation_content_text)
    print(f"Generated {total_outlines} outlines for the presentation")
    print(f"Presentation Title: {presentation_content.title}")

    # 4. Parse Layouts
    layout = await get_layout_by_name(data.layout)
    total_slide_layouts = len(layout.slides)
    if total_slide_layouts == 0:
        # random.randint(0, -1) below would otherwise raise ValueError.
        raise HTTPException(
            status_code=500, detail=f"Layout '{data.layout}' has no slides"
        )

    # 5. Generate Structure
    if layout.ordered:
        presentation_structure = layout.to_presentation_structure()
    else:
        presentation_structure: PresentationStructureModel = (
            await generate_presentation_structure(
                presentation_outline=PresentationOutlineModel(
                    title=presentation_content.title,
                    slides=outlines,
                    notes=presentation_content.notes,
                ),
                presentation_layout=layout,
            )
        )

    # Make the structure exactly one layout index per outline: truncate
    # extras, pad missing entries, and replace out-of-range indices.
    presentation_structure.slides = presentation_structure.slides[:total_outlines]
    for index in range(total_outlines):
        random_slide_index = random.randint(0, total_slide_layouts - 1)
        # Compare against the structure length (not the outline count) so a
        # structure shorter than the outline list is actually padded.
        if index >= len(presentation_structure.slides):
            presentation_structure.slides.append(random_slide_index)
            continue
        if not 0 <= presentation_structure.slides[index] < total_slide_layouts:
            presentation_structure.slides[index] = random_slide_index

    # 6. Create and Save PresentationModel
    presentation = PresentationModel(
        id=presentation_id,
        prompt=data.prompt,
        n_slides=data.n_slides,
        language=data.language,
        title=presentation_content.title,
        summary=summary,
        outlines=[each.model_dump() for each in outlines],
        notes=presentation_content.notes,
        layout=layout.model_dump(),
        structure=presentation_structure.model_dump(),
    )
    with get_sql_session() as sql_session:
        sql_session.add(presentation)
        sql_session.commit()
        sql_session.refresh(presentation)

    # 7. Generate slide content and save slides
    slides: List[SlideModel] = []
    for i, slide_layout_index in enumerate(presentation_structure.slides):
        slide_layout = layout.slides[slide_layout_index]
        print(f"Generating content for slide {i} with layout {slide_layout.id}")
        slide_content = await get_slide_content_from_type_and_outline(
            slide_layout, outlines[i]
        )
        print(f"Generated content for slide {i}: {json.dumps(slide_content, indent=2)}")
        slides.append(
            SlideModel(
                presentation=presentation_id,
                layout_group=layout.name,
                layout=slide_layout.id,
                index=i,
                content=slide_content,
            )
        )

    # Process slides to fetch assets (images, icons, etc.)
    print("Processing slides to fetch assets")
    for slide in slides:
        # Deliberately best-effort: a slide whose assets fail is still saved.
        try:
            await process_slide_and_fetch_assets(slide)
            print(f"Processed slide {slide.index} successfully")
        except Exception as e:
            print(f"Error processing slide {slide.index}: {e}")

    with get_sql_session() as sql_session:
        sql_session.add_all(slides)
        sql_session.commit()

    # 8. Export as PPTX
    if data.export_as == "pptx":
        print("-" * 40)
        print("Exporting Presentation as PPTX")

        # Get the converted PPTX model from your existing Next.js service
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"http://localhost/api/presentation_to_pptx_model?id={presentation_id}"
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    print(f"Failed to get PPTX model: {error_text}")
                    raise HTTPException(
                        status_code=500,
                        detail="Failed to convert presentation to PPTX model",
                    )
                pptx_model_data = await response.json()
                print(f"Received PPTX model data: {json.dumps(pptx_model_data, indent=2)}")

        # Create PPTX file using the converted model
        pptx_model = PptxPresentationModel(**pptx_model_data)
        print(f"Creating PPTX with model: {pptx_model.model_dump_json(indent=2)}")
        temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
        pptx_creator = PptxPresentationCreator(pptx_model, temp_dir)
        await pptx_creator.create_ppt()

        # The title is generated text: strip path separators so it cannot
        # point outside the export directory, and fall back to the id when
        # nothing usable remains.
        safe_title = (
            (presentation_content.title or "")
            .replace("/", "_")
            .replace("\\", "_")
            .strip()
        ) or presentation_id
        export_directory = get_exports_directory()
        pptx_path = os.path.join(export_directory, f"{safe_title}.pptx")
        pptx_creator.save(pptx_path)

        presentation_and_path = PresentationAndPath(
            presentation_id=presentation_id,
            path=pptx_path,
        )
    else:
        print("-" * 40)
        print("Exporting Presentation as PDF")

        async with aiohttp.ClientSession() as session:
            async with session.post(
                "http://localhost/api/export-as-pdf",
                json={
                    "id": presentation_id,
                    "title": presentation_content.title,
                },
            ) as response:
                # Mirror the PPTX branch: fail loudly instead of reading
                # "path" from an error payload.
                if response.status != 200:
                    error_text = await response.text()
                    print(f"Failed to export PDF: {error_text}")
                    raise HTTPException(
                        status_code=500,
                        detail="Failed to export presentation as PDF",
                    )
                response_json = await response.json()

        print(f"Received PDF export response: {json.dumps(response_json, indent=2)}")

        presentation_and_path = PresentationAndPath(
            presentation_id=presentation_id,
            path=response_json["path"],
        )

    return PresentationPathAndEditPath(
        **presentation_and_path.model_dump(),
        edit_path=f"/presentation?id={presentation_id}",
    )
b/servers/fastapi/api/v1/ppt/endpoints/slide.py index a6f9ee9a..c473fc03 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/slide.py +++ b/servers/fastapi/api/v1/ppt/endpoints/slide.py @@ -1,10 +1,11 @@ -from typing import Annotated +from typing import Annotated, Optional from fastapi import APIRouter, Body, HTTPException from models.sql.presentation import PresentationModel from models.sql.slide import SlideModel from services.database import get_sql_session from utils.llm_calls.edit_slide import get_edited_slide_content +from utils.llm_calls.edit_slide_html import get_edited_slide_html from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt from utils.process_slides import process_old_and_new_slides_and_fetch_assets from utils.randomizers import get_random_uuid @@ -14,10 +15,7 @@ SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"]) @SLIDE_ROUTER.post("/edit") -async def edit_slide( - id: Annotated[str, Body()], - prompt: Annotated[str, Body()] -): +async def edit_slide(id: Annotated[str, Body()], prompt: Annotated[str, Body()]): with get_sql_session() as sql_session: slide = sql_session.get(SlideModel, id) @@ -53,3 +51,33 @@ async def edit_slide( sql_session.refresh(slide) return slide + + +@SLIDE_ROUTER.post("/edit-html", response_model=SlideModel) +async def edit_slide_html( + id: Annotated[str, Body()], + prompt: Annotated[str, Body()], + html: Annotated[Optional[str], Body()] = None, +): + with get_sql_session() as sql_session: + slide = sql_session.get(SlideModel, id) + if not slide: + raise HTTPException(status_code=404, detail="Slide not found") + + html_to_edit = html or slide.html_content + if not html_to_edit: + raise HTTPException(status_code=400, detail="No HTML to edit") + + edited_slide_html = await get_edited_slide_html(prompt, html_to_edit) + + # Always assign a new unique id to the slide + # This is to ensure that the nextjs can track slide updates + slide.id = get_random_uuid() + + with get_sql_session() as 
async def get_layout_by_name(layout_name: str) -> PresentationLayoutModel:
    """Fetch a layout group from the local layout API and parse it.

    Args:
        layout_name: Name of the layout group, sent as the ``group`` query
            parameter.

    Returns:
        The response JSON parsed into a ``PresentationLayoutModel``.

    Raises:
        HTTPException: 404 when the layout API answers with any non-200
            status; the response text is included in the detail.
    """
    url = "http://localhost/api/layout"
    async with aiohttp.ClientSession() as session:
        # Send the name through ``params`` so it is percent-encoded; building
        # the query by string interpolation lets names containing "&", "#",
        # "=" or spaces alter or truncate the query string.
        async with session.get(url, params={"group": layout_name}) as response:
            if response.status != 200:
                error_text = await response.text()
                raise HTTPException(
                    status_code=404,
                    detail=f"Layout '{layout_name}' not found: {error_text}",
                )
            layout_json = await response.json()
    # Parse the JSON into the Pydantic model
    return PresentationLayoutModel(**layout_json)
@pytest.fixture(autouse=True)
def patch_presentation_api(monkeypatch, mock_get_layout):
    """Patch every external dependency of the ``/generate`` endpoint.

    All patches are registered on one ``ExitStack``, so they are undone
    together when the fixture finishes, and also when a later patch fails
    during setup. Mocks that need extra configuration are kept in named
    variables instead of being looked up by list position.

    Args:
        monkeypatch: Unused; kept so the fixture signature is unchanged.
        mock_get_layout: Async stand-in used as the side effect of
            ``get_layout_by_name``.
    """
    from contextlib import ExitStack

    module = "api.v1.ppt.endpoints.presentation"
    with ExitStack() as stack:

        def start(name, **kwargs):
            # Start one patch under the endpoint module and tie its lifetime
            # to the stack.
            return stack.enter_context(patch(f"{module}.{name}", **kwargs))

        start("get_layout_by_name", new=AsyncMock(side_effect=mock_get_layout))
        start("TEMP_FILE_SERVICE.create_temp_dir", return_value="/tmp/mockdir")
        docs_loader = start("DocumentsLoader")
        start(
            "generate_document_summary",
            new_callable=AsyncMock,
            return_value="mock_summary",
        )
        start("generate_ppt_outline", side_effect=mock_generate_ppt_outline)
        start("get_sql_session")
        start(
            "get_slide_content_from_type_and_outline",
            new_callable=AsyncMock,
            return_value={"mock": "slide_content"},
        )
        start("process_slide_and_fetch_assets", new_callable=AsyncMock)
        start("get_exports_directory", return_value="/tmp/exports")
        pptx_creator = start("PptxPresentationCreator")
        start("aiohttp.ClientSession", return_value=MockAiohttpSession())

        # Setup DocumentsLoader mock
        docs_loader.return_value.load_documents = AsyncMock()
        docs_loader.return_value.documents = []

        # Setup PptxPresentationCreator mock for pptx test
        pptx_creator.return_value.create_ppt = AsyncMock()
        pptx_creator.return_value.save = MagicMock()

        yield
"layout": "general" + } + ) + assert response.status_code == 422 + + + def test_generate_presentation_with_n_slides_less_than_one(self, client): + response = client.post( + "/api/v1/ppt/presentation/generate", + json={ + "prompt": "Create a presentation about artificial intelligence and machine learning", + "n_slides": 0, + "language": "English", + "export_as": "pdf", + "layout": "general" + } + ) + assert response.status_code == 422 + + def test_generate_presentation_with_invalid_export_type(self, client): + response = client.post( + "/api/v1/ppt/presentation/generate", + json={ + "prompt": "Create a presentation about artificial intelligence and machine learning", + "n_slides": 5, + "language": "English", + "export_as": "invalid_type", + "layout": "general" + } + ) + assert response.status_code == 422 diff --git a/servers/fastapi/utils/llm_calls/edit_slide.py b/servers/fastapi/utils/llm_calls/edit_slide.py index 17d7e4a6..20c87c53 100644 --- a/servers/fastapi/utils/llm_calls/edit_slide.py +++ b/servers/fastapi/utils/llm_calls/edit_slide.py @@ -7,8 +7,8 @@ from models.sql.slide import SlideModel from google.genai.types import GenerateContentConfig from utils.llm_provider import ( get_google_llm_client, + get_large_model, get_llm_client, - get_small_model, is_google_selected, ) from utils.schema_utils import remove_fields_from_schema @@ -58,7 +58,7 @@ async def get_edited_slide_content( slide: SlideModel, language: Optional[str] = None, ): - model = get_small_model() + model = get_large_model() response_schema = remove_fields_from_schema( slide_layout.json_schema, ["__image_url__", "__icon_url__"] ) diff --git a/servers/fastapi/utils/llm_calls/edit_slide_html.py b/servers/fastapi/utils/llm_calls/edit_slide_html.py new file mode 100644 index 00000000..b20f3cf7 --- /dev/null +++ b/servers/fastapi/utils/llm_calls/edit_slide_html.py @@ -0,0 +1,93 @@ +import asyncio +from typing import Optional +from google.genai.types import GenerateContentConfig +from 
utils.llm_provider import ( + get_google_llm_client, + get_large_model, + is_google_selected, + get_llm_client, +) + +system_prompt = """ + You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality. + + Guidelines: + 1. **Preserve Structure**: Maintain the overall HTML structure, including essential containers, classes, and IDs + 2. **Content Updates**: Modify text, images, lists, and other content elements as requested + 3. **Style Consistency**: Keep existing CSS classes and styling unless specifically asked to change them + 4. **Responsive Design**: Ensure modifications work across different screen sizes + 5. **Accessibility**: Maintain proper semantic HTML and accessibility attributes + 6. **Clean Output**: Return only the modified HTML without explanations unless errors occur + + Common Edit Types: + - Text content changes (headings, paragraphs, lists) + - Image updates (src, alt text, captions) + - Layout modifications (adding/removing sections) + - Style adjustments (colors, fonts, spacing via classes) + - Interactive elements (buttons, links, forms) + + Error Handling: + - If the HTML structure is invalid, fix it while making requested changes + - If a request would break functionality, suggest an alternative approach + - For unclear prompts, make reasonable assumptions and note any ambiguities + + Output Format: + Return the complete modified HTML. If the original HTML contains