Merge branch 'pdf-pptx-layout' of https://github.com/presenton/presenton into pdf-pptx-layout

merge
This commit is contained in:
Suraj Jha 2025-08-06 13:27:32 +05:45
commit 1aee51ca31
203 changed files with 11323 additions and 64098 deletions

View file

@ -1,11 +1,11 @@
.venv
.env
.next
node_modules
out
build
.git
.gitignore
tmp
debug
.fastembed_cache
servers/fastapi/tmp
servers/fastapi/debug
servers/fastapi/.venv
servers/nextjs/node_modules
servers/nextjs/.next
container.db

6
.gitignore vendored
View file

@ -11,4 +11,8 @@ app_data
tmp
debug
.fastembed_cache
my-doc.txt
generated_models
nltk
chroma
container.db

View file

@ -2,12 +2,10 @@ FROM python:3.11-slim-bookworm
# Install Node.js and npm
RUN apt-get update && apt-get install -y \
nginx \
curl \
redis-server
curl
# Install Node.js 20 using NodeSource repository
# Install Node.js 20 using NodeSource repository
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs
@ -18,13 +16,17 @@ WORKDIR /app
# Set environment variables
ENV APP_DATA_DIRECTORY=/app_data
ENV TEMP_DIRECTORY=/tmp/presenton
ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi"
# Install ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
# Install dependencies for FastAPI
COPY servers/fastapi/requirements.txt ./
RUN pip install -r requirements.txt
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
pathvalidate pdfplumber nltk chromadb sqlmodel \
anthropic google-genai openai fastmcp
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
# Install dependencies for Next.js
WORKDIR /app/servers/nextjs
@ -43,18 +45,15 @@ RUN npm run build
WORKDIR /app
# Copy FastAPI and start script
# Copy FastAPI
COPY servers/fastapi/ ./servers/fastapi/
COPY start.js LICENSE NOTICE ./
# Copy nginx configuration
COPY nginx.conf /etc/nginx/nginx.conf
# Copy start script
COPY docker-start.sh /app/docker-start.sh
# Expose the port
EXPOSE 80
# Start the servers
CMD ["/bin/bash", "/app/docker-start.sh"]
CMD ["node", "/app/start.js"]

View file

@ -4,10 +4,6 @@ FROM python:3.11-slim-bookworm
RUN apt-get update && apt-get install -y \
nginx \
curl \
redis-server \
default-libmysqlclient-dev \
build-essential \
pkg-config \
libreoffice \
fontconfig \
imagemagick
@ -15,9 +11,9 @@ RUN apt-get update && apt-get install -y \
RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml
# Install Node.js 20 using NodeSource repository
# Install Node.js 20 using NodeSource repository
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs
apt-get install -y nodejs
# Change working directory
@ -28,18 +24,21 @@ RUN ls -a
# Set environment variables
ENV APP_DATA_DIRECTORY=/app_data
ENV TEMP_DIRECTORY=/tmp/presenton
ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi"
# Install ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
# Fetch the installer over HTTPS: the script is piped straight into sh, so a plain-HTTP download could be tampered with in transit.
RUN curl -fsSL https://ollama.com/install.sh | sh
# Install dependencies for FastAPI
COPY servers/fastapi/requirements.txt ./
RUN pip install -r requirements.txt
RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \
pathvalidate pdfplumber nltk chromadb sqlmodel \
anthropic google-genai openai fastmcp
RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu
# Install dependencies for Next.js
WORKDIR /node_dependencies
COPY servers/nextjs/package.json servers/nextjs/package-lock.json ./
RUN npm install
RUN npm install
# Install chrome for puppeteer
RUN npx puppeteer browsers install chrome@138.0.7204.94 --install-deps
@ -53,4 +52,4 @@ COPY nginx.conf /etc/nginx/nginx.conf
EXPOSE 80
# Start the servers
CMD ["/bin/bash", "/app/docker-dev-start.sh"]
CMD ["node", "/app/start.js", "--dev"]

116
README.md
View file

@ -33,17 +33,22 @@
## ✨ More Freedom with AI Presentations
* ✅ **Custom layouts/templates/themes** — Create custom layouts with HTML and Tailwind, support any presentation design
* ✅ **Bring Your Own Key** — Only pay for what you use. OpenAI, Gemini (More coming soon...)
* ✅ **API Presentation Generation** — Host as API to generate presentations over requests
* ✅ **Ollama Support** — Run open-source models locally with Ollama integration
* ✅ **OpenAI API Compatibility** — Use any OpenAI-compatible API endpoint with your own models
* ✅ **Versatile Image Generation** — Choose from DALL-E 3, Gemini Flash, Pexels, or Pixabay for your visuals
* ✅ **Runs Locally** — All code runs on your device
* ✅ **Privacy-First** — No tracking, no data stored by us
* ✅ **Flexible** — Generate presentations from prompts or outlines
* ✅ **Export Ready** — Save as PowerPoint (PPTX) and PDF
* ✅ **Fully Open-Source** — Apache 2.0 licensed
Presenton gives you complete control over your AI presentation workflow. Choose your models, customize your experience, and keep your data private.
* ✅ **Custom Layouts & Themes** — Create unlimited presentation designs with HTML and Tailwind CSS
* ✅ **Flexible Generation** — Build presentations from prompts or uploaded documents
* ✅ **Export Ready** — Save as PowerPoint (PPTX) and PDF with professional formatting
* ✅ **Bring Your Own Key** — Use your own API keys for OpenAI, Google Gemini, Anthropic Claude, or any compatible provider. Only pay for what you use, no hidden fees or subscriptions.
* ✅ **Ollama Integration** — Run open-source models locally with full privacy
* ✅ **OpenAI API Compatible** — Connect to any OpenAI-compatible endpoint with your own models
* ✅ **Multi-Provider Support** — Mix and match text and image generation providers
* ✅ **Versatile Image Generation** — Choose from DALL-E 3, Gemini Flash, Pexels, or Pixabay
* ✅ **Rich Media Support** — Icons, charts, and custom graphics for professional presentations
* ✅ **Runs Locally** — All processing happens on your device, no cloud dependencies
* ✅ **Privacy-First** — Zero tracking, no data stored by us, complete data sovereignty
* ✅ **API Deployment** — Host as your own API service for your team
* ✅ **Fully Open-Source** — Apache 2.0 licensed, inspect, modify, and contribute
* ✅ **Docker Ready** — One-command deployment with GPU support for local models
## Deploy on Cloud (one click deployment)
[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/presenton-ai-presentations?referralCode=ubp0kk)
@ -72,14 +77,20 @@ Open http://localhost:5000 on browser of your choice to use Presenton.
You may want to provide your API keys directly as environment variables and keep them hidden. Set the following environment variables to do so.
- **CAN_CHANGE_KEYS=[true/false]**: Set this to **false** if you want to keep API Keys hidden and make them unmodifiable.
- **LLM=[openai/google/ollama/custom]**: Select **LLM** of your choice.
- **LLM=[openai/google/anthropic/ollama/custom]**: Select **LLM** of your choice.
- **OPENAI_API_KEY=[Your OpenAI API Key]**: Provide this if **LLM** is set to **openai**
- **OPENAI_MODEL=[OpenAI Model ID]**: Provide this if **LLM** is set to **openai** (default: "gpt-4.1")
- **GOOGLE_API_KEY=[Your Google API Key]**: Provide this if **LLM** is set to **google**
- **GOOGLE_MODEL=[Google Model ID]**: Provide this if **LLM** is set to **google** (default: "models/gemini-2.0-flash")
- **ANTHROPIC_API_KEY=[Your Anthropic API Key]**: Provide this if **LLM** is set to **anthropic**
- **ANTHROPIC_MODEL=[Anthropic Model ID]**: Provide this if **LLM** is set to **anthropic** (default: "claude-3-5-sonnet-20241022")
- **OLLAMA_URL=[Custom Ollama URL]**: Provide this if you want to use a custom Ollama URL and **LLM** is set to **ollama**
- **OLLAMA_MODEL=[Ollama Model ID]**: Provide this if **LLM** is set to **ollama**
- **CUSTOM_LLM_URL=[Custom OpenAI Compatible URL]**: Provide this if **LLM** is set to **custom**
- **CUSTOM_LLM_API_KEY=[Custom OpenAI Compatible API KEY]**: Provide this if **LLM** is set to **custom**
- **CUSTOM_MODEL=[Custom Model ID]**: Provide this if **LLM** is set to **custom**
- **TOOL_CALLS=[Enable/Disable Tool Calls on Custom LLM]**: If **true**, the **LLM** will use Tool Calls instead of JSON Schema for Structured Output.
- **DISABLE_THINKING=[Enable/Disable Thinking on Custom LLM]**: If **true**, Thinking will be disabled.
You can also set the following environment variables to customize the image generation provider and API keys:
@ -97,11 +108,21 @@ You can also set the following environment variables to customize the image gene
docker run -it --name presenton -p 5000:80 -e LLM="openai" -e OPENAI_API_KEY="******" -e IMAGE_PROVIDER="dall-e-3" -e CAN_CHANGE_KEYS="false" -v "./app_data:/app_data" ghcr.io/presenton/presenton:latest
```
### Using Google
```bash
docker run -it --name presenton -p 5000:80 -e LLM="google" -e GOOGLE_API_KEY="******" -e IMAGE_PROVIDER="gemini_flash" -e CAN_CHANGE_KEYS="false" -v "./app_data:/app_data" ghcr.io/presenton/presenton:latest
```
### Using Ollama
```bash
docker run -it --name presenton -p 5000:80 -e LLM="ollama" -e OLLAMA_MODEL="llama3.2:3b" -e IMAGE_PROVIDER="pexels" -e PEXELS_API_KEY="*******" -e CAN_CHANGE_KEYS="false" -v "./app_data:/app_data" ghcr.io/presenton/presenton:latest
```
### Using Anthropic
```bash
docker run -it --name presenton -p 5000:80 -e LLM="anthropic" -e ANTHROPIC_API_KEY="******" -e IMAGE_PROVIDER="pexels" -e PEXELS_API_KEY="******" -e CAN_CHANGE_KEYS="false" -v "./app_data:/app_data" ghcr.io/presenton/presenton:latest
```
### Using OpenAI Compatible API
```bash
docker run -it -p 5000:80 -e CAN_CHANGE_KEYS="false" -e LLM="custom" -e CUSTOM_LLM_URL="http://*****" -e CUSTOM_LLM_API_KEY="*****" -e CUSTOM_MODEL="llama3.2:3b" -e IMAGE_PROVIDER="pexels" -e PEXELS_API_KEY="********" -v "./app_data:/app_data" ghcr.io/presenton/presenton:latest
@ -182,10 +203,79 @@ For detailed info checkout [API documentation](https://docs.presenton.ai/using-p
- [Create Presentations from CSV using AI](https://docs.presenton.ai/tutorial/generate-presentation-from-csv)
- [Create Data Reports Using AI](https://docs.presenton.ai/tutorial/create-data-reports-using-ai)
## 🏗️ MCP Architecture Overview
![Demo](readme_assets/mcpdemo.gif)
Presenton is built on a modular architecture featuring a FastAPI backend and a Next.js frontend. At its core is the **MCP (Model Context Protocol) server**, which orchestrates the entire presentation generation workflow using a robust state machine. This architecture ensures flexibility, reliability, and extensibility.
### MCP Workflow Highlights
- **Session Management:** Each presentation runs in its own session for isolation and tracking.
- **Outline Generation:** Automatically creates outlines, with or without input files.
- **Layout Selection:** Choose from built-in or custom layouts.
- **Content & Asset Generation:** Generates slide text, images, and icons using your selected AI models.
- **Export Options:** Seamlessly export presentations as PDF or PPTX files.
All workflow logic and tool APIs are organized in the `app_mcp` package. The orchestrator handles state transitions and error management, making it easy to extend or customize.
#### Key Files & Directories
- `.vscode/mcp.json`: VS Code integration and MCP server configuration.
- `servers/fastapi/app_mcp/`: Backend workflow logic and tool registration.
---
## ⚡ Quick Start: VS Code Integration
1. **Configure MCP:** Make sure `.vscode/mcp.json` points to your running MCP server (see example below).
2. **Start a Presentation:** Use the VS Code command palette or chat to run `start_presentation` with your topic.
3. **Advance Workflow:** Use `continue_workflow` to progress through outline, layout, and slide generation steps.
4. **Export:** Use `export_presentation` to download your presentation as PDF or PPTX.
5. **Check Progress:** Use `get_status` at any time to view your workflow status.
#### Example `.vscode/mcp.json`
```jsonc
{
"servers": {
"my-mcp-server-5f58fb2c": {
"url": "http://localhost:5000/mcp/",
"type": "http"
}
},
"inputs": []
}
```
---
### 🗣️ Using Chat Commands in VS Code
You can interact with Presenton directly from the VS Code chat window:
- **Step-by-step Workflow:**
Type a prompt like:
```plaintext
I want to create a presentation on "Artificial Intelligence in Healthcare". Can you please show me the step by step and verify things to me so that I can be sure that the presentation is good?
```
- **Direct Commands:**
For a faster workflow, use direct commands such as:
```plaintext
Start a presentation on "Artificial Intelligence in Healthcare" with general layout and 10 slides.
```
This integration gives you full control—whether you want a guided, step-by-step experience or prefer to automate the entire process with a single command.
---
## Roadmap
- [x] Support for custom HTML templates by developers
- [x] Support for accessing custom templates over API
- [ ] Implement MCP server
- [x] Implement MCP server
- [ ] Ability for users to change system prompt
- [X] Support external SQL database

View file

@ -13,13 +13,18 @@ services:
- CAN_CHANGE_KEYS=${CAN_CHANGE_KEYS}
- LLM=${LLM}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- OPENAI_MODEL=${OPENAI_MODEL}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- GOOGLE_MODEL=${GOOGLE_MODEL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- ANTHROPIC_MODEL=${ANTHROPIC_MODEL}
- OLLAMA_URL=${OLLAMA_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- CUSTOM_LLM_URL=${CUSTOM_LLM_URL}
- CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY}
- CUSTOM_MODEL=${CUSTOM_MODEL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- DATABASE_URL=${DATABASE_URL}
production-gpu:
@ -43,13 +48,18 @@ services:
- CAN_CHANGE_KEYS=${CAN_CHANGE_KEYS}
- LLM=${LLM}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- OPENAI_MODEL=${OPENAI_MODEL}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- GOOGLE_MODEL=${GOOGLE_MODEL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- ANTHROPIC_MODEL=${ANTHROPIC_MODEL}
- OLLAMA_URL=${OLLAMA_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- CUSTOM_LLM_URL=${CUSTOM_LLM_URL}
- CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY}
- CUSTOM_MODEL=${CUSTOM_MODEL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- DATABASE_URL=${DATABASE_URL}
development:
@ -62,19 +72,22 @@ services:
- .:/app
- ./app_data:/app_data
environment:
- NODE_ENV=development
- CAN_CHANGE_KEYS=${CAN_CHANGE_KEYS}
- LLM=${LLM}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- OPENAI_MODEL=${OPENAI_MODEL}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- GOOGLE_MODEL=${GOOGLE_MODEL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- ANTHROPIC_MODEL=${ANTHROPIC_MODEL}
- OLLAMA_URL=${OLLAMA_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- CUSTOM_LLM_URL=${CUSTOM_LLM_URL}
- CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY}
- CUSTOM_MODEL=${CUSTOM_MODEL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- DATABASE_URL=${DATABASE_URL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
development-gpu:
build:
@ -93,11 +106,14 @@ services:
- .:/app
- ./app_data:/app_data
environment:
- NODE_ENV=development
- CAN_CHANGE_KEYS=${CAN_CHANGE_KEYS}
- LLM=${LLM}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- OPENAI_MODEL=${OPENAI_MODEL}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- GOOGLE_MODEL=${GOOGLE_MODEL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- ANTHROPIC_MODEL=${ANTHROPIC_MODEL}
- OLLAMA_URL=${OLLAMA_URL}
- OLLAMA_MODEL=${OLLAMA_MODEL}
- CUSTOM_LLM_URL=${CUSTOM_LLM_URL}
@ -105,4 +121,4 @@ services:
- CUSTOM_MODEL=${CUSTOM_MODEL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- DATABASE_URL=${DATABASE_URL}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- EXTENDED_REASONING=${EXTENDED_REASONING}

View file

@ -1,13 +0,0 @@
#!/bin/bash
# Development entrypoint: puts the prebuilt node_modules in place, then starts
# ollama, nginx and redis before handing over to the Node launcher.
echo "Starting development server..."
# If /node_dependencies/node_modules exists, it replaces the Next.js app's node_modules.
if [ -d "/node_dependencies/node_modules" ]; then
rm -rf /app/servers/nextjs/node_modules
mv /node_dependencies/node_modules /app/servers/nextjs
fi
# ollama is sent to the background so the following commands can run.
ollama serve &
service nginx start
service redis-server start
# Last command of the script; runs in the foreground.
node /app/start.js

View file

@ -1,8 +0,0 @@
#!/bin/bash
# Production entrypoint: starts ollama, nginx and redis, then the Node launcher.
echo "Starting production server..."
# ollama is sent to the background so the following commands can run.
ollama serve &
service nginx start
service redis-server start
# Last command of the script; runs in the foreground.
node /app/start.js

View file

@ -30,6 +30,13 @@ http {
proxy_connect_timeout 30m;
}
# MCP
location /mcp/ {
proxy_pass http://localhost:8001;
proxy_read_timeout 30m;
proxy_connect_timeout 30m;
}
location /docs {
proxy_pass http://localhost:8000/docs;
proxy_read_timeout 30m;

BIN
readme_assets/mcpdemo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 MiB

View file

@ -0,0 +1 @@
3.11

View file

@ -1,14 +1,8 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from api.lifespan import app_lifespan
from api.middlewares import UserConfigEnvUpdateMiddleware
from api.v1.ppt.router import API_V1_PPT_ROUTER
from utils.asset_directory_utils import get_exports_directory, get_images_directory, get_uploads_directory
import os
from utils.get_env import get_app_data_directory_env
# Import models to ensure they are registered with SQLModel
from models.sql.presentation_layout_code import PresentationLayoutCodeModel
app = FastAPI(lifespan=app_lifespan)
@ -17,38 +11,6 @@ app = FastAPI(lifespan=app_lifespan)
# Routers
app.include_router(API_V1_PPT_ROUTER)
# Helper function to get fonts directory
def get_fonts_directory() -> str:
    """Return the path of the fonts directory, creating it when it is missing.

    The directory is ``fonts`` inside the app data directory reported by
    ``get_app_data_directory_env``; ``/tmp/presenton`` is used as the base
    when that helper returns a falsy value.
    """
    base_dir = get_app_data_directory_env()
    if not base_dir:
        base_dir = "/tmp/presenton"
    fonts_path = os.path.join(base_dir, "fonts")
    os.makedirs(fonts_path, exist_ok=True)
    return fonts_path
# Static files
app.mount("/static", StaticFiles(directory="static"), name="static")
app.mount(
"/app_data/images",
StaticFiles(directory=get_images_directory()),
name="app_data/images",
)
app.mount(
"/app_data/exports",
StaticFiles(directory=get_exports_directory()),
name="app_data/exports",
)
app.mount(
"/app_data/uploads",
StaticFiles(directory=get_uploads_directory()),
name="app_data/uploads",
)
app.mount(
"/app_data/fonts",
StaticFiles(directory=get_fonts_directory()),
name="app_data/fonts",
)
# Middlewares
origins = ["*"]
app.add_middleware(

View file

@ -1,9 +1,11 @@
import json
from datetime import datetime
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from models.ollama_model_status import OllamaModelStatus
from services import REDIS_SERVICE
from models.sql.ollama_pull_status import OllamaPullStatus
from services.database import get_container_db_async_session
from utils.ollama import pull_ollama_model
@ -15,6 +17,8 @@ async def pull_ollama_model_background_task(model: str):
)
log_event_count = 0
session = await get_container_db_async_session().__anext__()
try:
async for event in pull_ollama_model(model):
log_event_count += 1
@ -30,18 +34,13 @@ async def pull_ollama_model_background_task(model: str):
if "status" in event:
saved_model_status.status = event["status"]
REDIS_SERVICE.set(
f"ollama_models/{model}",
json.dumps(saved_model_status.model_dump(mode="json")),
)
await upsert_ollama_pull_status(session, model, saved_model_status)
except Exception as e:
saved_model_status.status = "error"
saved_model_status.done = True
REDIS_SERVICE.set(
f"ollama_models/{model}",
json.dumps(saved_model_status.model_dump(mode="json")),
)
await upsert_ollama_pull_status(session, model, saved_model_status)
await session.close()
raise HTTPException(
status_code=500,
detail=f"Failed to pull model: {e}",
@ -51,9 +50,27 @@ async def pull_ollama_model_background_task(model: str):
saved_model_status.status = "pulled"
saved_model_status.downloaded = saved_model_status.size
REDIS_SERVICE.set(
f"ollama_models/{model}",
json.dumps(saved_model_status.model_dump(mode="json")),
)
await upsert_ollama_pull_status(session, model, saved_model_status)
await session.close()
return saved_model_status
async def upsert_ollama_pull_status(
    session: AsyncSession, model: str, model_status: OllamaModelStatus
):
    """Insert or refresh the stored pull status of an Ollama model.

    Looks up the ``OllamaPullStatus`` row whose ``id`` equals ``model``. An
    existing row gets its ``status`` and ``last_updated`` overwritten;
    otherwise a new row is added. The change is committed before returning.

    Args:
        session: Async database session used for the lookup and the commit.
        model: Model name, stored in the row's ``id`` column.
        model_status: Current pull status; persisted as its JSON-mode dump.
    """
    lookup = select(OllamaPullStatus).where(OllamaPullStatus.id == model)
    record = (await session.execute(lookup)).scalar_one_or_none()

    # Compute the payload and timestamp once so both branches store the same values.
    status_payload = model_status.model_dump(mode="json")
    updated_at = datetime.now()

    if record:
        record.status = status_payload
        record.last_updated = updated_at
    else:
        session.add(
            OllamaPullStatus(
                id=model,
                status=status_payload,
                last_updated=updated_at,
            )
        )

    # commit() flushes pending changes itself, so the former flush() after it was a no-op.
    await session.commit()

View file

@ -0,0 +1,16 @@
from typing import Annotated, List
from fastapi import APIRouter, Body, HTTPException
from utils.available_models import list_available_anthropic_models
ANTHROPIC_ROUTER = APIRouter(prefix="/anthropic", tags=["Anthropic"])
@ANTHROPIC_ROUTER.post("/models/available", response_model=List[str])
async def get_available_models(
    api_key: Annotated[str, Body(embed=True)],
):
    """List the Anthropic models available for the supplied API key.

    Args:
        api_key: API key read from the ``api_key`` field of the JSON body.

    Returns:
        The list of strings produced by ``list_available_anthropic_models``.

    Raises:
        HTTPException: 500 carrying the underlying error message when the
            lookup fails.
    """
    try:
        return await list_available_anthropic_models(api_key)
    except Exception as e:
        # Chain the original error so the root cause stays visible in tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e

View file

@ -1,14 +0,0 @@
from typing import Annotated, List, Optional
from fastapi import APIRouter, Body
from utils.custom_llm_provider import list_available_custom_models
CUSTOM_LLM_ROUTER = APIRouter(prefix="/custom_llm", tags=["Custom LLM"])
@CUSTOM_LLM_ROUTER.post("/models/available", response_model=List[str])
async def get_available_models(
    url: Annotated[Optional[str], Body()] = None,
    api_key: Annotated[Optional[str], Body()] = None,
):
    """Return the models reported by ``list_available_custom_models``.

    Both body fields are optional and are forwarded to the helper unchanged.
    """
    available_models = await list_available_custom_models(url, api_key)
    return available_models

View file

@ -0,0 +1,14 @@
from typing import Annotated, List
from fastapi import APIRouter, Body, HTTPException
from utils.available_models import list_available_google_models
GOOGLE_ROUTER = APIRouter(prefix="/google", tags=["Google"])
@GOOGLE_ROUTER.post("/models/available", response_model=List[str])
async def get_available_models(api_key: Annotated[str, Body(embed=True)]):
    """List the Google models available for the supplied API key.

    Args:
        api_key: API key read from the ``api_key`` field of the JSON body.

    Returns:
        The list of strings produced by ``list_available_google_models``.

    Raises:
        HTTPException: 500 carrying the underlying error message when the
            lookup fails.
    """
    try:
        return await list_available_google_models(api_key)
    except Exception as e:
        # Chain the original error so the root cause stays visible in tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e

View file

@ -0,0 +1,27 @@
from fastapi import APIRouter, HTTPException
import aiohttp
from typing import List, Any
from utils.get_layout_by_name import get_layout_by_name
from models.presentation_layout import PresentationLayoutModel
LAYOUTS_ROUTER = APIRouter(prefix="/layouts", tags=["Layouts"])
@LAYOUTS_ROUTER.get("/", summary="Get available layouts")
async def get_layouts():
    """Fetch the available layouts from the local layouts endpoint.

    Returns:
        The decoded JSON body of the upstream response, unchanged.

    Raises:
        HTTPException: with the upstream status code and response text when
            the upstream answer is not HTTP 200.
    """
    layouts_endpoint = "http://localhost:3000/api/layouts"  # Adjust port if needed
    async with aiohttp.ClientSession() as client:
        async with client.get(layouts_endpoint) as upstream:
            if upstream.status == 200:
                # Optionally, parse into a Pydantic model if one matches the structure
                return await upstream.json()
            failure_body = await upstream.text()
            raise HTTPException(
                status_code=upstream.status,
                detail=f"Failed to fetch layouts: {failure_body}"
            )
@LAYOUTS_ROUTER.get("/{layout_name}", summary="Get layout details by name")
async def get_layout_detail(layout_name: str) -> PresentationLayoutModel:
    """Return the layout identified by ``layout_name``.

    The lookup is delegated to ``get_layout_by_name``; the route summary now
    says "by name" to match the path parameter (it previously said "by ID").

    Args:
        layout_name: Name of the layout, taken from the URL path.
    """
    return await get_layout_by_name(layout_name)

View file

@ -1,12 +1,15 @@
from datetime import datetime, timedelta
import json
from typing import List
from fastapi import APIRouter, BackgroundTasks, HTTPException
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from api.v1.ppt.background_tasks import pull_ollama_model_background_task
from constants.supported_ollama_models import SUPPORTED_OLLAMA_MODELS
from models.ollama_model_metadata import OllamaModelMetadata
from models.ollama_model_status import OllamaModelStatus
from services import REDIS_SERVICE
from models.sql.ollama_pull_status import OllamaPullStatus
from services.database import get_container_db_async_session
from utils.ollama import list_pulled_ollama_models
OLLAMA_ROUTER = APIRouter(prefix="/ollama", tags=["Ollama"])
@ -23,7 +26,11 @@ async def get_available_models():
@OLLAMA_ROUTER.get("/model/pull", response_model=OllamaModelStatus)
async def pull_model(model: str, background_tasks: BackgroundTasks):
async def pull_model(
model: str,
background_tasks: BackgroundTasks,
session: AsyncSession = Depends(get_container_db_async_session),
):
if model not in SUPPORTED_OLLAMA_MODELS:
raise HTTPException(
@ -46,21 +53,27 @@ async def pull_model(model: str, background_tasks: BackgroundTasks):
detail=f"Failed to check pulled models: {e}",
)
saved_model_status = REDIS_SERVICE.get(f"ollama_models/{model}")
saved_pull_status = None
saved_model_status = None
try:
saved_pull_status = await session.get(OllamaPullStatus, model)
saved_model_status = saved_pull_status.status
except Exception as e:
pass
# If the model is being pulled, return the model
if saved_model_status:
saved_model_status_json = json.loads(saved_model_status)
# If the model is being pulled, return the model
# ? If the model status is pulled in redis but was not found while listing pulled models,
# ? it means the model was deleted and we need to pull it again
if (
saved_model_status_json["status"] == "error"
or saved_model_status_json["status"] == "pulled"
saved_model_status["status"] == "error"
or saved_model_status["status"] == "pulled"
or saved_pull_status.last_updated < (datetime.now() - timedelta(seconds=10))
):
REDIS_SERVICE.delete(f"ollama_models/{model}")
await session.delete(saved_pull_status)
else:
return saved_model_status_json
return saved_model_status
# If the model is not being pulled, pull the model
background_tasks.add_task(pull_ollama_model_background_task, model)

View file

@ -0,0 +1,17 @@
from typing import Annotated, List
from fastapi import APIRouter, Body, HTTPException
from utils.available_models import list_available_openai_compatible_models
OPENAI_ROUTER = APIRouter(prefix="/openai", tags=["OpenAI"])
@OPENAI_ROUTER.post("/models/available", response_model=List[str])
async def get_available_models(
    url: Annotated[str, Body()],
    api_key: Annotated[str, Body()],
):
    """List the models exposed by an OpenAI-compatible endpoint.

    Args:
        url: Endpoint URL, read from the request body.
        api_key: API key for that endpoint, read from the request body.

    Returns:
        The list of strings produced by
        ``list_available_openai_compatible_models``.

    Raises:
        HTTPException: 500 carrying the underlying error message when the
            lookup fails.
    """
    try:
        return await list_available_openai_compatible_models(url, api_key)
    except Exception as e:
        # Chain the original error so the root cause stays visible in tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e

View file

@ -7,7 +7,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
from models.presentation_outline_model import PresentationOutlineModel
from models.sql.presentation import PresentationModel
from models.sse_response import SSECompleteResponse, SSEResponse, SSEStatusResponse
from services import TEMP_FILE_SERVICE
from services.database import get_async_session
from services.documents_loader import DocumentsLoader
from services.score_based_chunker import ScoreBasedChunker
from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
OUTLINES_ROUTER = APIRouter(prefix="/outlines", tags=["Outlines"])
@ -22,39 +25,66 @@ async def stream_outlines(
if not presentation:
raise HTTPException(status_code=404, detail="Presentation not found")
temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
async def inner():
yield SSEStatusResponse(
status="Generating presentation outlines..."
).to_string()
presentation_content_text = ""
async for chunk in generate_ppt_outline(
presentation.prompt,
presentation.n_slides,
presentation.language,
presentation.summary,
):
# Give control to the event loop
await asyncio.sleep(0)
presentation_outlines = None
additional_context = ""
if presentation.file_paths:
documents_loader = DocumentsLoader(file_paths=presentation.file_paths)
await documents_loader.load_documents(temp_dir)
documents = documents_loader.documents
if documents:
additional_context = documents[0]
chunker = ScoreBasedChunker()
try:
chunks = await chunker.get_n_chunks(
documents[0], presentation.n_slides
)
presentation_outlines = PresentationOutlineModel(
slides=[chunk.to_slide_outline() for chunk in chunks]
)
except Exception as e:
print(e)
yield SSEResponse(
event="response",
data=json.dumps({"type": "chunk", "chunk": chunk}),
).to_string()
presentation_content_text += chunk
if not presentation_outlines:
presentation_outlines_text = ""
async for chunk in generate_ppt_outline(
presentation.prompt,
presentation.n_slides,
presentation.language,
additional_context,
):
# Give control to the event loop
await asyncio.sleep(0)
presentation_content_json = json.loads(presentation_content_text)
yield SSEResponse(
event="response",
data=json.dumps({"type": "chunk", "chunk": chunk}),
).to_string()
presentation_outlines_text += chunk
presentation_content = PresentationOutlineModel(**presentation_content_json)
presentation_content.slides = presentation_content.slides[
presentation_outlines_json = json.loads(presentation_outlines_text)
presentation_outlines = PresentationOutlineModel(
**presentation_outlines_json
)
presentation_outlines.slides = presentation_outlines.slides[
: presentation.n_slides
]
presentation.title = presentation_content.title
presentation.outlines = [
each.model_dump() for each in presentation_content.slides
]
presentation.notes = presentation_content.notes
presentation.outlines = presentation_outlines.model_dump()
presentation.title = (
presentation_outlines.slides[0][:50]
.replace("#", "")
.replace("/", "")
.replace("\\", "")
.replace("\n", "")
)
sql_session.add(presentation)
await sql_session.commit()

View file

@ -5,21 +5,19 @@ import random
from typing import Annotated, List, Literal, Optional
from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from sqlalchemy import String, cast, delete
from sqlalchemy import delete
from sqlalchemy.ext.asyncio import AsyncSession
from sqlmodel import select
from constants.documents import UPLOAD_ACCEPTED_FILE_TYPES
from models.presentation_and_path import PresentationPathAndEditPath
from models.presentation_from_template import GetPresentationUsingTemplateRequest
from models.presentation_outline_model import (
PresentationOutlineModel,
SlideOutlineModel,
)
from models.presentation_outline_model import PresentationOutlineModel
from models.pptx_models import PptxPresentationModel
from models.presentation_layout import PresentationLayoutModel
from models.presentation_structure_model import PresentationStructureModel
from models.presentation_with_slides import PresentationWithSlides
from services.get_layout_by_name import get_layout_by_name
from services.score_based_chunker import ScoreBasedChunker
from utils.get_layout_by_name import get_layout_by_name
from services.icon_finder_service import IconFinderService
from services.image_generation_service import ImageGenerationService
from utils.dict_utils import deep_update
@ -33,7 +31,6 @@ from services.documents_loader import DocumentsLoader
from models.sql.presentation import PresentationModel
from services.pptx_presentation_creator import PptxPresentationCreator
from utils.asset_directory_utils import get_exports_directory, get_images_directory
from utils.llm_calls.generate_document_summary import generate_document_summary
from utils.llm_calls.generate_presentation_structure import (
generate_presentation_structure,
)
@ -112,20 +109,12 @@ async def create_presentation(
):
presentation_id = get_random_uuid()
summary = None
if file_paths:
temp_dir = TEMP_FILE_SERVICE.create_temp_dir(presentation_id)
documents_loader = DocumentsLoader(file_paths=file_paths)
await documents_loader.load_documents(temp_dir)
summary = await generate_document_summary(documents_loader.documents)
presentation = PresentationModel(
id=presentation_id,
prompt=prompt,
n_slides=n_slides,
language=language,
summary=summary,
file_paths=file_paths,
)
sql_session.add(presentation)
@ -137,7 +126,7 @@ async def create_presentation(
@PRESENTATION_ROUTER.post("/prepare", response_model=PresentationModel)
async def prepare_presentation(
presentation_id: Annotated[str, Body()],
outlines: Annotated[List[SlideOutlineModel], Body()],
outlines: Annotated[List[str], Body()],
layout: Annotated[PresentationLayoutModel, Body()],
title: Annotated[Optional[str], Body()] = None,
sql_session: AsyncSession = Depends(get_async_session),
@ -172,7 +161,7 @@ async def prepare_presentation(
presentation_structure.slides[index] = random_slide_index
sql_session.add(presentation)
presentation.outlines = [each.model_dump() for each in outlines]
presentation.outlines = PresentationOutlineModel(slides=outlines).model_dump()
presentation.title = title or presentation.title
presentation.set_layout(layout)
presentation.set_structure(presentation_structure)
@ -217,9 +206,11 @@ async def stream_presentation(
).to_string()
for i, slide_layout_index in enumerate(structure.slides):
slide_layout = layout.slides[slide_layout_index]
slide_content = await get_slide_content_from_type_and_outline(
slide_layout, outline.slides[i], presentation.language
)
slide = SlideModel(
presentation=presentation_id,
layout_group=layout.name,
@ -236,9 +227,6 @@ async def stream_presentation(
)
)
# Give control to the event loop
await asyncio.sleep(0)
yield SSEResponse(
event="response",
data=json.dumps({"type": "chunk", "chunk": slide.model_dump_json()}),
@ -328,37 +316,48 @@ async def generate_presentation_api(
presentation_id = get_random_uuid()
temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
# 1. Save uploaded files
file_paths = []
if files:
temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
for upload in files:
file_path = os.path.join(temp_dir, upload.filename)
with open(file_path, "wb") as f:
f.write(await upload.read())
file_paths.append(file_path)
# 2. Create Presentation Summary (if documents are provided)
summary = None
# 3. Generate Outlines
presentation_outlines = None
additional_context = ""
if file_paths:
temp_dir = TEMP_FILE_SERVICE.create_temp_dir(presentation_id)
documents_loader = DocumentsLoader(file_paths=file_paths)
await documents_loader.load_documents(temp_dir)
summary = await generate_document_summary(documents_loader.documents)
documents = documents_loader.documents
if documents:
additional_context = documents[0]
chunker = ScoreBasedChunker()
try:
chunks = await chunker.get_n_chunks(documents[0], n_slides)
presentation_outlines = PresentationOutlineModel(
slides=[chunk.to_slide_outline() for chunk in chunks]
)
except Exception as e:
print(e)
# 3. Generate Outlines
presentation_content_text = ""
async for chunk in generate_ppt_outline(
prompt,
n_slides,
language,
summary,
):
presentation_content_text += chunk
if not presentation_outlines:
presentation_outlines_text = ""
async for chunk in generate_ppt_outline(
prompt,
n_slides,
language,
additional_context,
):
presentation_outlines_text += chunk
presentation_content_json = json.loads(presentation_content_text)
presentation_content = PresentationOutlineModel(**presentation_content_json)
outlines = presentation_content.slides[:n_slides]
presentation_outlines_json = json.loads(presentation_outlines_text)
presentation_outlines = PresentationOutlineModel(**presentation_outlines_json)
outlines = presentation_outlines.slides[:n_slides]
total_outlines = len(outlines)
print("-" * 40)
@ -374,12 +373,8 @@ async def generate_presentation_api(
else:
presentation_structure: PresentationStructureModel = (
await generate_presentation_structure(
presentation_outline=PresentationOutlineModel(
title=presentation_content.title,
slides=outlines,
notes=presentation_content.notes,
),
presentation_layout=layout_model,
presentation_outlines,
layout_model,
)
)
@ -398,10 +393,7 @@ async def generate_presentation_api(
prompt=prompt,
n_slides=n_slides,
language=language,
title=presentation_content.title,
summary=summary,
outlines=[each.model_dump() for each in outlines],
notes=presentation_content.notes,
outlines=presentation_outlines.model_dump(),
layout=layout_model.model_dump(),
structure=presentation_structure.model_dump(),
)
@ -447,7 +439,7 @@ async def generate_presentation_api(
# 9. Export
presentation_and_path = await export_presentation(
presentation_id, presentation_content.title, export_as
presentation_id, presentation.title or get_random_uuid(), export_as
)
return PresentationPathAndEditPath(
@ -475,7 +467,6 @@ async def from_template(
new_slide_data = list(filter(lambda x: x.index == each_slide.index, data.data))
if new_slide_data:
updated_content = deep_update(each_slide.content, new_slide_data[0].content)
print(f"Updated content for slide {each_slide.index}: {updated_content}")
new_slides.append(
each_slide.get_new_slide(new_presentation.id, updated_content)
)
@ -485,7 +476,7 @@ async def from_template(
await sql_session.commit()
presentation_and_path = await export_presentation(
new_presentation.id, new_presentation.title, data.export_as
new_presentation.id, new_presentation.title or get_random_uuid(), data.export_as
)
return PresentationPathAndEditPath(

View file

@ -1,3 +1,4 @@
import importlib
from typing import Annotated, Optional
from fastapi import APIRouter, Body, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
@ -13,6 +14,7 @@ from utils.llm_calls.edit_slide_html import get_edited_slide_html
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
from utils.process_slides import process_old_and_new_slides_and_fetch_assets
from utils.randomizers import get_random_uuid
from utils.schema_utils import remove_fields_from_schema
SLIDE_ROUTER = APIRouter(prefix="/slide", tags=["Slide"])
@ -32,12 +34,12 @@ async def edit_slide(
raise HTTPException(status_code=404, detail="Presentation not found")
presentation_layout = presentation.get_layout()
slide_layout = await get_slide_layout_from_prompt(
prompt, presentation_layout, slide
)
edited_slide_content = await get_edited_slide_content(
prompt, slide_layout, slide, presentation.language
prompt, slide, presentation.language, slide_layout
)
image_generation_service = ImageGenerationService(get_images_directory())

View file

@ -74,6 +74,11 @@ class GetLayoutsResponse(BaseModel):
message: Optional[str] = None
# Response body of the layout-deletion endpoint: a success flag plus an
# optional human-readable message describing the outcome.
class DeleteLayoutResponse(BaseModel):
success: bool
message: Optional[str] = None
class PresentationSummary(BaseModel):
presentation_id: str
layout_count: int
@ -868,4 +873,40 @@ async def get_presentations_summary(
raise HTTPException(
status_code=500,
detail=f"Internal server error while retrieving presentations summary: {str(e)}"
)
)
# ENDPOINT : Delete a layout
@LAYOUT_MANAGEMENT_ROUTER.delete(
    "/delete-layouts/{presentation_id}",
    response_model=DeleteLayoutResponse,
    responses={
        200: {"model": DeleteLayoutResponse, "description": "Layout deleted successfully"},
        400: {"model": ErrorResponse, "description": "Presentation ID is empty"},
        404: {"model": ErrorResponse, "description": "Presentation Layouts not found"},
        500: {"model": ErrorResponse, "description": "Internal server error"}
    }
)
async def delete_layouts(presentation_id: str, session: AsyncSession = Depends(get_async_session)):
    # Deletes every stored layout row belonging to `presentation_id`.
    #
    # Responses:
    #   200 - at least one layout row was deleted
    #   400 - the presentation id is empty / whitespace only
    #   404 - no layout rows exist for that presentation
    #   500 - any unexpected failure (the message is echoed in `detail`)
    #
    # Plain comments are used instead of a docstring so the generated
    # OpenAPI description of the route is left untouched.
    try:
        # Only emptiness is checked here; the id format itself is not validated.
        if not presentation_id or not presentation_id.strip():
            raise HTTPException(
                status_code=400,
                detail="Presentation ID cannot be empty"
            )

        # Delete all layouts stored for this presentation
        result = await session.execute(
            delete(PresentationLayoutCodeModel).where(
                PresentationLayoutCodeModel.presentation_id == presentation_id
            )
        )
        await session.commit()

        # The route advertises a 404; honour it when the DELETE matched nothing.
        if result.rowcount == 0:
            raise HTTPException(
                status_code=404,
                detail=f"No layouts found for presentation {presentation_id}"
            )

        return DeleteLayoutResponse(
            success=True,
            message=f"Successfully deleted layout(s) for presentation {presentation_id}"
        )
    except HTTPException:
        # Deliberate HTTP errors (400/404) must not be rewritten into a 500.
        raise
    except Exception as e:
        print(f"Error deleting layouts for presentation {presentation_id}: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error while deleting layouts: {str(e)}"
        )

View file

@ -2,8 +2,10 @@ from fastapi import APIRouter
from api.v1.ppt.endpoints.slide_to_html import SLIDE_TO_HTML_ROUTER, HTML_TO_REACT_ROUTER, HTML_EDIT_ROUTER, LAYOUT_MANAGEMENT_ROUTER
from api.v1.ppt.endpoints.presentation import PRESENTATION_ROUTER
from api.v1.ppt.endpoints.anthropic import ANTHROPIC_ROUTER
from api.v1.ppt.endpoints.google import GOOGLE_ROUTER
from api.v1.ppt.endpoints.openai import OPENAI_ROUTER
from api.v1.ppt.endpoints.files import FILES_ROUTER
from api.v1.ppt.endpoints.custom_llm import CUSTOM_LLM_ROUTER
from api.v1.ppt.endpoints.pptx_slides import PPTX_SLIDES_ROUTER
from api.v1.ppt.endpoints.pdf_slides import PDF_SLIDES_ROUTER
from api.v1.ppt.endpoints.fonts import FONTS_ROUTER
@ -29,8 +31,7 @@ API_V1_PPT_ROUTER.include_router(LAYOUT_MANAGEMENT_ROUTER)
API_V1_PPT_ROUTER.include_router(IMAGES_ROUTER)
API_V1_PPT_ROUTER.include_router(ICONS_ROUTER)
API_V1_PPT_ROUTER.include_router(OLLAMA_ROUTER)
API_V1_PPT_ROUTER.include_router(CUSTOM_LLM_ROUTER)
API_V1_PPT_ROUTER.include_router(PDF_SLIDES_ROUTER)
API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER)
API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER)
API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER)

View file

@ -0,0 +1 @@
# This file marks the mcp directory as a Python package.

View file

@ -0,0 +1,13 @@
from fastmcp import FastMCP
from app_mcp.tools import register_tools
from app_mcp.services.workflow_orchestrator import WorkflowOrchestrator
def create_mcp_server():
    """Build the "PresentonMCP" FastMCP server with all tools registered.

    A fresh WorkflowOrchestrator is created and handed to ``register_tools``
    together with the server.

    Returns:
        The configured FastMCP instance.
    """
    server = FastMCP("PresentonMCP")
    register_tools(server, WorkflowOrchestrator())
    return server
# Keyword options for uvicorn; only auto-reload is enabled.
# NOTE(review): the consumer of this dict is not visible here - confirm that
# reload=True is intended outside of development.
uvicorn_config = dict(reload=True)

View file

@ -0,0 +1,119 @@
from app_mcp.services.state_machine.states import PresentationState
# Allowed state changes of the presentation workflow.
# Maps each state to the set of states that may directly follow it; a state
# missing from the mapping has no outgoing transitions.
TRANSITIONS = {
    PresentationState.INIT: {
        PresentationState.OUTLINE_REQUESTED,
    },

    # --- Outline generation (file processing happens in this phase too) ---
    PresentationState.OUTLINE_REQUESTED: {
        PresentationState.OUTLINE_GENERATED,
        PresentationState.OUTLINE_FAILED,
    },
    PresentationState.OUTLINE_GENERATED: {
        PresentationState.OUTLINE_APPROVED,
        PresentationState.OUTLINE_REQUESTED,  # regenerate the outline
        PresentationState.OUTLINE_FAILED,
    },
    PresentationState.OUTLINE_APPROVED: {
        PresentationState.LAYOUT_REQUESTED,
    },

    # --- Layout selection ---
    PresentationState.LAYOUT_REQUESTED: {
        PresentationState.LAYOUT_SELECTED,
    },
    PresentationState.LAYOUT_SELECTED: {
        PresentationState.GENERATION_IN_PROGRESS,
        PresentationState.LAYOUT_REQUESTED,  # pick a different layout
    },

    # --- Presentation generation ---
    PresentationState.GENERATION_IN_PROGRESS: {
        PresentationState.PRESENTATION_READY,
        PresentationState.GENERATION_FAILED,
    },
    PresentationState.PRESENTATION_READY: {
        PresentationState.EXPORT_REQUESTED,
        PresentationState.EDIT_REQUESTED,
        PresentationState.OUTLINE_REQUESTED,  # start over with a new outline
    },

    # --- Export ---
    PresentationState.EXPORT_REQUESTED: {
        PresentationState.EXPORT_IN_PROGRESS,
    },
    PresentationState.EXPORT_IN_PROGRESS: {
        PresentationState.EXPORT_COMPLETE,
        PresentationState.EXPORT_FAILED,
    },
    PresentationState.EXPORT_COMPLETE: {
        PresentationState.EDIT_REQUESTED,
        PresentationState.EXPORT_REQUESTED,  # export again
        PresentationState.INIT,
    },

    # --- Edit and revision ---
    PresentationState.EDIT_REQUESTED: {
        PresentationState.TEMPLATE_EDITING,
    },
    PresentationState.TEMPLATE_EDITING: {
        PresentationState.PRESENTATION_READY,
        PresentationState.EDIT_FAILED,
    },

    # --- Recovery paths out of the failure states ---
    PresentationState.OUTLINE_FAILED: {
        PresentationState.OUTLINE_REQUESTED,
        PresentationState.INIT,
    },
    PresentationState.GENERATION_FAILED: {
        PresentationState.LAYOUT_SELECTED,
        PresentationState.OUTLINE_APPROVED,
    },
    PresentationState.EXPORT_FAILED: {
        PresentationState.EXPORT_REQUESTED,
        PresentationState.PRESENTATION_READY,
    },
    PresentationState.EDIT_FAILED: {
        PresentationState.EDIT_REQUESTED,
        PresentationState.PRESENTATION_READY,
    },
}
# Human-readable "what to do next" hint for each workflow state.
# States without an entry are expected to fall back to a generic message at
# the lookup site.
SUGGESTIONS = {
    PresentationState.INIT: "Start with outline generation (files will be processed automatically if provided)",
    PresentationState.OUTLINE_REQUESTED: "Generating presentation outline with file analysis if applicable",
    PresentationState.OUTLINE_GENERATED: "Review and approve outline",
    PresentationState.OUTLINE_APPROVED: "Select presentation layout",
    PresentationState.LAYOUT_SELECTED: "Generate presentation",
    PresentationState.PRESENTATION_READY: "Export presentation or request edits",
    PresentationState.EXPORT_REQUESTED: "Choose export format and generate",
    PresentationState.EXPORT_COMPLETE: "Download presentation or start new one",
    PresentationState.EDIT_REQUESTED: "Make template-based edits",
    # Failure states: these are the states a workflow is in when an error
    # response is built, so each needs a recovery hint. The wording mirrors
    # the recovery transitions allowed out of each failure state.
    PresentationState.OUTLINE_FAILED: "Retry outline generation or start over",
    PresentationState.GENERATION_FAILED: "Re-select the layout or re-approve the outline, then retry generation",
    PresentationState.EXPORT_FAILED: "Retry the export or return to the finished presentation",
    PresentationState.EDIT_FAILED: "Retry the edit or return to the finished presentation",
}
# Progress percentage (0-100) reported for each workflow state.
# EDIT_REQUESTED and the *_FAILED states have no entry.
PROGRESS_WEIGHTS = {
    PresentationState.INIT: 0,

    # outline phase
    PresentationState.OUTLINE_REQUESTED: 20,
    PresentationState.OUTLINE_GENERATED: 35,
    PresentationState.OUTLINE_APPROVED: 40,

    # layout phase
    PresentationState.LAYOUT_REQUESTED: 45,
    PresentationState.LAYOUT_SELECTED: 50,

    # generation phase
    PresentationState.GENERATION_IN_PROGRESS: 70,
    PresentationState.PRESENTATION_READY: 85,

    # export phase
    PresentationState.EXPORT_REQUESTED: 90,
    PresentationState.EXPORT_IN_PROGRESS: 95,
    PresentationState.EXPORT_COMPLETE: 100,

    # editing happens on an existing presentation
    PresentationState.TEMPLATE_EDITING: 60,
}
# States that represent a failed workflow step.
ERROR_STATES = {
    PresentationState.OUTLINE_FAILED,
    PresentationState.GENERATION_FAILED,
    PresentationState.EXPORT_FAILED,
    PresentationState.EDIT_FAILED,
}

View file

@ -0,0 +1,19 @@
from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Set
@dataclass
class StateContext:
    """Context data that travels with the state machine.

    Every field is optional and starts out empty; ``metadata`` is a free-form
    dict that is created fresh for each instance.
    """

    presentation_id: Optional[str] = None
    title: Optional[str] = None
    outlines: Optional[list] = None
    layout: Optional[str] = None
    file_paths: Optional[list] = None
    export_format: Optional[str] = None
    export_path: Optional[str] = None
    error_message: Optional[str] = None
    # default_factory gives each instance its own dict instead of relying on
    # a None sentinel that contradicts the declared type.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers may still pass metadata=None
        # explicitly and expect an empty dict afterwards.
        if self.metadata is None:
            self.metadata = {}

View file

@ -0,0 +1,101 @@
from typing import Dict, Set, Any
from app_mcp.services.state_machine.context import StateContext
from app_mcp.services.state_machine.states import PresentationState
from app_mcp.services.state_machine.constants import TRANSITIONS, SUGGESTIONS, PROGRESS_WEIGHTS, ERROR_STATES
class PresentationStateMachine:
    """Finite state machine for a single presentation workflow.

    Holds the current ``PresentationState``, a ``StateContext`` carrying the
    data gathered so far, and the ordered history of visited states. The
    allowed transitions, suggestions, progress weights and error states come
    from the module-level tables in ``constants``.
    """

    def __init__(self):
        self.state = PresentationState.INIT
        self.context = StateContext()
        self._state_history = [PresentationState.INIT]

        # Shared, module-level lookup tables; treated as read-only here.
        self._transitions = TRANSITIONS
        self._error_states = ERROR_STATES
        self._suggestions = SUGGESTIONS
        self._progress_weights = PROGRESS_WEIGHTS

    def transition(self, new_state: PresentationState, context_updates: Dict[str, Any] = None):
        """
        Transition to new state with optional context updates

        Args:
            new_state (PresentationState): The state to transition to
            context_updates (Dict[str, Any], optional): Context data to update during transition.
                Keys matching an attribute of the context are set on it; any
                other key is stored in ``context.metadata``.

        Raises:
            ValueError: If the transition is not valid
        """
        if not self.is_valid_transition(new_state):
            raise ValueError(f"Invalid transition from {self.state} to {new_state}")

        # Update context if provided
        if context_updates:
            for key, value in context_updates.items():
                if hasattr(self.context, key):
                    setattr(self.context, key, value)
                else:
                    self.context.metadata[key] = value

        # Record state history
        self._state_history.append(new_state)
        self.state = new_state

    def is_valid_transition(self, new_state: PresentationState) -> bool:
        """Check if transition to new state is valid"""
        return new_state in self._transitions.get(self.state, set())

    def get_available_transitions(self) -> Set[PresentationState]:
        """Get all valid transitions from current state.

        Returns a copy, so callers cannot accidentally mutate the shared
        transition table that every state machine instance relies on.
        """
        return set(self._transitions.get(self.state, set()))

    def can_transition_to(self, target_state: PresentationState) -> bool:
        """Check if can transition to target state"""
        return target_state in self.get_available_transitions()

    def is_terminal_state(self) -> bool:
        """Check if current state is terminal (no outgoing transitions)"""
        return len(self.get_available_transitions()) == 0

    def is_error_state(self) -> bool:
        """Check if current state is an error state"""
        return self.state in self._error_states

    def get_workflow_progress(self) -> float:
        """Return the progress percentage for the current state (0 if unlisted)"""
        return self._progress_weights.get(self.state, 0)

    def get_next_suggested_action(self) -> str:
        """Get suggested next action based on current state"""
        return self._suggestions.get(self.state, "No suggestions available")

    def reset(self):
        """Reset state machine to initial state (state, context and history)"""
        self.state = PresentationState.INIT
        self.context = StateContext()
        self._state_history = [PresentationState.INIT]

    def get_state_history(self) -> list:
        """Get history of states visited (a copy, oldest first)"""
        return self._state_history.copy()

    def rollback_to_previous_state(self) -> bool:
        """Rollback to previous state if possible.

        The rollback only succeeds when moving from the current state to the
        previous one is itself an allowed transition.

        Returns:
            bool: True if the state was rolled back, False otherwise (the
            history is left unchanged in that case).
        """
        if len(self._state_history) < 2:
            return False

        # Remove current state from history
        self._state_history.pop()
        previous_state = self._state_history[-1]

        if self.is_valid_transition(previous_state):
            self.state = previous_state
            return True
        else:
            # Not allowed: put the current state back so history stays intact.
            self._state_history.append(self.state)
            return False

    def __str__(self):
        return f"PresentationStateMachine(state={self.state.name}, progress={self.get_workflow_progress()}%)"

    def __repr__(self):
        return (f"PresentationStateMachine(state={self.state.name}, "
                f"context={self.context}, "
                f"history_length={len(self._state_history)})")

View file

@ -0,0 +1,35 @@
from enum import Enum, auto
class PresentationState(Enum):
    """
    Workflow states of a presentation, from initial request to export.

    Values are assigned with ``auto()`` in declaration order, so the order of
    the members below determines their numeric values.
    """

    INIT = auto()

    # --- Outline generation (file processing is part of this phase) ---
    OUTLINE_REQUESTED = auto()
    OUTLINE_GENERATED = auto()
    OUTLINE_APPROVED = auto()

    # --- Layout selection ---
    LAYOUT_REQUESTED = auto()
    LAYOUT_SELECTED = auto()

    # --- Presentation generation ---
    GENERATION_IN_PROGRESS = auto()
    PRESENTATION_READY = auto()

    # --- Export ---
    EXPORT_REQUESTED = auto()
    EXPORT_IN_PROGRESS = auto()
    EXPORT_COMPLETE = auto()

    # --- Edit and revision loops ---
    EDIT_REQUESTED = auto()
    TEMPLATE_EDITING = auto()

    # --- Failure states ---
    OUTLINE_FAILED = auto()
    GENERATION_FAILED = auto()
    EXPORT_FAILED = auto()
    EDIT_FAILED = auto()

View file

@ -0,0 +1,308 @@
from typing import Dict, Any, Optional, List
from dataclasses import asdict
from app_mcp.services.state_machine.machine import PresentationStateMachine
from app_mcp.services.state_machine.states import PresentationState
from utils.user_config import update_env_with_user_config
from app_mcp.wrapper.generate_outline import generate_outline
from app_mcp.wrapper.presentation_generation import process_post_outline_workflow
from app_mcp.wrapper.presentation_export import export_presentation_and_get_path
from app_mcp.wrapper.list_layout import list_layouts
class WorkflowOrchestrator:
    """
    Orchestrates the presentation generation workflow using FSM
    - Handles session management
    - Executes
        - file uploads
        - summary generation
        - outline generation
        - layout selection
        - presentation generation
        - export
    - Provides status and context management
    - Allows for session-based operations
    - Supports error handling and recovery

    Each session id maps to its own PresentationStateMachine. The execute_*
    methods drive that machine through the workflow and always return a
    result dict with at least a "status" key ("success" or "error").
    """

    def __init__(self):
        """
        Initiating:
        - The environment with user configuration from the user config file.
        - The Finite State Machine (FSM) for presentation workflow.
        - Active sessions dictionary to manage multiple workflows.
        """
        try:
            update_env_with_user_config()
        except Exception as e:
            # Best effort: a missing/broken user config must not prevent startup.
            print(f"Error updating environment with user config: {e}")

        self.fsm = PresentationStateMachine()
        self._active_sessions: Dict[str, PresentationStateMachine] = {}

    @staticmethod
    def _record_failure(fsm, failed_state, error) -> None:
        """Move *fsm* to *failed_state* when allowed, always keeping the error text.

        The error handlers used to transition unconditionally. When the
        exception came from an invalid first transition (e.g. exporting before
        a presentation exists) the failure transition was invalid as well, so
        a second ValueError escaped and hid the original problem. Here the
        state is only changed when the transition table allows it; otherwise
        the machine stays where it is and just records the message.
        """
        message = str(error)
        if fsm.can_transition_to(failed_state):
            fsm.transition(failed_state, {"error_message": message})
        else:
            fsm.context.error_message = message

    def create_session(self, session_id: str) -> PresentationStateMachine:
        """
        Create a new workflow session with the given session ID.
        If a session with the same ID already exists, it will be replaced.
        Session will Remain for the lifetime of the application.

        Args:
            session_id (str): Unique identifier for the session.

        Returns:
            PresentationStateMachine: The fresh state machine for the session.

        Raises:
            ValueError: If the session ID is not a non-empty string.
        """
        if not session_id or not isinstance(session_id, str):
            raise ValueError("Session ID must be a non-empty string")

        session_id = session_id.strip()
        if not session_id:
            raise ValueError("Session ID cannot be empty")

        if session_id in self._active_sessions:
            self.remove_session(session_id)
            print(f"Session {session_id} already exists, replacing it.")

        self._active_sessions[session_id] = PresentationStateMachine()
        return self._active_sessions[session_id]

    def get_session(self, session_id: str) -> Optional[PresentationStateMachine]:
        """Get existing workflow session (None if the id is invalid or unknown)"""
        if not session_id or not isinstance(session_id, str):
            return None
        return self._active_sessions.get(session_id.strip())

    def remove_session(self, session_id: str) -> bool:
        """Remove workflow session.

        The id is stripped first, matching how create_session and get_session
        normalise it, so the same raw id works for all three.

        Returns:
            bool: True if a session was removed, False otherwise.
        """
        if not isinstance(session_id, str):
            return False
        return self._active_sessions.pop(session_id.strip(), None) is not None

    async def execute_generate_outline(self, session_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """
        Execute outline generation workflow step

        Args:
            session_id (str): Unique identifier for the session.
            prompt (str): The prompt to generate the outline.
            **kwargs: Additional parameters for outline generation.

        Returns:
            Dict[str, Any]: Result containing status, state, progress, next action, and generated outline.

        Raises:
            ValueError: If the session does not exist.
        """
        fsm = self.get_session(session_id)
        if not fsm:
            raise ValueError(f"Session {session_id} not found")

        try:
            fsm.transition(PresentationState.OUTLINE_REQUESTED)
            result = await generate_outline(prompt, **kwargs)

            # Update the Context and transition to outline generated
            context_updates = {
                "title": result["title"],
                "outlines": result["outlines"]
            }
            fsm.transition(PresentationState.OUTLINE_GENERATED, context_updates)

            return {
                "status": "success",
                "state": fsm.state.name,
                "progress": fsm.get_workflow_progress(),
                "next_action": "Review outline and approve",
                "result": result,
                "can_approve": True
            }
        except Exception as e:
            self._record_failure(fsm, PresentationState.OUTLINE_FAILED, e)
            print(f"Error generating outline for session {session_id}: {e}")
            return {
                "status": "error",
                "state": fsm.state.name,
                "error": str(e),
                "next_action": fsm.get_next_suggested_action()
            }

    async def approve_outline(self, session_id: str) -> Dict[str, Any]:
        """
        Approve the generated outline

        Args:
            session_id (str): Unique identifier for the session.

        Returns:
            Dict[str, Any]: Result containing status, state, progress, next action.

        Raises:
            ValueError: If the session does not exist or no outline is
                awaiting approval.
        """
        fsm = self.get_session(session_id)
        if not fsm:
            raise ValueError(f"Session {session_id} not found")

        if fsm.state != PresentationState.OUTLINE_GENERATED:
            raise ValueError(f"Cannot approve outline in state {fsm.state.name}")

        fsm.transition(PresentationState.OUTLINE_APPROVED)
        return {
            "status": "success",
            "state": fsm.state.name,
            "progress": fsm.get_workflow_progress(),
            "next_action": fsm.get_next_suggested_action()
        }

    async def execute_layout_selection(self, session_id: str, layout: str) -> Dict[str, Any]:
        """
        Execute layout selection workflow step

        Args:
            session_id (str): Unique identifier for the session.
            layout (str): Selected layout for the presentation.

        Returns:
            Dict[str, Any]: Result containing status, state, progress, next action, and selected layout.

        Raises:
            ValueError: If the session does not exist.
        """
        fsm = self.get_session(session_id)
        if not fsm:
            raise ValueError(f"Session {session_id} not found")

        try:
            fsm.transition(PresentationState.LAYOUT_REQUESTED)

            # Updating the context and transitioning to LAYOUT_SELECTED
            context_updates = {"layout": layout}
            fsm.transition(PresentationState.LAYOUT_SELECTED, context_updates)

            return {
                "status": "success",
                "state": fsm.state.name,
                "progress": fsm.get_workflow_progress(),
                "next_action": fsm.get_next_suggested_action(),
                "selected_layout": layout
            }
        except Exception as e:
            # There is no dedicated failure state for layout selection.
            print(f"Error selecting layout for session {session_id}: {e}")
            return {
                "status": "error",
                "error": str(e),
                "next_action": "Please select a valid layout"
            }

    async def execute_presentation_generation(self, session_id: str, **kwargs) -> Dict[str, Any]:
        """
        Execute presentation generation workflow step

        Args:
            session_id (str): Unique identifier for the session.
            **kwargs: Additional parameters for presentation generation.
                An optional ``notes`` entry is extracted and passed on as the
                ``notes`` argument.

        Returns:
            Dict[str, Any]: Result containing status, state, progress, next action, and generated presentation.

        Raises:
            ValueError: If the session does not exist.
        """
        fsm = self.get_session(session_id)
        if not fsm:
            raise ValueError(f"Session {session_id} not found")

        try:
            fsm.transition(PresentationState.GENERATION_IN_PROGRESS)

            # pop (not get): "notes" is passed explicitly below, so leaving it
            # in kwargs would raise "got multiple values for keyword argument".
            notes = kwargs.pop('notes', [])
            result = await process_post_outline_workflow(
                title=fsm.context.title,
                outlines=fsm.context.outlines,
                notes=notes,
                layout=fsm.context.layout,
                prompt=fsm.context.metadata.get('original_prompt', ""),
                sql_session=None,
                **kwargs
            )

            # Updating the Context and transitioning to PRESENTATION_READY
            context_updates = {"presentation_id": result["presentation_id"]}
            fsm.transition(PresentationState.PRESENTATION_READY, context_updates)

            return {
                "status": "success",
                "state": fsm.state.name,
                "progress": fsm.get_workflow_progress(),
                "next_action": fsm.get_next_suggested_action(),
                "result": result
            }
        except Exception as e:
            self._record_failure(fsm, PresentationState.GENERATION_FAILED, e)
            print(f"Error generating presentation for session {session_id}: {e}")
            return {
                "status": "error",
                "state": fsm.state.name,
                "error": str(e),
                "next_action": fsm.get_next_suggested_action()
            }

    async def execute_export(self, session_id: str, export_format: str = "pptx") -> Dict[str, Any]:
        """
        Execute presentation export workflow step

        Args:
            session_id (str): Unique identifier for the session.
            export_format (str): Format to export the presentation (e.g., "pptx", "pdf").

        Returns:
            Dict[str, Any]: Result containing status, state, progress, next action, and export

        Raises:
            ValueError: If the session does not exist.
        """
        fsm = self.get_session(session_id)
        if not fsm:
            raise ValueError(f"Session {session_id} not found")

        try:
            # Transition to EXPORT_REQUESTED state
            fsm.transition(PresentationState.EXPORT_REQUESTED, {"export_format": export_format})
            fsm.transition(PresentationState.EXPORT_IN_PROGRESS)

            result = await export_presentation_and_get_path(
                presentation_id=fsm.context.presentation_id,
                title=fsm.context.title,
                export_as=export_format
            )
            print("RResult of export:", result)

            # Updating the Context and transitioning to EXPORT_COMPLETE
            context_updates = {"export_path": result["path"]}
            fsm.transition(PresentationState.EXPORT_COMPLETE, context_updates)

            return {
                "status": "success",
                "state": fsm.state.name,
                "progress": fsm.get_workflow_progress(),
                "next_action": "Download your presentation or start a new one",
                "result": result
            }
        except Exception as e:
            self._record_failure(fsm, PresentationState.EXPORT_FAILED, e)
            print(f"Error exporting presentation for session {session_id}: {e}")
            return {
                "status": "error",
                "state": fsm.state.name,
                "error": str(e),
                "next_action": fsm.get_next_suggested_action()
            }

    async def get_available_layouts(self) -> List[Any]:
        """
        Get available presentation layouts
        """
        return await list_layouts()

    def get_workflow_status(self, session_id: str) -> Dict[str, Any]:
        """Get current workflow status, or ``{"error": ...}`` for an unknown session"""
        fsm = self.get_session(session_id)
        if not fsm:
            return {"error": "Session not found"}

        return {
            "session_id": session_id,
            "current_state": fsm.state.name,
            "progress": fsm.get_workflow_progress(),
            "next_action": fsm.get_next_suggested_action(),
            "available_transitions": [s.name for s in fsm.get_available_transitions()],
            "is_error_state": fsm.is_error_state(),
            "context": asdict(fsm.context),
            "state_history": [s.name for s in fsm.get_state_history()]
        }

    def get_all_sessions(self) -> Dict[str, Dict[str, Any]]:
        """
        Get status of all active sessions
        """
        return {
            session_id: self.get_workflow_status(session_id)
            for session_id in self._active_sessions.keys()
        }

View file

@ -0,0 +1,35 @@
"""MCP Tools package for presentation generation."""
from app_mcp.tools.choose_layout import register_choose_layout
from app_mcp.tools.export_presentation import register_export_presentation
from app_mcp.tools.get_status import register_get_status
from app_mcp.tools.show_layouts import register_show_layouts
from app_mcp.tools.start_presentation import register_start_presentation
from app_mcp.tools.help_me import register_help_me
from app_mcp.tools.continue_workflow import register_continue_workflow
__all__ = [
    "register_choose_layout",
    "register_export_presentation",
    "register_get_status",
    "register_show_layouts",
    "register_start_presentation",
    "register_help_me",
    "register_continue_workflow",
    "register_tools",
]


def register_tools(mcp, orchestrator):
    """Attach every MCP tool of this package to *mcp*.

    Each ``register_*`` function is called in turn with the server and the
    shared workflow orchestrator.
    """
    registrars = (
        register_choose_layout,
        register_export_presentation,
        register_get_status,
        register_show_layouts,
        register_start_presentation,
        register_help_me,
        register_continue_workflow,
    )
    for registrar in registrars:
        registrar(mcp, orchestrator)

View file

@ -0,0 +1,46 @@
from typing import Dict, Any
def register_choose_layout(mcp, orchestrator):
    """Register the ``choose_layout`` tool on *mcp* and return the tool function.

    The tool delegates to ``orchestrator.execute_layout_selection`` and turns
    a successful result into a chat-friendly response.
    """

    @mcp.tool("choose_layout")
    async def choose_layout(session_id: str, layout_name: str) -> Dict[str, Any]:
        """
        🎨 Select a visual style and theme for your presentation.
        Choose from available professional layouts that determine:
        - Color scheme and visual design
        - Slide structure and layout patterns
        - Font choices and styling
        - Overall presentation aesthetic
        Use 'show_layouts' first to see all available options. Only show the layout name and short description.
        Args:
        session_id: Your presentation session ID
        layout_name: Name of the layout you want to use
        """
        try:
            outcome = await orchestrator.execute_layout_selection(session_id, layout_name)

            # Anything but success is handed back to the caller untouched.
            if outcome["status"] != "success":
                return outcome

            next_actions = {
                "continue": "Start generating the presentation",
                "change_layout": "Actually, let me pick a different layout"
            }
            return {
                "status": "success",
                "session_id": session_id,
                "message": f"Perfect! I've selected the '{layout_name}' layout for your presentation.",
                "suggestion": "Now I'll generate all the slides with content, images, and styling. This might take a minute or two.",
                "available_actions": next_actions
            }
        except Exception as exc:
            # Tools report failures as data instead of raising.
            failure = {
                "status": "error",
                "error": str(exc),
                "session_id": session_id
            }
            return failure

    return choose_layout

View file

@ -0,0 +1,129 @@
from typing import Dict, Any
def register_continue_workflow(mcp, orchestrator):
    """Register the ``continue_workflow`` tool on *mcp* and return the tool function.

    The tool inspects the session's current state and runs the matching next
    step through *orchestrator*: outline generation, outline approval plus
    layout listing, or presentation generation.
    """

    @mcp.tool("continue_workflow")
    async def continue_workflow(
        session_id: str,
        action: str = "continue"
    ) -> Dict[str, Any]:
        """
        Move to the next step in creating your presentation.
        This tool automatically determines what should happen next based on where
        you are in the process:
        - After starting: Generates your presentation outline
        - After outline: Shows available layouts to choose from
        - After layout: Creates your complete presentation
        Just call this when you're ready to proceed to the next step!
        Args:
        session_id: Your presentation session ID
        action: What to do next (usually just "continue")
        """
        try:
            # Validate session_id
            if not session_id or not isinstance(session_id, str):
                return {
                    "status": "error",
                    "error": "Valid session_id is required",
                    "suggestion": "Use the same session_id from start_presentation"
                }

            session_id = session_id.strip()
            fsm = orchestrator.get_session(session_id)
            if not fsm:
                return {
                    "status": "error",
                    "error": "Session not found. Please start a new presentation first.",
                    "suggestion": "Call start_presentation to begin"
                }

            current_state = fsm.state.name

            # OUTLINE_FAILED is handled like INIT so that a failed outline can
            # be retried through this tool; the workflow permits
            # OUTLINE_FAILED -> OUTLINE_REQUESTED, and without this a failed
            # session could never move on from here.
            if current_state in ("INIT", "OUTLINE_FAILED"):
                # Generate outline (this now handles file processing internally)
                metadata = fsm.context.metadata
                prompt = metadata.get("original_prompt", "")
                n_slides = metadata.get("n_slides", 8)
                language = metadata.get("language", "English")
                files = metadata.get("files", None)

                if not prompt:
                    return {
                        "status": "error",
                        "error": "No prompt found in session. Please start over.",
                        "suggestion": "Call start_presentation with a valid prompt"
                    }

                # Pass files to outline generation if they exist
                kwargs = {"n_slides": n_slides, "language": language}
                if files:
                    kwargs["files"] = files

                result = await orchestrator.execute_generate_outline(
                    session_id, prompt, **kwargs
                )

                if result["status"] == "success":
                    return {
                        "status": "success",
                        "session_id": session_id,
                        "message": "Here's your presentation outline:",
                        "title": result["result"]["title"],
                        "outlines": result["result"]["outlines"],
                        "files_processed": bool(files),
                        "suggestion": "Take a look at the outline. If it looks good, use 'continue_workflow' again to proceed to layout selection.",
                        "next_step": "Call continue_workflow again to choose layouts"
                    }
                return result

            elif current_state == "OUTLINE_GENERATED":
                # Auto-approve and move to layouts
                await orchestrator.approve_outline(session_id)
                layouts = await orchestrator.get_available_layouts()

                return {
                    "status": "success",
                    "session_id": session_id,
                    "message": "Great! Now let's choose a visual style for your presentation.",
                    "available_layouts": layouts,
                    "suggestion": "Choose a layout that fits your content and audience. Use 'choose_layout' with the layout name.",
                    "next_step": "Call choose_layout with your preferred layout name"
                }

            elif current_state == "LAYOUT_SELECTED":
                # Generate presentation
                result = await orchestrator.execute_presentation_generation(session_id)

                if result["status"] == "success":
                    return {
                        "status": "success",
                        "session_id": session_id,
                        "message": "🎉 Your presentation is ready!",
                        "title": result["result"]["title"],
                        "presentation_id": result["result"]["presentation_id"],
                        "suggestion": "Your presentation has been generated successfully! Use 'export_presentation' to download it.",
                        "next_step": "Call export_presentation with format 'pptx' or 'pdf'"
                    }
                return result

            else:
                return {
                    "status": "info",
                    "message": f"Currently in {current_state} state.",
                    "suggestion": "Use get_status to see what actions are available.",
                    "next_step": "Call get_status for guidance"
                }

        except Exception as e:
            # session_id is a parameter and therefore always bound here, so no
            # locals() probing is needed.
            return {
                "status": "error",
                "error": f"Workflow error: {str(e)}",
                "session_id": session_id,
                "suggestion": "Use get_status to check your current progress"
            }

    return continue_workflow

View file

@ -0,0 +1,51 @@
from typing import Dict, Any
def register_export_presentation(mcp, orchestrator):
    """Register the ``export_presentation`` tool on ``mcp``.

    The tool validates the requested format, delegates the export to
    ``orchestrator.execute_export`` and reshapes a successful result into a
    chat-friendly payload. Non-success results are passed through unchanged
    and any exception is converted into an error payload.

    Returns whatever the ``mcp.tool`` decorator produced for the tool.
    """
    supported_formats = ("pdf", "pptx")

    # NOTE(review): the tool docstring below is left verbatim; it is
    # presumably surfaced to clients as the tool description - confirm.
    @mcp.tool("export_presentation")
    async def export_presentation(
        session_id: str,
        format: str = "pptx",
        export_path: str = None
    ) -> Dict[str, Any]:
        """
        📁 Download your finished presentation in your preferred format.
        Export your completed presentation as:
        - "pptx" - PowerPoint format (editable, best for sharing and presenting)
        - "pdf" - PDF format (read-only, best for viewing and printing)
        The exported file will be ready for download immediately.
        Args:
        session_id: Your presentation session ID
        format: Export format - either "pptx" or "pdf"
        """
        # ``export_path`` is accepted but not used anywhere in this tool.
        try:
            requested = format.lower()
            if requested in supported_formats:
                result = await orchestrator.execute_export(session_id, requested)
                print("Export result:", result)
                if result["status"] != "success":
                    # Hand orchestrator failures back to the caller untouched.
                    return result
                return {
                    "status": "success",
                    "session_id": session_id,
                    "message": f"🎉 Your presentation has been exported as {format.upper()}!",
                    "path": result["result"]["path"],
                    "suggestion": "You can download it now, or start creating another presentation."
                }
            return {
                "status": "error",
                "error": "Please choose either 'pdf' or 'pptx' format",
                "session_id": session_id
            }
        except Exception as e:
            return {
                "status": "error",
                "error": str(e),
                "session_id": session_id
            }

    return export_presentation

View file

@ -0,0 +1,83 @@
from typing import Dict, Any, Optional, List
def register_get_status(mcp, orchestrator):
    """Register the ``get_status`` tool on ``mcp``.

    The tool asks ``orchestrator.get_workflow_status`` for the session's
    status and translates the workflow state into a friendly message and a
    suggested next action. Returns whatever ``mcp.tool`` produced.
    """
    # state -> (user-facing message, suggested next action)
    guidance = {
        "INIT": (
            "Ready to start! Use 'start_presentation' to begin.",
            "start_presentation",
        ),
        "OUTLINE_REQUESTED": (
            "Generating outline with file analysis if applicable.",
            "Wait for outline generation to complete",
        ),
        "OUTLINE_GENERATED": (
            "Outline created. Use 'continue_workflow' to proceed to layouts.",
            "continue_workflow",
        ),
        "OUTLINE_APPROVED": (
            "Outline approved. Use 'choose_layout' to select a theme.",
            "choose_layout",
        ),
        "LAYOUT_SELECTED": (
            "Layout chosen. Use 'continue_workflow' to generate presentation.",
            "continue_workflow",
        ),
        "PRESENTATION_READY": (
            "Presentation generated! Use 'export_presentation' to download.",
            "export_presentation",
        ),
        "EXPORT_COMPLETE": (
            "All done! Presentation exported successfully.",
            "Download file or start_presentation for new one",
        ),
    }

    @mcp.tool("get_status")
    def get_status(session_id: str) -> Dict[str, Any]:
        """
        📊 Check your presentation creation progress.
        See exactly where you are in the process:
        - What step you're currently on
        - How much progress you've made
        - What you can do next
        - Any issues that need attention
        Perfect for checking in if you're unsure what to do next!
        Args:
        session_id: Your presentation session ID
        """
        try:
            if not (session_id and isinstance(session_id, str)):
                return {
                    "status": "error",
                    "error": "Valid session_id is required"
                }

            session_id = session_id.strip()
            status = orchestrator.get_workflow_status(session_id)

            if "error" in status:
                # NOTE(review): this reply lists every active session id to
                # the caller - confirm that exposure is intended.
                return {
                    "status": "error",
                    "error": "Session not found. Start a new presentation with 'start_presentation'.",
                    "available_sessions": list(orchestrator._active_sessions.keys())
                }

            state = status["current_state"]

            # Lookups are performed in the same order the response keys are
            # filled so a malformed status fails on the same key as before.
            progress_text = f"{status['progress']:.0f}%"
            known = guidance.get(state)
            if known is None:
                message = f"Currently in {state} state"
            else:
                message = known[0]
            fallback_action = status["next_action"]
            if known is None:
                next_action = fallback_action
            else:
                next_action = known[1]
            metadata = status["context"].get("metadata", {})

            return {
                "status": "success",
                "session_id": session_id,
                "current_step": state,
                "progress": progress_text,
                "message": message,
                "next_action": next_action,
                "context": {
                    "prompt": metadata.get("original_prompt"),
                    "n_slides": metadata.get("n_slides"),
                    "language": metadata.get("language")
                }
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"Status check failed: {str(e)}",
                "suggestion": "Try start_presentation to begin a new session"
            }

    return get_status

View file

@ -0,0 +1,48 @@
from typing import Dict, Any, Optional, List
def register_help_me(mcp, orchestrator):
    """Register the static ``help`` tool on ``mcp``.

    The tool takes no input and returns a fixed guide: the workflow steps,
    auxiliary commands, quick-start examples and tips. ``orchestrator`` is
    accepted for signature parity but is not used here. Returns whatever
    ``mcp.tool`` produced.
    """
    @mcp.tool("help")
    def help() -> Dict[str, Any]:
        """
        Get help and guidance for creating presentations.
        Shows you:
        - Step-by-step workflow guide
        - Available commands and what they do
        - Example usage to get you started
        - Tips for best results
        Perfect for first-time users or when you need a refresher!
        """
        workflow_steps = {
            "step_1": "🚀 start_presentation - Begin with your topic and optional files",
            "step_2": "📋 continue_workflow - Generate and review your outline",
            "step_3": "🎨 choose_layout - Pick a visual style that fits your content",
            "step_4": "⚡ continue_workflow - Generate your complete presentation",
            "step_5": "📁 export_presentation - Download as PowerPoint or PDF"
        }
        helper_commands = {
            "get_status": "📊 Check your current progress anytime",
            "show_layouts": "👀 Browse available themes and styles",
            "help": "❓ Show this helpful guide"
        }
        quick_start_examples = {
            "with_files": "start_presentation(session_id='my-session', prompt='Your topic', files=[uploaded_files])",
            "text_only": "start_presentation(session_id='my-session', prompt='Create a presentation about sustainable energy')",
            "custom": "start_presentation(session_id='my-session', prompt='Your topic', n_slides=10, language='Spanish')"
        }
        usage_tips = [
            "💡 Be specific in your prompt for better results",
            "📎 Upload relevant files to enhance your content",
            "🎨 Choose layouts that match your audience and purpose",
            "📊 Use get_status anytime to see what's next"
        ]

        guide = {"status": "info"}
        guide["message"] = "🎯 Complete Guide to Creating Presentations"
        guide["workflow"] = workflow_steps
        guide["helpful_commands"] = helper_commands
        guide["quick_start"] = quick_start_examples
        guide["tips"] = usage_tips
        return guide

    return help

View file

@ -0,0 +1,39 @@
from typing import Dict, Any
def register_show_layouts(mcp, orchestrator):
    """Register the ``show_layouts`` tool on ``mcp``.

    The tool awaits ``orchestrator.get_available_layouts`` and wraps the
    result in a success payload; any exception becomes an error payload.
    Returns whatever ``mcp.tool`` produced.
    """
    @mcp.tool("show_layouts")
    async def show_layouts(session_id: str) -> Dict[str, Any]:
        """
        👀 Browse all available presentation themes and layouts.
        See the complete list of professional layouts including:
        - Business and corporate themes
        - Creative and modern designs
        - Academic and educational styles
        - Technical and data-focused layouts
        Each layout comes with its own color scheme, fonts, and slide structures.
        Args:
        session_id: Your presentation session ID
        """
        try:
            available = await orchestrator.get_available_layouts()
        except Exception as e:
            failure = {"status": "error"}
            failure["error"] = str(e)
            failure["session_id"] = session_id
            return failure

        # session_id is only echoed back; it is not used for the lookup.
        return {
            "status": "success",
            "session_id": session_id,
            "message": "Here are all the available presentation layouts:",
            "layouts": available,
            "suggestion": "Choose one using 'choose_layout' with the layout name."
        }

    return show_layouts

View file

@ -0,0 +1,111 @@
from typing import List, Dict, Any, Optional
def register_start_presentation(mcp, orchestrator):
    """Register the ``start_presentation`` tool on ``mcp``.

    The tool validates ``session_id`` and ``prompt``, creates a session via
    ``orchestrator.create_session``, and stores the prompt, a slide count
    clamped to 1..50, the language and any files in the session's
    ``context.metadata``. It does not generate anything itself; the reply
    tells the caller to invoke ``continue_workflow`` next.

    Returns whatever ``mcp.tool`` produced for the tool.
    """
    @mcp.tool("start_presentation")
    async def start_presentation(
        session_id: str,
        prompt: str,
        files: Optional[List] = None,
        n_slides: int = 8,
        language: str = "English"
    ) -> Dict[str, Any]:
        """
        🚀 Start creating a new presentation with your idea!
        This is your entry point to create presentations. You can:
        - Start with just a text prompt describing what you want
        - Upload files (PDFs, docs, etc.) to base your presentation on
        - Specify how many slides you want (default: 8)
        - Choose the language for your presentation
        Examples:
        - "Create a presentation about climate change solutions"
        - "Make slides about our Q4 financial results" (with uploaded files)
        - "Build a training deck for new employees"
        Args:
        session_id: Unique identifier for your presentation session
        prompt: Describe what your presentation should be about
        files: Optional list of files to analyze and include
        n_slides: Number of slides to generate (default: 8)
        language: Presentation language (default: English)
        """
        try:
            if not isinstance(session_id, str) or not session_id.strip():
                return {
                    "status": "error",
                    "error": "Session ID is required and must be a non-empty string",
                    "example": "Use something like: session_id='my_presentation_123'"
                }
            if not isinstance(prompt, str) or not prompt.strip():
                return {
                    "status": "error",
                    "error": "Prompt is required and must be a non-empty string",
                    "example": "prompt='Create a presentation about AI in healthcare'"
                }

            # Clean session_id
            session_id = session_id.strip()

            # Create session, then fetch it back to store the parameters on it
            orchestrator.create_session(session_id)
            fsm = orchestrator.get_session(session_id)
            if not fsm:
                return {
                    "status": "error",
                    "error": "Failed to create session",
                    "session_id": session_id
                }

            # A missing OR whitespace-only language falls back to English;
            # previously "   " was stored as an empty string.
            chosen_language = (language or "").strip() or "English"

            fsm.context.metadata.update({
                "original_prompt": prompt.strip(),
                "n_slides": max(1, min(50, n_slides)),  # clamp slide count to 1..50
                "language": chosen_language
            })

            # Debug log to verify metadata update
            print("DEBUG: Metadata after update:", fsm.context.metadata)

            if files:
                # Files are only stored here; they are read later from metadata.
                fsm.context.metadata.update({
                    "files": files
                })
                return {
                    "status": "success",
                    "session_id": session_id,
                    "message": "Great! I've received your files and will analyze them during presentation creation.",
                    "prompt": prompt,
                    "files_count": len(files),
                    "suggestion": f"Now I'll create a presentation outline based on your prompt '{prompt}' and analyze the uploaded files. Use 'continue_workflow' to proceed.",
                    "next_step": "Call continue_workflow to generate the outline with file analysis"
                }

            # No files: the outline will be generated from the prompt alone
            return {
                "status": "success",
                "session_id": session_id,
                "message": f"Perfect! Let's create a presentation about: '{prompt}'",
                "suggestion": "I'll generate an outline with the key topics and structure. Use 'continue_workflow' to proceed.",
                "next_step": "Call continue_workflow to generate the outline",
                "parameters": {
                    "n_slides": fsm.context.metadata.get("n_slides", 8),
                    "language": fsm.context.metadata.get("language", "English")
                }
            }
        except Exception as e:
            # session_id is a parameter and therefore always bound here.
            return {
                "status": "error",
                "error": f"Unexpected error: {str(e)}",
                "session_id": session_id,
                "suggestion": "Please try again with a valid session_id and prompt"
            }

    return start_presentation

View file

@ -0,0 +1,52 @@
from typing import Dict, Any
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
from models.presentation_from_template import GetPresentationUsingTemplateRequest
from utils.dict_utils import deep_update
from utils.export_utils import export_presentation
from sqlmodel import select
from fastapi import HTTPException
class EditFromTemplateTools:
    """Holder for the ``edit_from_template`` tool registration."""

    def __init__(self):
        pass

    def register(self, mcp):
        """Define the ``edit_from_template`` tool and attach it to ``mcp``."""

        @mcp.tool("edit_from_template")
        async def edit_from_template(
            data: GetPresentationUsingTemplateRequest,
            sql_session
        ) -> Dict[str, Any]:
            """
            Create a new presentation from a template and updated slide data, then export.
            """
            source = await sql_session.get(PresentationModel, data.presentation_id)
            if not source:
                raise HTTPException(status_code=404, detail="Presentation not found")

            slides_query = select(SlideModel).where(
                SlideModel.presentation == data.presentation_id
            )
            existing_slides = await sql_session.scalars(slides_query)

            cloned = source.get_new_presentation()
            cloned_slides = []
            for original_slide in existing_slides:
                # First request entry whose index matches this slide, if any.
                override = next(
                    (item for item in data.data if item.index == original_slide.index),
                    None,
                )
                if override is not None:
                    merged_content = deep_update(original_slide.content, override.content)
                else:
                    # No override supplied: None is passed to get_new_slide.
                    merged_content = None
                cloned_slides.append(
                    original_slide.get_new_slide(cloned.id, merged_content)
                )

            # Persist the copy before exporting it.
            sql_session.add(cloned)
            sql_session.add_all(cloned_slides)
            await sql_session.commit()

            exported = await export_presentation(cloned.id, cloned.title, data.export_as)
            response = dict(exported.model_dump())
            response["edit_path"] = f"/presentation?id={cloned.id}"
            return response

View file

@ -0,0 +1,87 @@
import json
import os
from typing import Dict, Any, Optional, List, Annotated
from models.presentation_outline_model import PresentationOutlineModel
from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
from services import TEMP_FILE_SERVICE
from services.documents_loader import DocumentsLoader
from services.score_based_chunker import ScoreBasedChunker
from utils.validators import validate_files
from fastapi import UploadFile, File
from constants.documents import UPLOAD_ACCEPTED_FILE_TYPES
import asyncio
async def generate_outline(
    prompt: str,
    n_slides: int = 8,
    language: str = "English",
    files: Annotated[Optional[List[UploadFile]], File()] = None,
) -> Dict[str, Any]:
    """
    Generate presentation outlines from a prompt and optional uploaded files.

    Uploaded files are written to a fresh temp directory and loaded with
    ``DocumentsLoader``. When documents are available, the first one is
    chunked into ``n_slides`` outlines; if that fails, or no files were
    given, the outline is streamed from ``generate_ppt_outline`` with the
    first document (or an empty string) as additional context.

    Args:
        prompt: What the presentation should be about.
        n_slides: Maximum number of slide outlines to return.
        language: Language passed to the outline generator.
        files: Optional uploads to base the outline on.

    Returns:
        ``{"title": str, "outlines": dict}`` where ``outlines`` is the
        JSON-mode dump of the outline model and ``title`` is derived from
        the first slide outline (empty when there are no slides).

    Raises:
        Whatever ``validate_files`` raises for rejected uploads, and
        ``json.JSONDecodeError`` if the generated text is not valid JSON.
    """
    validate_files(files, True, True, 50, UPLOAD_ACCEPTED_FILE_TYPES)

    temp_dir = TEMP_FILE_SERVICE.create_temp_dir()
    file_paths = []
    for position, upload in enumerate(files or []):
        # The filename comes from the client: keep only its final component
        # so it cannot escape temp_dir (e.g. "../../etc/passwd").
        raw_name = (upload.filename or "").replace("\\", "/")
        safe_name = os.path.basename(raw_name)
        if safe_name in ("", ".", ".."):
            safe_name = f"upload_{position}"
        file_path = os.path.join(temp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(await upload.read())
        file_paths.append(file_path)

    presentation_outlines = None
    additional_context = ""
    if file_paths:
        documents_loader = DocumentsLoader(file_paths=file_paths)
        await documents_loader.load_documents(temp_dir)
        documents = documents_loader.documents
        if documents:
            # Only the first loaded document is used, both as context and
            # as the chunking source.
            additional_context = documents[0]
            chunker = ScoreBasedChunker()
            try:
                chunks = await chunker.get_n_chunks(documents[0], n_slides)
                presentation_outlines = PresentationOutlineModel(
                    slides=[chunk.to_slide_outline() for chunk in chunks]
                )
            except Exception as e:
                # Best effort: fall through to prompt-based generation below.
                print(e)

    if not presentation_outlines:
        text_parts = []
        async for chunk in generate_ppt_outline(
            prompt,
            n_slides,
            language,
            additional_context,
        ):
            # Give control to the event loop
            await asyncio.sleep(0)
            text_parts.append(chunk)
        presentation_outlines_json = json.loads("".join(text_parts))
        presentation_outlines = PresentationOutlineModel(**presentation_outlines_json)

    # Truncate slides to n_slides
    presentation_outlines.slides = presentation_outlines.slides[:n_slides]

    # Compose title from the first slide; an empty outline yields "" instead
    # of raising IndexError.
    first_outline = (
        presentation_outlines.slides[0] if presentation_outlines.slides else ""
    )
    title = (
        first_outline[:50]
        .replace("#", "")
        .replace("/", "")
        .replace("\\", "")
        .replace("\n", "")
    )

    # JSON-mode dump of the outline model
    outlines = presentation_outlines.model_dump(mode="json")
    return {
        "title": title,
        "outlines": outlines,
    }

View file

@ -0,0 +1,8 @@
from typing import List, Any
from api.v1.ppt.endpoints.layouts import get_layouts
async def list_layouts() -> List[Any]:
    """
    Await ``get_layouts`` and hand back its result unchanged.
    """
    layouts = await get_layouts()
    return layouts

View file

@ -0,0 +1,25 @@
from typing import Literal, Dict, Any
from utils.export_utils import export_presentation
# Standalone function for workflow orchestrator
async def export_presentation_and_get_path(
    presentation_id: str,
    title: str,
    export_as: Literal["pptx", "pdf"] = "pptx"
) -> Dict[str, Any]:
    """
    Export a presentation and describe where the result lives.

    Calls ``export_presentation`` and returns its ``model_dump()`` fields
    extended with ``edit_path`` (an editor URL for ``presentation_id``) and
    ``export_path`` (a copy of the dumped ``path`` value).
    """
    exported = await export_presentation(presentation_id, title, export_as)

    # The dump is expected to carry a "path" key; a KeyError is raised otherwise.
    data = exported.model_dump()
    print("Exported presentation data:", data)

    response = dict(data)
    response["edit_path"] = f"/presentation?id={presentation_id}"
    # Expose the dumped "path" under the additional "export_path" key.
    response["export_path"] = data["path"]
    return response

View file

@ -0,0 +1,126 @@
import random
from typing import List, Dict, Any, Optional
from models.presentation_layout import PresentationLayoutModel
from models.presentation_structure_model import PresentationStructureModel
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
from utils.get_layout_by_name import get_layout_by_name
from utils.llm_calls.generate_presentation_structure import (
generate_presentation_structure,
)
from utils.llm_calls.generate_slide_content import (
get_slide_content_from_type_and_outline,
)
from services.image_generation_service import ImageGenerationService
from services.icon_finder_service import IconFinderService
from utils.asset_directory_utils import get_images_directory
from utils.process_slides import process_slide_and_fetch_assets
from models.presentation_outline_model import PresentationOutlineModel
from utils.randomizers import get_random_uuid
import asyncio
from sqlalchemy.ext.asyncio import AsyncSession
# Standalone function for workflow orchestrator
async def process_post_outline_workflow(
    title: str,
    outlines: List[str],
    layout: str = "general",
    language: str = "English",
    prompt: str = "",
    n_slides: int = 8,
    sql_session: Optional[AsyncSession] = None,
) -> Dict[str, Any]:
    """
    Build and persist a presentation once its outlines exist.

    Steps: resolve the layout by name, obtain a slide structure (taken from
    the layout when it is ordered, otherwise generated), normalise it to
    exactly ``n_slides`` valid layout indices, generate content for each
    slide, fetch assets concurrently, and save everything.

    Args:
        title: Presentation title.
        outlines: One outline string per slide; indexed by slide position.
        layout: Layout name passed to ``get_layout_by_name``.
        language: Language passed to slide content generation.
        prompt: Original user prompt, stored on the presentation.
        n_slides: Number of slides to produce.
        sql_session: Session to save with; when None, one is obtained from
            ``get_async_session``.

    Returns:
        A dict with ``presentation_id``, ``title``, a ``message`` asking
        about export, and the available ``export_options``.
    """
    # 1. Parse Layout
    layout_model: PresentationLayoutModel = await get_layout_by_name(layout)
    total_slide_layouts = len(layout_model.slides)

    # 2. Generate Structure
    if layout_model.ordered:
        presentation_structure = layout_model.to_presentation_structure()
    else:
        presentation_structure: PresentationStructureModel = (
            await generate_presentation_structure(
                presentation_outline=PresentationOutlineModel(
                    slides=outlines,
                ),
                presentation_layout=layout_model,
            )
        )

    # Normalise the structure to exactly n_slides valid layout indices.
    presentation_structure.slides = presentation_structure.slides[:n_slides]
    for index in range(n_slides):
        if index >= len(presentation_structure.slides):
            # Structure came back short: pad with a random layout. (The old
            # check compared against n_slides and could never be true, so a
            # short structure raised IndexError on the next line.)
            presentation_structure.slides.append(
                random.randint(0, total_slide_layouts - 1)
            )
            continue
        if not 0 <= presentation_structure.slides[index] < total_slide_layouts:
            # Out-of-range (including negative) index: replace it.
            presentation_structure.slides[index] = random.randint(
                0, total_slide_layouts - 1
            )

    # 3. Create PresentationModel
    presentation_id = get_random_uuid()
    presentation = PresentationModel(
        id=presentation_id,
        title=title,
        n_slides=n_slides,
        language=language,
        outlines=outlines,
        prompt=prompt,
        layout=layout_model.model_dump(),
        structure=presentation_structure.model_dump(),
    )

    image_generation_service = ImageGenerationService(get_images_directory())
    icon_finder_service = IconFinderService()
    async_asset_generation_tasks = []

    # 4. Generate slide content and save slides
    # NOTE(review): outlines[i] assumes len(outlines) >= n_slides; a shorter
    # list raises IndexError here - confirm callers guarantee this.
    slides: List[SlideModel] = []
    for i, slide_layout_index in enumerate(presentation_structure.slides):
        slide_layout = layout_model.slides[slide_layout_index]
        slide_content = await get_slide_content_from_type_and_outline(
            slide_layout, outlines[i], language
        )
        slide = SlideModel(
            presentation=presentation_id,
            layout_group=layout_model.name,
            layout=slide_layout.id,
            index=i,
            content=slide_content,
        )
        # Asset fetching is only scheduled here; all tasks run together below.
        async_asset_generation_tasks.append(
            process_slide_and_fetch_assets(
                image_generation_service, icon_finder_service, slide
            )
        )
        slides.append(slide)

    generated_assets_lists = await asyncio.gather(*async_asset_generation_tasks)
    generated_assets = []
    for assets_list in generated_assets_lists:
        generated_assets.extend(assets_list)

    # 5. Save PresentationModel and Slides
    if sql_session is None:
        from services.database import get_async_session

        async for session in get_async_session():
            session.add(presentation)
            session.add_all(slides)
            session.add_all(generated_assets)
            await session.commit()
    else:
        sql_session.add(presentation)
        sql_session.add_all(slides)
        sql_session.add_all(generated_assets)
        await sql_session.commit()

    # 6. Ask user if they want to export and in which format
    return {
        "presentation_id": presentation_id,
        "title": title,
        "message": "Presentation is ready. Would you like to export? (pdf or pptx)",
        "export_options": ["pdf", "pptx"],
    }

View file

@ -1,25 +0,0 @@
{
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
"architectures": [
"BertModel"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 384,
"initializer_range": 0.02,
"intermediate_size": 1536,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 6,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"transformers_version": "4.27.4",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
}

View file

@ -1,7 +0,0 @@
{
"cls_token": "[CLS]",
"mask_token": "[MASK]",
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"unk_token": "[UNK]"
}

File diff suppressed because it is too large Load diff

View file

@ -1,15 +0,0 @@
{
"cls_token": "[CLS]",
"do_basic_tokenize": true,
"do_lower_case": true,
"mask_token": "[MASK]",
"model_max_length": 512,
"never_split": null,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"special_tokens_map_file": "/Users/hammad/.cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/7dbbc90392e2f80f3d3c277d6e90027e55de9125/special_tokens_map.json",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[UNK]"
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,6 @@
# Base URL of the OpenAI API (v1).
OPENAI_URL = "https://api.openai.com/v1"
# Default model identifiers, one per provider (OpenAI, Google, Anthropic).
DEFAULT_OPENAI_MODEL = "gpt-4.1"
DEFAULT_GOOGLE_MODEL = "models/gemini-2.0-flash"
DEFAULT_ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620"

View file

@ -5,81 +5,61 @@ SUPPORTED_OLLAMA_MODELS = {
"llama3:8b": OllamaModelMetadata(
label="Llama 3:8b",
value="llama3:8b",
description="❌ Graphs not supported.",
size="4.7GB",
supports_graph=False,
icon="/static/icons/meta.png",
),
"llama3:70b": OllamaModelMetadata(
label="Llama 3:70b",
value="llama3:70b",
description="✅ Graphs supported.",
size="40GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
"llama3.1:8b": OllamaModelMetadata(
label="Llama 3.1:8b",
value="llama3.1:8b",
description="❌ Graphs not supported.",
size="4.9GB",
supports_graph=False,
icon="/static/icons/meta.png",
),
"llama3.1:70b": OllamaModelMetadata(
label="Llama 3.1:70b",
value="llama3.1:70b",
description="✅ Graphs supported.",
size="43GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
"llama3.1:405b": OllamaModelMetadata(
label="Llama 3.1:405b",
value="llama3.1:405b",
description="✅ Graphs supported.",
size="243GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
"llama3.2:1b": OllamaModelMetadata(
label="Llama 3.2:1b",
value="llama3.2:1b",
description="❌ Graphs not supported.",
size="1.3GB",
supports_graph=False,
icon="/static/icons/meta.png",
),
"llama3.2:3b": OllamaModelMetadata(
label="Llama 3.2:3b",
value="llama3.2:3b",
description="❌ Graphs not supported.",
size="2GB",
supports_graph=False,
icon="/static/icons/meta.png",
),
"llama3.3:70b": OllamaModelMetadata(
label="Llama 3.3:70b",
value="llama3.3:70b",
description="✅ Graphs supported.",
size="43GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
"llama4:16x17b": OllamaModelMetadata(
label="Llama 4:16x17b",
value="llama4:16x17b",
description="✅ Graphs supported.",
size="67GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
"llama4:128x17b": OllamaModelMetadata(
label="Llama 4:128x17b",
value="llama4:128x17b",
description="✅ Graphs supported.",
size="245GB",
supports_graph=True,
icon="/static/icons/meta.png",
),
}
@ -88,33 +68,25 @@ SUPPORTED_GEMMA_MODELS = {
"gemma3:1b": OllamaModelMetadata(
label="Gemma 3:1b",
value="gemma3:1b",
description="❌ Graphs not supported.",
size="815MB",
supports_graph=False,
icon="/static/icons/gemma.png",
),
"gemma3:4b": OllamaModelMetadata(
label="Gemma 3:4b",
value="gemma3:4b",
description="❌ Graphs not supported.",
size="3.3GB",
supports_graph=False,
icon="/static/icons/gemma.png",
),
"gemma3:12b": OllamaModelMetadata(
label="Gemma 3:12b",
value="gemma3:12b",
description="❌ Graphs not supported.",
size="8.1GB",
supports_graph=False,
icon="/static/icons/gemma.png",
),
"gemma3:27b": OllamaModelMetadata(
label="Gemma 3:27b",
value="gemma3:27b",
description="✅ Graphs supported.",
size="17GB",
supports_graph=True,
icon="/static/icons/gemma.png",
),
}
@ -123,57 +95,43 @@ SUPPORTED_DEEPSEEK_MODELS = {
"deepseek-r1:1.5b": OllamaModelMetadata(
label="DeepSeek R1:1.5b",
value="deepseek-r1:1.5b",
description="❌ Graphs not supported.",
size="1.1GB",
supports_graph=False,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:7b": OllamaModelMetadata(
label="DeepSeek R1:7b",
value="deepseek-r1:7b",
description="❌ Graphs not supported.",
size="4.7GB",
supports_graph=False,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:8b": OllamaModelMetadata(
label="DeepSeek R1:8b",
value="deepseek-r1:8b",
description="❌ Graphs not supported.",
size="5.2GB",
supports_graph=False,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:14b": OllamaModelMetadata(
label="DeepSeek R1:14b",
value="deepseek-r1:14b",
description="❌ Graphs not supported.",
size="9GB",
supports_graph=False,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:32b": OllamaModelMetadata(
label="DeepSeek R1:32b",
value="deepseek-r1:32b",
description="✅ Graphs supported.",
size="20GB",
supports_graph=True,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:70b": OllamaModelMetadata(
label="DeepSeek R1:70b",
value="deepseek-r1:70b",
description="✅ Graphs supported.",
size="43GB",
supports_graph=True,
icon="/static/icons/deepseek.png",
),
"deepseek-r1:671b": OllamaModelMetadata(
label="DeepSeek R1:671b",
value="deepseek-r1:671b",
description="✅ Graphs supported.",
size="404GB",
supports_graph=True,
icon="/static/icons/deepseek.png",
),
}
@ -182,65 +140,49 @@ SUPPORTED_QWEN_MODELS = {
"qwen3:0.6b": OllamaModelMetadata(
label="Qwen 3:0.6b",
value="qwen3:0.6b",
description="❌ Graphs not supported.",
size="523MB",
supports_graph=False,
icon="/static/icons/qwen.png",
),
"qwen3:1.7b": OllamaModelMetadata(
label="Qwen 3:1.7b",
value="qwen3:1.7b",
description="❌ Graphs not supported.",
size="1.4GB",
supports_graph=False,
icon="/static/icons/qwen.png",
),
"qwen3:4b": OllamaModelMetadata(
label="Qwen 3:4b",
value="qwen3:4b",
description="❌ Graphs not supported.",
size="2.6GB",
supports_graph=False,
icon="/static/icons/qwen.png",
),
"qwen3:8b": OllamaModelMetadata(
label="Qwen 3:8b",
value="qwen3:8b",
description="❌ Graphs not supported.",
size="5.2GB",
supports_graph=False,
icon="/static/icons/qwen.png",
),
"qwen3:14b": OllamaModelMetadata(
label="Qwen 3:14b",
value="qwen3:14b",
description="❌ Graphs not supported.",
size="9.3GB",
supports_graph=False,
icon="/static/icons/qwen.png",
),
"qwen3:30b": OllamaModelMetadata(
label="Qwen 3:30b",
value="qwen3:30b",
description="✅ Graphs supported.",
size="19GB",
supports_graph=True,
icon="/static/icons/qwen.png",
),
"qwen3:32b": OllamaModelMetadata(
label="Qwen 3:32b",
value="qwen3:32b",
description="✅ Graphs supported.",
size="20GB",
supports_graph=True,
icon="/static/icons/qwen.png",
),
"qwen3:235b": OllamaModelMetadata(
label="Qwen 3:235b",
value="qwen3:235b",
description="✅ Graphs supported.",
size="142GB",
supports_graph=True,
icon="/static/icons/qwen.png",
),
}

View file

@ -5,4 +5,5 @@ class LLMProvider(Enum):
OLLAMA = "ollama"
OPENAI = "openai"
GOOGLE = "google"
ANTHROPIC = "anthropic"
CUSTOM = "custom"

View file

@ -1,419 +0,0 @@
import json
from typing import List, Literal, Optional
from pydantic import BaseModel, Field, HttpUrl, EmailStr
from models.presentation_layout import PresentationLayoutModel, SlideLayoutModel
from models.presentation_outline_model import PresentationOutlineModel
from utils.dict_utils import get_dict_at_path, get_dict_paths_with_key
from utils.schema_utils import remove_fields_from_schema
# Contact details; every field is passed None as its default.
class ContactInfoModel(BaseModel):
    email: Optional[EmailStr] = Field(None, description="Contact email")
    # Phone is free-form text, constrained only by length (5-50).
    phone: Optional[str] = Field(
        None, min_length=5, max_length=50, description="Contact phone number"
    )
    website: Optional[HttpUrl] = Field(None, description="Website URL")
# Image reference made of a URL and the prompt associated with it.
# NOTE(review): both attribute names are double-underscore wrapped; pydantic
# may not collect such names as model fields - verify the generated schema.
class ImageModel(BaseModel):
    __image_url__: str = Field(description="Image URL")
    __image_prompt__: str = Field(description="Image prompt")
# First Slide Layout
# Content schema for the opening slide: title plus presenter details.
# NOTE(review): the Optional fields pass no default to Field, so they are
# presumably required-but-nullable under pydantic v2 - confirm intent.
class FirstSlideModel(BaseModel):
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Main title of the presentation",
    )
    subtitle: Optional[str] = Field(
        min_length=10, max_length=200, description="Optional subtitle or tagline"
    )
    author: Optional[str] = Field(
        min_length=2,
        max_length=100,
        description="Author or presenter name",
    )
    # Date is an unconstrained string; no format is enforced here.
    date: Optional[str] = Field(description="Presentation date")
    company: Optional[str] = Field(
        min_length=2,
        max_length=100,
        description="Company or organization name",
    )
    # Unlike the other slide models, the background is an ImageModel, not a URL.
    backgroundImage: Optional[ImageModel] = Field(
        description="Background image for the slide"
    )
# Bullet Point Slide Layout
# Content schema for a slide with a title, optional subtitle/icon and a
# list of 2-8 bullet strings.
class BulletPointSlideModel(BaseModel):
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Title of the slide",
    )
    subtitle: Optional[str] = Field(
        min_length=3,
        max_length=150,
        description="Optional subtitle or description",
    )
    icon: Optional[str] = Field(description="Icon to display in the slide")
    # min_length/max_length here bound the number of list items.
    bulletPoints: List[str] = Field(
        min_length=2,
        max_length=8,
        description="List of bullet points (2-8 items)",
    )
# Image Slide Layout
# Content schema for a slide built around one main image, with optional
# caption, supporting text and background image.
class ImageSlideModel(BaseModel):
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Title of the slide",
    )
    subtitle: Optional[str] = Field(
        min_length=3,
        max_length=150,
        description="Optional subtitle or description",
    )
    # The main image is the only mandatory URL on this slide.
    image: HttpUrl = Field(
        description="Main image URL",
    )
    imageCaption: Optional[str] = Field(
        min_length=5,
        max_length=200,
        description="Optional image caption or description",
    )
    content: Optional[str] = Field(
        min_length=10,
        max_length=600,
        description="Optional supporting content text",
    )
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide"
    )
# Statistics Slide Layout
# One statistic entry: a short display value, its label, and optional
# trend/context annotations.
class StatisticItemModel(BaseModel):
    # Value is kept as text so it can carry units or symbols.
    value: str = Field(
        min_length=1,
        max_length=20,
        description="Statistical value (e.g., '250%', '$1.2M', '99.9%')",
    )
    label: str = Field(
        min_length=3, max_length=100, description="Description of the statistic"
    )
    # When present, trend must be exactly "up", "down" or "neutral".
    trend: Optional[str] = Field(
        description="Trend direction indicator", pattern="^(up|down|neutral)$"
    )
    context: Optional[str] = Field(
        min_length=5,
        max_length=200,
        description="Additional context or time period",
    )
# Content schema for a slide presenting 2-6 StatisticItemModel entries.
class StatisticsSlideModel(BaseModel):
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Title of the slide",
    )
    subtitle: Optional[str] = Field(
        min_length=3,
        max_length=150,
        description="Optional subtitle or description",
    )
    # min_length/max_length here bound the number of list items.
    statistics: List[StatisticItemModel] = Field(
        min_length=2,
        max_length=6,
        description="List of statistics (2-6 items)",
    )
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide"
    )
# Quote Slide Layout
# Content schema for a quote/testimonial slide: the quote and its author
# are mandatory; author title, company and images are nullable extras.
class QuoteSlideModel(BaseModel):
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Title of the slide",
    )
    subtitle: Optional[str] = Field(
        min_length=3,
        max_length=150,
        description="Optional subtitle or description",
    )
    quote: str = Field(
        min_length=10,
        max_length=500,
        description="The main quote or testimonial",
    )
    author: str = Field(
        min_length=2,
        max_length=100,
        description="Quote author name",
    )
    authorTitle: Optional[str] = Field(
        min_length=2, max_length=100, description="Author job title or position"
    )
    company: Optional[str] = Field(
        min_length=2, max_length=100, description="Author company or organization"
    )
    authorImage: Optional[HttpUrl] = Field(description="URL to author photo")
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide"
    )
# Timeline Slide Layout
# One timeline entry: when it happened, what it was, and its status.
class TimelineItemModel(BaseModel):
    # Date is free-form text (2-50 chars); no date format is enforced.
    date: str = Field(min_length=2, max_length=50, description="Date or time period")
    title: str = Field(
        min_length=3, max_length=100, description="Event or milestone title"
    )
    description: str = Field(
        min_length=10, max_length=300, description="Event description"
    )
    # Status must be exactly "completed", "current" or "upcoming".
    status: str = Field(
        description="Timeline item status",
        pattern="^(completed|current|upcoming)$",
    )
class TimelineSlideModel(BaseModel):
    # Schema for a slide that lays out events along a timeline.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3, max_length=150, description="Optional subtitle or description"
    )

    # Between 2 and 6 timeline entries.
    timelineItems: List[TimelineItemModel] = Field(
        min_length=2, max_length=6, description="Timeline events (2-6 items)"
    )

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Team Slide Layout
class TeamMemberModel(BaseModel):
    # One person shown on a team slide.

    # Display name, 2-100 characters.
    name: str = Field(
        min_length=2,
        max_length=100,
        description="Team member name",
    )

    # Role, 2-100 characters.
    title: str = Field(
        min_length=2,
        max_length=100,
        description="Job title or role",
    )

    # Photo URL; may be None.
    image: Optional[HttpUrl] = Field(
        description="URL to team member photo",
    )

    # Short biography; may be None, 10-300 characters when given.
    bio: Optional[str] = Field(
        min_length=10, max_length=300, description="Brief biography or description"
    )

    # Contact details; each may be None.
    email: Optional[EmailStr] = Field(
        description="Contact email",
    )
    linkedin: Optional[HttpUrl] = Field(
        description="LinkedIn profile URL",
    )
class TeamSlideModel(BaseModel):
    # Schema for a slide that introduces team members.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3,
        max_length=150,
        description="Optional subtitle or team description",
    )

    # Between 1 and 6 people.
    teamMembers: List[TeamMemberModel] = Field(
        min_length=1, max_length=6, description="Team members (1-6 people)"
    )

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Process Slide Layout
class ProcessStepModel(BaseModel):
    # One step of a process slide.

    # Step number, constrained to 1..10 inclusive.
    step: int = Field(
        ge=1,
        le=10,
        description="Step number",
    )

    # Step name, 3-100 characters.
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Step title",
    )

    # Step details, 10-200 characters.
    description: str = Field(
        min_length=10,
        max_length=200,
        description="Step description",
    )
class ProcessSlideModel(BaseModel):
    # Schema for a slide that walks through a sequence of steps.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3, max_length=150, description="Optional subtitle or description"
    )

    # Between 2 and 6 steps.
    processSteps: List[ProcessStepModel] = Field(
        min_length=2, max_length=6, description="Process steps (2-6 items)"
    )

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Two Column Slide Layout
class ColumnContentModel(BaseModel):
    # Content of a single column on a two-column slide.

    # Column heading, 3-100 characters.
    title: str = Field(
        min_length=3,
        max_length=100,
        description="Column title",
    )

    # Column body text, 10-800 characters.
    content: str = Field(
        min_length=10,
        max_length=800,
        description="Column content",
    )
class TwoColumnSlideModel(BaseModel):
    # Schema for a slide with a left and a right content column.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3, max_length=150, description="Optional subtitle or description"
    )

    # The two columns, each with its own title and content.
    leftColumn: ColumnContentModel = Field(description="Left column content")
    rightColumn: ColumnContentModel = Field(description="Right column content")

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Conclusion Slide Layout
class ConclusionSlideModel(BaseModel):
    # Schema for the closing slide: takeaways plus call to action and contact.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3, max_length=150, description="Optional subtitle or description"
    )

    # Between 2 and 6 summary strings.
    keyTakeaways: List[str] = Field(
        min_length=2,
        max_length=6,
        description="Key takeaways or summary points (2-6 items)",
    )

    # Next-steps text; may be None, 5-150 characters when given.
    callToAction: Optional[str] = Field(
        min_length=5,
        max_length=150,
        description="Optional call to action or next steps",
    )

    # Contact details; may be None.
    contactInfo: Optional[ContactInfoModel] = Field(
        description="Optional contact information",
    )

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Content Slide Layout
class ContentSlideModel(BaseModel):
    # Schema for a generic slide with one block of body text.

    # Slide heading, 3-100 characters.
    title: str = Field(min_length=3, max_length=100, description="Title of the slide")

    # Secondary heading; may be None, 3-150 characters when given.
    subtitle: Optional[str] = Field(
        min_length=3, max_length=150, description="Optional subtitle or description"
    )

    # Body text, 10-1000 characters.
    content: str = Field(
        min_length=10, max_length=1000, description="Main content text"
    )

    # Background image URL; may be None.
    backgroundImage: Optional[HttpUrl] = Field(
        description="URL to background image for the slide",
    )
# Create the presentation layout with all slide types.
# Only the "first-slide" layout is currently active; the remaining layouts
# are kept below, commented out.
presentation_layout = PresentationLayoutModel(
    name="Complete Presentation Layout",
    slides=[
        SlideLayoutModel(
            id="first-slide",
            name="First Slide",
            json_schema=FirstSlideModel.model_json_schema(),
        ),
        # SlideLayoutModel(
        #     id="bullet-point-slide",
        #     name="Bullet Point Slide",
        #     json_schema=BulletPointSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="image-slide",
        #     name="Image Slide",
        #     json_schema=ImageSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="statistics-slide",
        #     name="Statistics Slide",
        #     json_schema=StatisticsSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="quote-slide",
        #     name="Quote Slide",
        #     json_schema=QuoteSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="timeline-slide",
        #     name="Timeline Slide",
        #     json_schema=TimelineSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="team-slide",
        #     name="Team Slide",
        #     json_schema=TeamSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="process-slide",
        #     name="Process Slide",
        #     json_schema=ProcessSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="two-column-slide",
        #     name="Two Column Slide",
        #     json_schema=TwoColumnSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="conclusion-slide",
        #     name="Conclusion Slide",
        #     json_schema=ConclusionSlideModel.model_json_schema(),
        # ),
        # SlideLayoutModel(
        #     id="content-slide",
        #     name="Content Slide",
        #     json_schema=ContentSlideModel.model_json_schema(),
        # ),
    ],
)

# Dump the statistics-slide JSON schema to stdout as a single JSON line.
print(json.dumps(StatisticsSlideModel.model_json_schema()))

View file

@ -0,0 +1,24 @@
import sys
import os
import argparse
import asyncio
from app_mcp.server import create_mcp_server, uvicorn_config
async def main():
    """Parse command-line options and serve the MCP server over HTTP.

    Accepts ``--port`` (default 8001). The server listens on all interfaces
    and uses the ``uvicorn_config`` exported by ``app_mcp.server``.
    """
    # The help text previously said "FastAPI server", but this script
    # starts the MCP server built by create_mcp_server().
    parser = argparse.ArgumentParser(description="Run the MCP server")
    parser.add_argument(
        "--port", type=int, default=8001, help="Port number to run the server on"
    )
    args = parser.parse_args()

    mcp = create_mcp_server()
    await mcp.run_async(
        transport="http",
        host="0.0.0.0",
        port=args.port,
        uvicorn_config=uvicorn_config,
    )


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,11 @@
from pydantic import BaseModel
class DocumentChunk(BaseModel):
    # A headed section of a document together with a numeric score.
    # NOTE(review): the meaning of heading_index and score is set by the
    # producer of these chunks, which is not visible here - confirm there.
    heading: str
    content: str
    heading_index: int
    score: float

    def to_slide_outline(self) -> str:
        """Return the heading and the content separated by one newline."""
        return "\n".join((self.heading, self.content))

View file

@ -0,0 +1,7 @@
from typing import Literal
from pydantic import BaseModel
class LLMMessage(BaseModel):
    # Provider-independent chat message.

    # Only "user" and "system" are accepted as roles.
    role: Literal["user", "system"]

    # Message text.
    content: str

View file

@ -4,7 +4,5 @@ from pydantic import BaseModel
class OllamaModelMetadata(BaseModel):
label: str
value: str
description: str
icon: str
size: str
supports_graph: bool

View file

@ -1,4 +1,5 @@
from typing import List, Optional
from fastapi import HTTPException
from pydantic import BaseModel, Field
from models.presentation_structure_model import PresentationStructureModel
@ -12,10 +13,18 @@ class SlideLayoutModel(BaseModel):
class PresentationLayoutModel(BaseModel):
name: Optional[str] = None
name: str
ordered: bool = Field(default=False)
slides: List[SlideLayoutModel]
def get_slide_layout_index(self, slide_layout_id: str) -> int:
for index, slide in enumerate(self.slides):
if slide.id == slide_layout_id:
return index
raise HTTPException(
status_code=404, detail=f"Slide layout {slide_layout_id} not found"
)
def to_presentation_structure(self):
return PresentationStructureModel(
slides=[index for index in range(len(self.slides))]

View file

@ -1,32 +1,13 @@
from typing import List, Optional
from pydantic import BaseModel, Field
class SlideOutlineModel(BaseModel):
title: str = Field(
description="Title of the slide in about 3 to 5 words",
)
body: str = Field(
description="Content of the slide in markdown format",
)
from typing import List
from pydantic import BaseModel
class PresentationOutlineModel(BaseModel):
title: str = Field(
description="Title of the presentation in about 3 to 8 words",
)
notes: Optional[List[str]] = Field(default=None, description="Notes for the presentation")
slides: List[SlideOutlineModel] = Field(description="List of slides")
slides: List[str]
def to_string(self):
message = f"# Presentation Title: {self.title} \n\n"
message = ""
for i, slide in enumerate(self.slides):
message += f"## Slide {i+1}:\n"
message += f" - Title: {slide.title} \n"
message += f" - Body: {slide.body} \n"
if self.notes:
message += f"# Notes: \n"
for note in self.notes:
message += f" - {note} \n"
message += f" - Content: {slide} \n"
return message

View file

@ -4,7 +4,7 @@ from datetime import datetime
from pydantic import BaseModel
from models.presentation_layout import PresentationLayoutModel
from models.presentation_outline_model import SlideOutlineModel
from models.presentation_outline_model import PresentationOutlineModel
from models.presentation_structure_model import PresentationStructureModel
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
@ -16,9 +16,7 @@ class PresentationWithSlides(BaseModel):
n_slides: int
language: str
title: Optional[str] = None
notes: Optional[List[str]]
outlines: Optional[List[SlideOutlineModel]]
summary: Optional[str]
outlines: Optional[PresentationOutlineModel]
created_at: datetime
updated_at: datetime
layout: Optional[PresentationLayoutModel]

View file

@ -2,7 +2,7 @@ from datetime import datetime
from typing import Optional
from sqlalchemy import JSON, Column, DateTime
from sqlmodel import SQLModel, Field
from sqlmodel import Field, SQLModel
from utils.randomizers import get_random_uuid

View file

@ -1,4 +1,4 @@
from sqlmodel import SQLModel, Field, Column, JSON
from sqlmodel import Field, Column, JSON, SQLModel
from utils.randomizers import get_random_uuid

View file

@ -0,0 +1,8 @@
from datetime import datetime
from sqlmodel import Field, Column, JSON, SQLModel, DateTime
class OllamaPullStatus(SQLModel, table=True):
    # Table row holding a JSON status payload under a string primary key.

    # Primary key.
    id: str = Field(primary_key=True)

    # DateTime column whose column-level default is datetime.now.
    last_updated: datetime = Field(
        sa_column=Column(DateTime, default=datetime.now),
    )

    # Status payload stored in a JSON column.
    status: dict = Field(
        sa_column=Column(JSON),
    )

View file

@ -1,13 +1,10 @@
from datetime import datetime
from typing import List, Optional
from sqlalchemy import JSON, Column, DateTime
from sqlmodel import SQLModel, Field
from sqlmodel import Field, SQLModel
from models.presentation_layout import PresentationLayoutModel
from models.presentation_outline_model import (
PresentationOutlineModel,
SlideOutlineModel,
)
from models.presentation_outline_model import PresentationOutlineModel
from models.presentation_structure_model import PresentationStructureModel
from utils.randomizers import get_random_uuid
@ -18,9 +15,8 @@ class PresentationModel(SQLModel, table=True):
n_slides: int
language: str
title: Optional[str] = None
notes: Optional[List[str]] = Field(sa_column=Column(JSON), default=None)
outlines: Optional[List[dict]] = Field(sa_column=Column(JSON), default=None)
summary: Optional[str] = None
file_paths: Optional[List[str]] = Field(sa_column=Column(JSON), default=None)
outlines: Optional[dict] = Field(sa_column=Column(JSON), default=None)
created_at: datetime = Field(sa_column=Column(DateTime, default=datetime.now))
updated_at: datetime = Field(sa_column=Column(DateTime, default=datetime.now))
layout: Optional[dict] = Field(sa_column=Column(JSON), default=None)
@ -33,9 +29,8 @@ class PresentationModel(SQLModel, table=True):
n_slides=self.n_slides,
language=self.language,
title=self.title,
notes=self.notes,
file_paths=self.file_paths,
outlines=self.outlines,
summary=self.summary,
layout=self.layout,
structure=self.structure,
)
@ -43,11 +38,7 @@ class PresentationModel(SQLModel, table=True):
def get_presentation_outline(self):
if not self.outlines:
return None
return PresentationOutlineModel(
title=self.title,
slides=[SlideOutlineModel(**each) for each in self.outlines],
notes=self.notes,
)
return PresentationOutlineModel(**self.outlines)
def get_layout(self):
return PresentationLayoutModel(**self.layout)

View file

@ -1,5 +1,5 @@
from typing import Optional
from sqlmodel import SQLModel, Field, Column, JSON
from sqlmodel import Field, Column, JSON, SQLModel
from utils.randomizers import get_random_uuid

View file

@ -4,13 +4,34 @@ from pydantic import BaseModel
class UserConfig(BaseModel):
LLM: Optional[str] = None
# OpenAI
OPENAI_API_KEY: Optional[str] = None
OPENAI_MODEL: Optional[str] = None
# Google
GOOGLE_API_KEY: Optional[str] = None
GOOGLE_MODEL: Optional[str] = None
# Anthropic
ANTHROPIC_API_KEY: Optional[str] = None
ANTHROPIC_MODEL: Optional[str] = None
# Ollama
OLLAMA_URL: Optional[str] = None
OLLAMA_MODEL: Optional[str] = None
# Custom LLM
CUSTOM_LLM_URL: Optional[str] = None
CUSTOM_LLM_API_KEY: Optional[str] = None
CUSTOM_MODEL: Optional[str] = None
PEXELS_API_KEY: Optional[str] = None
# Image Provider
IMAGE_PROVIDER: Optional[str] = None
PEXELS_API_KEY: Optional[str] = None
PIXABAY_API_KEY: Optional[str] = None
# Reasoning
TOOL_CALLS: Optional[bool] = None
DISABLE_THINKING: Optional[bool] = None
EXTENDED_REASONING: Optional[bool] = None

View file

@ -0,0 +1,28 @@
[project]
name = "presenton-backend"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11,<3.12"
dependencies = [
"aiohttp>=3.12.15",
"aiomysql>=0.2.0",
"aiosqlite>=0.21.0",
"anthropic>=0.60.0",
"asyncpg>=0.30.0",
"chromadb>=1.0.15",
"docling>=2.43.0",
"fastapi[standard]>=0.116.1",
"fastmcp>=2.11.0",
"google-genai>=1.28.0",
"nltk>=3.9.1",
"openai>=1.98.0",
"pathvalidate>=3.3.1",
"pdfplumber>=0.11.7",
"python-pptx>=1.0.2",
"redis>=6.2.0",
"sqlmodel>=0.0.24",
]
[[tool.uv.index]]
url = "https://download.pytorch.org/whl/cpu"

View file

@ -1,139 +0,0 @@
aiohappyeyeballs==2.6.1
aiohttp==3.12.14
aiomysql==0.2.0
aiosignal==1.4.0
aiosqlite==0.21.0
annotated-types==0.7.0
anthropic==0.60.0
anyio==4.9.0
async-timeout==5.0.1
asyncpg==0.30.0
attrs==25.3.0
backoff==2.2.1
bcrypt==4.3.0
build==1.2.2.post1
cachetools==5.5.2
certifi==2025.7.14
cffi==1.17.1
charset-normalizer==3.4.2
chromadb==1.0.15
click==8.2.1
coloredlogs==15.0.1
cryptography==45.0.5
distro==1.9.0
dnspython==2.7.0
durationpy==0.10
email_validator==2.2.0
fastapi==0.116.1
fastapi-cli==0.0.8
fastapi-cloud-cli==0.1.4
fastembed==0.7.1
filelock==3.18.0
flatbuffers==25.2.10
fonttools==4.59.0
frozenlist==1.7.0
fsspec==2025.7.0
google-auth==2.40.3
google-genai==1.25.0
googleapis-common-protos==1.70.0
greenlet==3.2.3
grpcio==1.74.0
h11==0.16.0
h2==4.2.0
hf-xet==1.1.5
hpack==4.1.0
httpcore==1.0.9
httptools==0.6.4
httpx==0.28.1
huggingface-hub==0.34.1
humanfriendly==10.0
hyperframe==6.1.0
idna==3.10
importlib_metadata==8.7.0
importlib_resources==6.5.2
iniconfig==2.1.0
Jinja2==3.1.6
jiter==0.10.0
jsonschema==4.25.0
jsonschema-specifications==2025.4.1
kubernetes==33.1.0
loguru==0.7.3
lxml==6.0.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
mmh3==5.1.0
mpmath==1.3.0
multidict==6.6.3
numpy==2.3.2
oauthlib==3.3.1
onnxruntime==1.22.1
openai==1.95.1
opentelemetry-api==1.35.0
opentelemetry-exporter-otlp-proto-common==1.35.0
opentelemetry-exporter-otlp-proto-grpc==1.35.0
opentelemetry-proto==1.35.0
opentelemetry-sdk==1.35.0
opentelemetry-semantic-conventions==0.56b0
orjson==3.11.1
overrides==7.7.0
packaging==25.0
pathvalidate==3.3.1
pdfminer.six==20250506
pdfplumber==0.11.7
pillow==11.3.0
pluggy==1.6.0
portalocker==3.2.0
posthog==5.4.0
propcache==0.3.2
protobuf==6.31.1
py_rust_stemmers==0.1.5
pyasn1==0.6.1
pyasn1_modules==0.4.2
pybase64==1.4.2
pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
Pygments==2.19.2
pypdfium2==4.30.1
PyPika==0.48.9
pyproject_hooks==1.2.0
pytest==8.4.1
python-dateutil==2.9.0.post0
python-docx==1.2.0
python-dotenv==1.1.1
python-multipart==0.0.20
python-pptx==1.0.2
PyYAML==6.0.2
redis==6.2.0
referencing==0.36.2
requests==2.32.4
requests-oauthlib==2.0.0
rich==14.0.0
rich-toolkit==0.14.8
rignore==0.6.2
rpds-py==0.26.0
rsa==4.9.1
sentry-sdk==2.32.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
SQLAlchemy==2.0.41
sqlmodel==0.0.24
starlette==0.47.1
sympy==1.14.0
tenacity==8.5.0
tokenizers==0.21.2
tqdm==4.67.1
typer==0.16.0
typing-inspection==0.4.1
typing_extensions==4.14.1
urllib3==2.5.0
uvicorn==0.35.0
uvloop==0.21.0
watchfiles==1.1.0
websocket-client==1.8.0
websockets==15.0.1
xlsxwriter==3.2.5
yarl==1.20.1
zipp==3.23.0

View file

@ -1,21 +1,21 @@
import uvicorn
import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the FastAPI server")
parser.add_argument(
"--port", type=int, required=True, help="Port number to run the server on"
)
parser.add_argument(
"--reload", type=bool, default=False, help="Reload the server on code changes"
"--reload", type=str, default="false", help="Reload the server on code changes"
)
args = parser.parse_args()
reload = args.reload == "true"
uvicorn.run(
"api.main:app",
host="0.0.0.0",
port=args.port,
log_level="info",
reload=args.reload,
reload=reload,
)

View file

@ -1,6 +1,4 @@
from services.redis_service import RedisService
from services.temp_file_service import TempFileService
TEMP_FILE_SERVICE = TempFileService()
REDIS_SERVICE = RedisService()

View file

@ -8,9 +8,16 @@ from sqlalchemy.ext.asyncio import (
)
from sqlmodel import SQLModel
from models.sql.image_asset import ImageAsset
from models.sql.key_value import KeyValueSqlModel
from models.sql.ollama_pull_status import OllamaPullStatus
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
from models.sql.presentation_layout_code import PresentationLayoutCodeModel
from utils.get_env import get_app_data_directory_env, get_database_url_env
raw_database_url = get_database_url_env() or "sqlite:///" + os.path.join(
get_app_data_directory_env() or "/tmp/presenton", "fastapi.db"
)
@ -37,6 +44,41 @@ async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
yield session
# Container DB (Lives inside the container)
container_db_url = "sqlite+aiosqlite:////app/container.db"
container_db_engine: AsyncEngine = create_async_engine(
container_db_url, connect_args={"check_same_thread": False}
)
container_db_async_session_maker = async_sessionmaker(
container_db_engine, expire_on_commit=False
)
async def get_container_db_async_session() -> AsyncGenerator[AsyncSession, None]:
async with container_db_async_session_maker() as session:
yield session
# Create Database and Tables
async def create_db_and_tables():
async with sql_engine.begin() as conn:
await conn.run_sync(SQLModel.metadata.create_all)
await conn.run_sync(
lambda sync_conn: SQLModel.metadata.create_all(
sync_conn,
tables=[
PresentationModel.__table__,
SlideModel.__table__,
KeyValueSqlModel.__table__,
ImageAsset.__table__,
PresentationLayoutCodeModel.__table__,
],
)
)
async with container_db_engine.begin() as conn:
await conn.run_sync(
lambda sync_conn: SQLModel.metadata.create_all(
sync_conn,
tables=[OllamaPullStatus.__table__],
)
)

View file

@ -0,0 +1,27 @@
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.base_models import InputFormat
class DoclingService:
    """Convert PDF, DOCX and PPTX files to Markdown with docling."""

    def __init__(self):
        # PDF pipeline settings: OCR is switched off.
        self.pipeline_options = PdfPipelineOptions()
        self.pipeline_options.do_ocr = False

        # Register a custom option for PDF only. DOCX and PPTX were
        # previously mapped to PdfFormatOption as well, which binds the PDF
        # pipeline/backend to Office files. Leaving them out lets the
        # converter fall back to its own per-format defaults.
        self.converter = DocumentConverter(
            format_options={
                InputFormat.PDF: PdfFormatOption(
                    pipeline_options=self.pipeline_options,
                ),
            }
        )

    def parse_to_markdown(self, file_path: str) -> str:
        """Convert the file at ``file_path`` and return it as Markdown.

        Args:
            file_path: Path of the document to convert.

        Returns:
            The Markdown export of the converted document.
        """
        result = self.converter.convert(file_path)
        return result.document.export_to_markdown()

View file

@ -1,9 +1,8 @@
import mimetypes
from fastapi import HTTPException
import os, pdfplumber, asyncio
import os, asyncio
from typing import List, Tuple
from docx import Document
from pptx import Presentation
import pdfplumber
from constants.documents import (
PDF_MIME_TYPES,
@ -11,6 +10,7 @@ from constants.documents import (
TEXT_MIME_TYPES,
WORD_TYPES,
)
from services.docling_service import DoclingService
class DocumentsLoader:
@ -18,6 +18,8 @@ class DocumentsLoader:
def __init__(self, file_paths: List[str]):
self._file_paths = file_paths
self.docling_service = DoclingService()
self._documents: List[str] = []
self._images: List[List[str]] = []
@ -76,9 +78,7 @@ class DocumentsLoader:
document: str = ""
if load_text:
with pdfplumber.open(file_path) as pdf:
for page in pdf.pages:
document += await asyncio.to_thread(page.extract_text)
document = self.docling_service.parse_to_markdown(file_path)
if load_images:
image_paths = await self.get_page_images_from_pdf_async(file_path, temp_dir)
@ -90,23 +90,10 @@ class DocumentsLoader:
return await asyncio.to_thread(file.read)
def load_msword(self, file_path: str) -> str:
document = Document(file_path)
text = "\n".join([paragraph.text for paragraph in document.paragraphs])
return text
return self.docling_service.parse_to_markdown(file_path)
def load_powerpoint(self, file_path: str) -> str:
presentation = Presentation(file_path)
extracted_text = ""
for index, slide in enumerate(presentation.slides):
extracted_text += f"# Slide {index + 1}\n"
for shape in slide.shapes:
if shape.has_text_frame:
for paragraph in shape.text_frame.paragraphs:
extracted_text += f"{paragraph.text}\n"
extracted_text += "\n"
extracted_text += "\n\n"
return extracted_text
return self.docling_service.parse_to_markdown(file_path)
def get_page_images_from_pdf(self, file_path: str, temp_dir: str):
with pdfplumber.open(file_path) as pdf:

View file

@ -11,7 +11,9 @@ class IconFinderService:
self.client = chromadb.PersistentClient(
path="chroma", settings=Settings(anonymized_telemetry=False)
)
print('Initializing icons collection...')
self._initialize_icons_collection()
print('Icons collection initialized.')
def _initialize_icons_collection(self):
self.embedding_function = ONNXMiniLM_L6_V2()

View file

@ -3,12 +3,12 @@ import os
import aiohttp
from google import genai
from google.genai.types import GenerateContentConfig
from openai import AsyncOpenAI
from models.image_prompt import ImagePrompt
from models.sql.image_asset import ImageAsset
from utils.download_helpers import download_file
from utils.get_env import get_pexels_api_key_env
from utils.get_env import get_pixabay_api_key_env
from utils.llm_provider import get_llm_client
from utils.image_provider import (
is_pixels_selected,
is_pixabay_selected,
@ -80,7 +80,7 @@ class ImageGenerationService:
return "/static/images/placeholder.jpg"
async def generate_image_openai(self, prompt: str, output_directory: str) -> str:
client = get_llm_client()
client = AsyncOpenAI()
result = await client.images.generate(
model="dall-e-3",
prompt=prompt,

View file

@ -0,0 +1,665 @@
import asyncio
import json
from typing import List, Optional
from fastapi import HTTPException
from openai import AsyncOpenAI
from google import genai
from google.genai.types import GenerateContentConfig
from anthropic import AsyncAnthropic
from anthropic.types import Message as AnthropicMessage
from anthropic import MessageStreamEvent as AnthropicMessageStreamEvent
from enums.llm_provider import LLMProvider
from models.llm_message import LLMMessage
from utils.async_iterator import iterator_to_async
from utils.get_env import (
get_anthropic_api_key_env,
get_custom_llm_api_key_env,
get_custom_llm_url_env,
get_disable_thinking_env,
get_google_api_key_env,
get_ollama_url_env,
get_openai_api_key_env,
get_tool_calls_env,
)
from utils.llm_provider import get_llm_provider
from utils.parsers import parse_bool_or_none
from utils.schema_utils import ensure_strict_json_schema
class LLMClient:
def __init__(self):
    """Resolve the configured LLM provider and build its SDK client."""
    provider = get_llm_provider()
    self.llm_provider = provider
    # _get_client() dispatches on self.llm_provider, so it has to be set first.
    self._client = self._get_client()
# ? Use tool calls
def use_tool_calls(self) -> bool:
    """Report whether structured output should go through tool calls.

    Only the custom provider can enable this; every other provider
    returns False regardless of the environment setting.
    """
    if self.llm_provider == LLMProvider.CUSTOM:
        flag = parse_bool_or_none(get_tool_calls_env())
        return flag or False
    return False
# ? Disable thinking
def disable_thinking(self) -> bool:
    """Report whether thinking should be switched off in requests.

    Only the custom provider can enable this; every other provider
    returns False regardless of the environment setting.
    """
    if self.llm_provider == LLMProvider.CUSTOM:
        flag = parse_bool_or_none(get_disable_thinking_env())
        return flag or False
    return False
# ? Clients
def _get_client(self):
    """Build the SDK client that matches ``self.llm_provider``.

    Raises:
        HTTPException: 400 when the provider is none of the supported ones
            (the per-provider builders may raise 400 as well).
    """
    provider = self.llm_provider
    if provider == LLMProvider.OPENAI:
        return self._get_openai_client()
    if provider == LLMProvider.GOOGLE:
        return self._get_google_client()
    if provider == LLMProvider.ANTHROPIC:
        return self._get_anthropic_client()
    if provider == LLMProvider.OLLAMA:
        return self._get_ollama_client()
    if provider == LLMProvider.CUSTOM:
        return self._get_custom_client()
    raise HTTPException(
        status_code=400,
        detail="LLM Provider must be either openai, google, anthropic, ollama, or custom",
    )
def _get_openai_client(self):
    """Return an ``AsyncOpenAI`` client built with default arguments.

    Raises:
        HTTPException: 400 when no OpenAI API key is configured.
    """
    if get_openai_api_key_env():
        return AsyncOpenAI()
    raise HTTPException(status_code=400, detail="OpenAI API Key is not set")
def _get_google_client(self):
    """Return a ``genai.Client`` built with default arguments.

    Raises:
        HTTPException: 400 when no Google API key is configured.
    """
    if get_google_api_key_env():
        return genai.Client()
    raise HTTPException(status_code=400, detail="Google API Key is not set")
def _get_anthropic_client(self):
    """Return an ``AsyncAnthropic`` client built with default arguments.

    Raises:
        HTTPException: 400 when no Anthropic API key is configured.
    """
    if get_anthropic_api_key_env():
        return AsyncAnthropic()
    raise HTTPException(status_code=400, detail="Anthropic API Key is not set")
def _get_ollama_client(self):
    """Return an ``AsyncOpenAI`` client pointed at Ollama's ``/v1`` endpoint.

    The base URL comes from the Ollama URL setting and falls back to
    ``http://localhost:11434`` when it is unset.
    """
    # Strip trailing slashes so a configured "http://host:11434/" does not
    # turn into "http://host:11434//v1".
    base_url = (get_ollama_url_env() or "http://localhost:11434").rstrip("/")
    return AsyncOpenAI(
        base_url=base_url + "/v1",
        # Fixed placeholder key; the value is not taken from configuration.
        api_key="ollama",
    )
def _get_custom_client(self):
    """Return an ``AsyncOpenAI`` client for the user-supplied endpoint.

    Raises:
        HTTPException: 400 when no custom LLM URL is configured.
    """
    custom_url = get_custom_llm_url_env()
    if not custom_url:
        raise HTTPException(status_code=400, detail="Custom LLM URL is not set")
    # "null" is sent as the key when none is configured.
    custom_key = get_custom_llm_api_key_env() or "null"
    return AsyncOpenAI(base_url=custom_url, api_key=custom_key)
# ? Prompts
def _get_system_prompt(self, messages: List[LLMMessage]) -> str:
    """Return the content of the first system message, or "" if there is none."""
    system_contents = (item.content for item in messages if item.role == "system")
    return next(system_contents, "")
def _get_user_prompts(self, messages: List[LLMMessage]) -> List[str]:
    """Return the content of every user message, in order."""
    prompts: List[str] = []
    for item in messages:
        if item.role == "user":
            prompts.append(item.content)
    return prompts
def _get_user_llm_messages(self, messages: List[LLMMessage]) -> List[LLMMessage]:
    """Return only the messages whose role is "user", in order."""
    user_messages: List[LLMMessage] = []
    for item in messages:
        if item.role == "user":
            user_messages.append(item)
    return user_messages
# ? Generate Unstructured Content
async def _generate_openai(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Request a chat completion through the OpenAI SDK client.

    Returns:
        The message content of the first choice.
    """
    openai_client: AsyncOpenAI = self._client
    payload = [item.model_dump() for item in messages]
    # Extra body field derived from disable_thinking().
    thinking_flag = {"enable_thinking": not self.disable_thinking()}
    completion = await openai_client.chat.completions.create(
        model=model,
        messages=payload,
        max_completion_tokens=max_tokens,
        extra_body=thinking_flag,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
async def _generate_google(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Request plain-text content through the Google GenAI client.

    User message contents are sent as ``contents``; the first system
    message (or "") is sent as the system instruction.

    Returns:
        ``response.text`` from the SDK.
    """
    google_client: genai.Client = self._client
    generation_config = GenerateContentConfig(
        system_instruction=self._get_system_prompt(messages),
        response_mime_type="text/plain",
        max_output_tokens=max_tokens,
    )
    # generate_content is called via asyncio.to_thread so it runs in a
    # worker thread instead of blocking the event loop.
    result = await asyncio.to_thread(
        google_client.models.generate_content,
        model=model,
        contents=self._get_user_prompts(messages),
        config=generation_config,
    )
    return result.text
async def _generate_anthropic(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Request a message through the Anthropic client.

    Only user messages go into ``messages``; the first system message
    (or "") is passed as ``system``. ``max_tokens`` falls back to 4000.

    Returns:
        The concatenated text of all text blocks, or None when that
        concatenation is empty.
    """
    anthropic_client: AsyncAnthropic = self._client
    user_payload = [
        item.model_dump() for item in self._get_user_llm_messages(messages)
    ]
    reply: AnthropicMessage = await anthropic_client.messages.create(
        model=model,
        system=self._get_system_prompt(messages),
        messages=user_payload,
        max_tokens=max_tokens or 4000,
    )

    text_parts = [
        block.text
        for block in reply.content
        if block.type == "text" and isinstance(block.text, str)
    ]
    joined = "".join(text_parts)
    return joined if joined != "" else None
async def _generate_ollama(
    self, model: str, messages: List[LLMMessage], max_tokens: Optional[int] = None
):
    """Generate text for Ollama by delegating to ``_generate_openai``."""
    return await self._generate_openai(
        model=model, messages=messages, max_tokens=max_tokens
    )
async def _generate_custom(
    self, model: str, messages: List[LLMMessage], max_tokens: Optional[int] = None
):
    """Generate text for the custom provider by delegating to ``_generate_openai``."""
    return await self._generate_openai(
        model=model, messages=messages, max_tokens=max_tokens
    )
async def generate(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Generate unstructured text with the configured provider.

    Args:
        model: Model identifier passed to the provider.
        messages: Conversation messages.
        max_tokens: Optional output token limit.

    Returns:
        The content returned by the provider-specific generator.

    Raises:
        HTTPException: 400 when the result is None (including when the
            provider matches none of the branches).
    """
    provider = self.llm_provider
    content = None
    if provider == LLMProvider.OPENAI:
        content = await self._generate_openai(model, messages, max_tokens)
    elif provider == LLMProvider.GOOGLE:
        content = await self._generate_google(model, messages, max_tokens)
    elif provider == LLMProvider.ANTHROPIC:
        content = await self._generate_anthropic(model, messages, max_tokens)
    elif provider == LLMProvider.OLLAMA:
        content = await self._generate_ollama(model, messages, max_tokens)
    elif provider == LLMProvider.CUSTOM:
        content = await self._generate_custom(model, messages, max_tokens)

    if content is not None:
        return content
    raise HTTPException(
        status_code=400,
        detail="LLM did not return any content",
    )
# ? Generate Structured Content
async def _generate_openai_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Request JSON output that follows ``response_format``.

    Depending on ``use_tool_calls()`` the schema is sent either as a
    ``json_schema`` response format or as the parameters of a single
    required function tool named ``ResponseSchema``.

    Args:
        model: Model identifier.
        messages: Conversation messages.
        response_format: JSON schema the reply must follow.
        strict: When True the schema is passed through
            ``ensure_strict_json_schema`` and the request is flagged strict.
        max_tokens: Optional completion token limit.

    Returns:
        The parsed JSON content, or None when the model returned no
        content / no tool call.
    """
    client: AsyncOpenAI = self._client

    response_schema = response_format
    if strict:
        response_schema = ensure_strict_json_schema(
            response_schema,
            path=(),
            root=response_schema,
        )

    # Arguments shared by both request variants.
    request_kwargs = {
        "model": model,
        "messages": [message.model_dump() for message in messages],
        "max_completion_tokens": max_tokens,
        "extra_body": {
            "enable_thinking": not self.disable_thinking(),
        },
    }

    # Initialised up front: the tool-call branch may finish without any
    # tool call, which previously left `content` unbound and raised
    # UnboundLocalError on the check below.
    content = None

    if not self.use_tool_calls():
        response = await client.chat.completions.create(
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "ResponseSchema",
                    "strict": strict,
                    "schema": response_schema,
                },
            },
            **request_kwargs,
        )
        content = response.choices[0].message.content
    else:
        response = await client.chat.completions.create(
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "ResponseSchema",
                        "description": "A response to the user's message",
                        "strict": strict,
                        # Send the same (possibly strict) schema as the
                        # json_schema branch so the "strict" flag and the
                        # schema agree.
                        "parameters": response_schema,
                    },
                }
            ],
            tool_choice="required",
            **request_kwargs,
        )
        tool_calls = response.choices[0].message.tool_calls
        if tool_calls:
            content = tool_calls[0].function.arguments

    if content:
        return json.loads(content)
    return None
async def _generate_google_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    max_tokens: Optional[int] = None,
):
    """Request JSON output through the Google GenAI client.

    ``response_format`` is passed as the response JSON schema.

    Returns:
        The parsed JSON, or None when the response text is empty.
    """
    google_client: genai.Client = self._client
    generation_config = GenerateContentConfig(
        system_instruction=self._get_system_prompt(messages),
        response_mime_type="application/json",
        response_json_schema=response_format,
        max_output_tokens=max_tokens,
    )
    # generate_content is called via asyncio.to_thread so it runs in a
    # worker thread instead of blocking the event loop.
    result = await asyncio.to_thread(
        google_client.models.generate_content,
        model=model,
        contents=self._get_user_prompts(messages),
        config=generation_config,
    )

    if not result.text:
        return None
    return json.loads(result.text)
async def _generate_anthropic_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    max_tokens: Optional[int] = None,
):
    """Request structured output from Anthropic via a forced tool call.

    ``response_format`` is used as the input schema of a tool named
    ``ResponseSchema`` and ``tool_choice`` selects that tool.
    ``max_tokens`` falls back to 4000.

    Returns:
        The input of the last ``tool_use`` block in the reply, or None
        when the reply contains no such block.
    """
    anthropic_client: AsyncAnthropic = self._client
    schema_tool = {
        "name": "ResponseSchema",
        "description": "A response to the user's message",
        "input_schema": response_format,
    }
    user_payload = [
        item.model_dump() for item in self._get_user_llm_messages(messages)
    ]
    reply: AnthropicMessage = await anthropic_client.messages.create(
        model=model,
        system=self._get_system_prompt(messages),
        messages=user_payload,
        max_tokens=max_tokens or 4000,
        tools=[schema_tool],
        tool_choice={
            "type": "tool",
            "name": "ResponseSchema",
        },
    )

    tool_inputs = [block.input for block in reply.content if block.type == "tool_use"]
    return tool_inputs[-1] if tool_inputs else None
async def _generate_ollama_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Structured generation for Ollama; delegates to ``_generate_openai_structured``."""
    result = await self._generate_openai_structured(
        model, messages, response_format, strict, max_tokens
    )
    return result
async def _generate_custom_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Structured generation for the custom provider; delegates to ``_generate_openai_structured``."""
    result = await self._generate_openai_structured(
        model, messages, response_format, strict, max_tokens
    )
    return result
async def generate_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
) -> dict:
    """Produce schema-constrained output from the configured provider.

    Args:
        model: Name of the model to query.
        messages: Conversation to send.
        response_format: JSON schema the output should follow.
        strict: Forwarded to the OpenAI, Ollama and custom back-ends only.
        max_tokens: Optional output token limit.

    Returns:
        The structured content returned by the provider-specific method.

    Raises:
        HTTPException: status 400 when the provider yields no content, or
            when ``self.llm_provider`` matches none of the known providers.
    """
    provider = self.llm_provider
    result = None
    if provider == LLMProvider.OPENAI:
        result = await self._generate_openai_structured(
            model, messages, response_format, strict, max_tokens
        )
    elif provider == LLMProvider.GOOGLE:
        result = await self._generate_google_structured(
            model, messages, response_format, max_tokens
        )
    elif provider == LLMProvider.ANTHROPIC:
        result = await self._generate_anthropic_structured(
            model, messages, response_format, max_tokens
        )
    elif provider == LLMProvider.OLLAMA:
        result = await self._generate_ollama_structured(
            model, messages, response_format, strict, max_tokens
        )
    elif provider == LLMProvider.CUSTOM:
        result = await self._generate_custom_structured(
            model, messages, response_format, strict, max_tokens
        )

    if result is not None:
        return result
    raise HTTPException(
        status_code=400,
        detail="LLM did not return any content",
    )
# ? Stream Unstructured Content
async def _stream_openai(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Stream plain-text deltas from an OpenAI-style chat completion.

    Args:
        model: Name of the model to query.
        messages: Conversation; each message is serialised with
            ``model_dump()``.
        max_tokens: Optional limit sent as ``max_completion_tokens``.

    Yields:
        The ``delta`` of every ``content.delta`` event; other event types
        are ignored.
    """
    openai_client: AsyncOpenAI = self._client
    payload = [llm_message.model_dump() for llm_message in messages]
    # "enable_thinking" travels in extra_body and is the negation of
    # self.disable_thinking().
    thinking_flags = {"enable_thinking": not self.disable_thinking()}
    async with openai_client.chat.completions.stream(
        model=model,
        messages=payload,
        max_completion_tokens=max_tokens,
        extra_body=thinking_flags,
    ) as event_stream:
        async for chunk in event_stream:
            if chunk.type != "content.delta":
                continue
            yield chunk.delta
async def _stream_google(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Stream plain-text pieces from a Google GenAI model.

    ``generate_content_stream`` is wrapped with ``iterator_to_async`` so
    its results can be consumed with ``async for``.

    Yields:
        The ``text`` of every streamed event whose text is non-empty.
    """
    google_client: genai.Client = self._client
    open_stream = iterator_to_async(google_client.models.generate_content_stream)
    streamed_events = open_stream(
        model=model,
        contents=self._get_user_prompts(messages),
        config=GenerateContentConfig(
            system_instruction=self._get_system_prompt(messages),
            response_mime_type="text/plain",
            max_output_tokens=max_tokens,
        ),
    )
    async for streamed_event in streamed_events:
        piece = streamed_event.text
        if piece:
            yield piece
async def _stream_anthropic(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Stream plain-text pieces from an Anthropic model.

    Args:
        model: Name of the model to query.
        messages: Conversation; system prompt and remaining messages are
            extracted by helper methods on this class.
        max_tokens: Output token limit; 4000 is used when falsy.

    Yields:
        ``event.text`` for every stream event of type ``"text"`` whose
        text is a ``str``.
    """
    anthropic_client: AsyncAnthropic = self._client
    system_text = self._get_system_prompt(messages)
    conversation = [
        llm_message.model_dump()
        for llm_message in self._get_user_llm_messages(messages)
    ]
    async with anthropic_client.messages.stream(
        model=model,
        system=system_text,
        messages=conversation,
        max_tokens=max_tokens or 4000,
    ) as event_stream:
        async for stream_event in event_stream:
            is_text_event = stream_event.type == "text" and isinstance(
                stream_event.text, str
            )
            if is_text_event:
                yield stream_event.text
def _stream_ollama(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Return the text stream for the Ollama provider.

    Delegates unchanged to ``_stream_openai`` and hands back its result.
    """
    delegated_stream = self._stream_openai(model, messages, max_tokens)
    return delegated_stream
def _stream_custom(
    self,
    model: str,
    messages: List[LLMMessage],
    max_tokens: Optional[int] = None,
):
    """Return the text stream for the custom provider.

    Delegates unchanged to ``_stream_openai`` and hands back its result.
    """
    delegated_stream = self._stream_openai(model, messages, max_tokens)
    return delegated_stream
def stream(
    self, model: str, messages: List[LLMMessage], max_tokens: Optional[int] = None
):
    """Return the unstructured text stream of the configured provider.

    The provider-specific ``_stream_*`` method is selected from
    ``self.llm_provider``. When the provider matches none of the known
    values the function falls through and returns ``None``.
    """
    provider = self.llm_provider
    if provider == LLMProvider.OPENAI:
        return self._stream_openai(model, messages, max_tokens)
    if provider == LLMProvider.GOOGLE:
        return self._stream_google(model, messages, max_tokens)
    if provider == LLMProvider.ANTHROPIC:
        return self._stream_anthropic(model, messages, max_tokens)
    if provider == LLMProvider.OLLAMA:
        return self._stream_ollama(model, messages, max_tokens)
    if provider == LLMProvider.CUSTOM:
        return self._stream_custom(model, messages, max_tokens)
    return None
# ? Stream Structured Content
async def _stream_openai_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Stream schema-constrained JSON fragments from an OpenAI-style API.

    Two request modes exist, chosen by ``self.use_tool_calls()``:

    * JSON-schema mode: the schema is sent as ``response_format`` and the
      ``content.delta`` events are streamed back.
    * Tool-call mode: the schema becomes the parameters of a required
      function named ``ResponseSchema`` and the function-argument deltas
      are streamed back.

    Args:
        model: Name of the model to query.
        messages: Conversation; each message is serialised with
            ``model_dump()``.
        response_format: JSON schema the output should follow.
        strict: When true the schema is passed through
            ``ensure_strict_json_schema`` and the request is flagged strict.
        max_tokens: Optional limit sent as ``max_completion_tokens``.

    Yields:
        String fragments of the JSON document, in arrival order.
    """
    client: AsyncOpenAI = self._client
    use_tool_calls = self.use_tool_calls()

    response_schema = response_format
    if strict:
        response_schema = ensure_strict_json_schema(
            response_schema,
            path=(),
            root=response_schema,
        )

    if use_tool_calls:
        mode_kwargs = {
            "tools": [
                {
                    "type": "function",
                    "function": {
                        "name": "ResponseSchema",
                        "description": "A response to the user's message",
                        "strict": strict,
                        # Send the same (possibly strictified) schema that
                        # the JSON-schema mode sends, so the "strict" flag
                        # and the schema can never disagree.
                        "parameters": response_schema,
                    },
                }
            ],
            "tool_choice": "required",
        }
        wanted_event = "tool_calls.function.arguments.delta"
        delta_attr = "arguments_delta"
    else:
        mode_kwargs = {
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "ResponseSchema",
                    "strict": strict,
                    "schema": response_schema,
                },
            },
        }
        wanted_event = "content.delta"
        delta_attr = "delta"

    async with client.chat.completions.stream(
        model=model,
        messages=[message.model_dump() for message in messages],
        max_completion_tokens=max_tokens,
        extra_body={
            "enable_thinking": not self.disable_thinking(),
        },
        **mode_kwargs,
    ) as stream:
        async for event in stream:
            # Only the delta events of the active mode carry output text.
            if event.type == wanted_event:
                yield getattr(event, delta_attr)
async def _stream_google_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    max_tokens: Optional[int] = None,
):
    """Stream JSON text pieces from a Google GenAI model.

    The request asks for ``application/json`` output constrained by
    ``response_format``; ``generate_content_stream`` is wrapped with
    ``iterator_to_async`` so it can be consumed with ``async for``.

    Yields:
        The ``text`` of every streamed event whose text is non-empty.
    """
    google_client: genai.Client = self._client
    open_stream = iterator_to_async(google_client.models.generate_content_stream)
    streamed_events = open_stream(
        model=model,
        contents=self._get_user_prompts(messages),
        config=GenerateContentConfig(
            system_instruction=self._get_system_prompt(messages),
            response_mime_type="application/json",
            response_json_schema=response_format,
            max_output_tokens=max_tokens,
        ),
    )
    async for streamed_event in streamed_events:
        piece = streamed_event.text
        if piece:
            yield piece
async def _stream_anthropic_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    max_tokens: Optional[int] = None,
):
    """Stream partial JSON from Anthropic by forcing a tool call.

    The schema is exposed as the input schema of a single tool named
    ``ResponseSchema`` and ``tool_choice`` pins the model to that tool.

    Args:
        model: Name of the model to query.
        messages: Conversation; system prompt and remaining messages are
            extracted by helper methods on this class.
        response_format: JSON schema used as the tool's ``input_schema``.
        max_tokens: Output token limit; 4000 is used when falsy.

    Yields:
        ``event.partial_json`` for every ``"input_json"`` stream event
        whose partial JSON is a ``str``.
    """
    anthropic_client: AsyncAnthropic = self._client
    system_text = self._get_system_prompt(messages)
    conversation = [
        llm_message.model_dump()
        for llm_message in self._get_user_llm_messages(messages)
    ]
    schema_tool = {
        "name": "ResponseSchema",
        "description": "A response to the user's message",
        "input_schema": response_format,
    }
    async with anthropic_client.messages.stream(
        model=model,
        system=system_text,
        messages=conversation,
        max_tokens=max_tokens or 4000,
        tools=[schema_tool],
        tool_choice={
            "type": "tool",
            "name": "ResponseSchema",
        },
    ) as event_stream:
        async for stream_event in event_stream:
            is_json_event = stream_event.type == "input_json" and isinstance(
                stream_event.partial_json, str
            )
            if is_json_event:
                yield stream_event.partial_json
def _stream_ollama_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Return the structured stream for the Ollama provider.

    Delegates unchanged to ``_stream_openai_structured``.
    """
    forwarded = (model, messages, response_format, strict, max_tokens)
    return self._stream_openai_structured(*forwarded)
def _stream_custom_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Return the structured stream for the custom provider.

    Delegates unchanged to ``_stream_openai_structured``.
    """
    forwarded = (model, messages, response_format, strict, max_tokens)
    return self._stream_openai_structured(*forwarded)
def stream_structured(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    max_tokens: Optional[int] = None,
):
    """Return the structured-output stream of the configured provider.

    ``strict`` is forwarded to the OpenAI, Ollama and custom back-ends
    only. When ``self.llm_provider`` matches none of the known providers
    the function falls through and returns ``None``.
    """
    provider = self.llm_provider
    if provider == LLMProvider.OPENAI:
        return self._stream_openai_structured(
            model, messages, response_format, strict, max_tokens
        )
    if provider == LLMProvider.GOOGLE:
        return self._stream_google_structured(
            model, messages, response_format, max_tokens
        )
    if provider == LLMProvider.ANTHROPIC:
        return self._stream_anthropic_structured(
            model, messages, response_format, max_tokens
        )
    if provider == LLMProvider.OLLAMA:
        return self._stream_ollama_structured(
            model, messages, response_format, strict, max_tokens
        )
    if provider == LLMProvider.CUSTOM:
        return self._stream_custom_structured(
            model, messages, response_format, strict, max_tokens
        )
    return None

View file

@ -0,0 +1,199 @@
import asyncio
from typing import List
import nltk
from models.document_chunk import DocumentChunk
# Import-time bootstrap: look for the "punkt" tokenizer data under ./nltk and
# download it into that directory when the lookup fails.
# NOTE(review): the lookup is given paths=["./nltk"] explicitly; confirm that
# nltk.sent_tokenize also searches ./nltk at runtime (e.g. via NLTK_DATA).
try:
    nltk.data.find("tokenizers/punkt", paths=["./nltk"])
except LookupError:
    nltk.download("punkt", download_dir="./nltk")
class ScoreBasedChunker:
    """Split text into heading-led chunks using a simple heading score.

    Text is first broken into "sentences" (lines or sentences, depending on
    which extraction strategy yields enough of them). Sentences that look
    like markdown headings receive a positive score, and the best-scoring
    headings become chunk boundaries.
    """

    def extract_sentences(self, text: str, min_sentences: int) -> List[str]:
        """Return at least ``min_sentences`` sentences extracted from ``text``.

        Strategies are tried in order (markdown lines, NLTK, punctuation,
        raw newlines); the first one that yields enough sentences wins.

        Raises:
            ValueError: if even the last strategy yields too few sentences.
        """
        extractors = (
            self.extract_sentences_markdown,
            self.extract_sentences_nltk,
            self.extract_sentences_by_stop_words,
            self.extract_sentences_by_new_line,
        )
        sentences: List[str] = []
        for extractor in extractors:
            sentences = extractor(text)
            if len(sentences) >= min_sentences:
                return sentences
        raise ValueError(
            f"Only {len(sentences)} sentences found, requested {min_sentences}"
        )

    def extract_sentences_markdown(self, text: str) -> List[str]:
        """Return every non-blank line of ``text``, stripped of whitespace."""
        # Headings, punctuated lines and all other lines are kept alike, so
        # no per-line classification is needed here.
        stripped_lines = (line.strip() for line in text.split("\n"))
        return [line for line in stripped_lines if line]

    def extract_sentences_nltk(self, text: str) -> List[str]:
        """Return the sentences found by ``nltk.sent_tokenize``."""
        return nltk.sent_tokenize(text)

    def extract_sentences_by_stop_words(self, text: str) -> List[str]:
        """Split ``text`` after every ``.``, ``!`` or ``?`` character.

        Each piece is stripped; a non-blank remainder after the last
        terminator is kept as a final sentence.
        """
        sentences = []
        start = 0
        for position, char in enumerate(text):
            if char in ".!?":
                # Slice instead of growing a string one character at a time.
                sentences.append(text[start : position + 1].strip())
                start = position + 1
        remainder = text[start:].strip()
        if remainder:
            sentences.append(remainder)
        return [sentence for sentence in sentences if sentence]

    def extract_sentences_by_new_line(self, text: str) -> List[str]:
        """Split ``text`` on ``"\\n"``, keeping the newline on all but the last piece."""
        pieces = text.split("\n")
        return [piece + "\n" for piece in pieces[:-1]] + [pieces[-1]]

    def score_sentences_for_heading(self, sentences: List[str]) -> List[float]:
        """Score each sentence by how strongly it acts as a heading.

        Non-headings score ``0.0``. A heading (a sentence that starts with
        ``#`` after stripping whitespace) scores:

        * a base by level: 10, 8, 6 for levels 1-3, then 4.0 minus 0.5 per
          level beyond 4;
        * plus 5 if it is the first heading seen;
        * plus 0.5 per sentence of distance from the previous heading,
          capped at 5.

        Returns:
            One score per input sentence, in the same order.
        """
        scores = []
        last_heading_index = -1
        first_heading_found = False

        for index, sentence in enumerate(sentences):
            score = 0.0
            stripped = sentence.strip()

            if stripped.startswith("#"):
                # Count the '#' run on the stripped text; counting on the raw
                # sentence reported level 0 for indented headings.
                heading_level = len(stripped) - len(stripped.lstrip("#"))

                if heading_level <= 3:
                    score += 10.0 - (heading_level - 1) * 2.0
                else:
                    score += 4.0 - (heading_level - 4) * 0.5

                if not first_heading_found:
                    score += 5.0
                    first_heading_found = True

                if last_heading_index != -1:
                    distance = index - last_heading_index
                    score += min(5.0, distance * 0.5)

                last_heading_index = index

            scores.append(score)

        return scores

    def _select_top_headings(self, heading_scores, top_k: int) -> List[int]:
        """Pick up to ``top_k`` heading indices, best score groups first.

        Headings are grouped by rounded score. Whole groups are taken while
        they fit; the first group that does not fit is sampled so the picks
        are spread across the document (middle, both ends, or evenly spaced).
        """
        score_groups = {}
        for idx, score in heading_scores:
            score_groups.setdefault(round(score), []).append(idx)

        selected = []
        ranked_groups = sorted(
            score_groups.items(), key=lambda group: group[0], reverse=True
        )
        for _, headings in ranked_groups:
            headings.sort()
            remaining_needed = top_k - len(selected)
            if remaining_needed <= 0:
                break

            if len(headings) <= remaining_needed:
                selected.extend(headings)
            elif remaining_needed == 1:
                selected.append(headings[len(headings) // 2])
            elif remaining_needed == 2:
                selected.append(headings[0])
                selected.append(headings[-1])
            else:
                step = (len(headings) - 1) / (remaining_needed - 1)
                for i in range(remaining_needed):
                    index = int(round(i * step))
                    if index < len(headings):
                        selected.append(headings[index])
        return selected

    def get_chunks(
        self, sentences: List[str], sentences_scores: List[float], top_k: int = 10
    ) -> List[DocumentChunk]:
        """Build chunks from the (at most ``top_k``) best-scoring headings.

        Args:
            sentences: Extracted sentences, in document order.
            sentences_scores: Score per sentence; computed with
                ``score_sentences_for_heading`` when empty.
            top_k: Maximum number of headings, and thus chunks, to keep.

        Returns:
            One ``DocumentChunk`` per selected heading, in document order.
            Each chunk's content is the space-joined sentences between its
            heading and the next selected heading (or the end of the text).
            Empty when no sentence has a positive score.
        """
        if not sentences_scores:
            sentences_scores = self.score_sentences_for_heading(sentences)

        heading_scores = [
            (index, score) for index, score in enumerate(sentences_scores) if score > 0
        ]
        if not heading_scores:
            return []

        # Best score first; earlier position breaks ties.
        heading_scores.sort(key=lambda item: (-item[1], item[0]))

        if len(heading_scores) <= top_k:
            selected_headings = [idx for idx, _ in heading_scores]
        else:
            selected_headings = self._select_top_headings(heading_scores, top_k)
        selected_headings.sort()

        chunks = []
        for position, heading_idx in enumerate(selected_headings):
            if position + 1 < len(selected_headings):
                content_end = selected_headings[position + 1]
            else:
                content_end = len(sentences)

            content = " ".join(sentences[heading_idx + 1 : content_end]).strip()
            chunks.append(
                DocumentChunk(
                    heading=sentences[heading_idx],
                    content=content,
                    heading_index=heading_idx,
                    score=sentences_scores[heading_idx],
                )
            )
        return chunks

    async def get_n_chunks(self, text: str, n: int) -> List[DocumentChunk]:
        """Return ``n`` chunks for ``text``, running each step in a worker thread.

        Raises:
            ValueError: if too few sentences can be extracted, or fewer
                than ``n`` chunks are found.
        """
        sentences = await asyncio.to_thread(self.extract_sentences, text, n)
        sentences_scores = await asyncio.to_thread(
            self.score_sentences_for_heading, sentences
        )
        chunks = await asyncio.to_thread(
            self.get_chunks, sentences, sentences_scores, n
        )
        if len(chunks) < n:
            raise ValueError(f"Only {len(chunks)} chunks found, requested {n}")
        return chunks

View file

@ -9,8 +9,7 @@ class TempFileService:
def __init__(self):
    """Resolve the temp base directory, run cleanup, and make sure it exists."""
    # Fall back to /tmp/presenton when the environment helper returns nothing.
    self.base_dir = get_temp_directory_env() or "/tmp/presenton"
    # Cleanup runs on every construction, before the directory is (re)created.
    # NOTE(review): cleanup_base_dir is defined elsewhere; presumably it
    # removes leftovers from previous runs. Confirm.
    self.cleanup_base_dir()
    os.makedirs(self.base_dir, exist_ok=True)
def create_dir_in_dir(self, base_dir: str, dir_name: Optional[str] = None) -> str:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 397 KiB

After

Width:  |  Height:  |  Size: 79 KiB

View file

@ -0,0 +1,393 @@
import asyncio
import pytest
from fastmcp import FastMCP, Client
from app_mcp.tools.start_presentation import register_start_presentation
from app_mcp.tools.help_me import register_help_me
from app_mcp.tools.continue_workflow import register_continue_workflow
from app_mcp.tools.export_presentation import register_export_presentation
from app_mcp.tools.show_layouts import register_show_layouts
from app_mcp.tools.get_status import register_get_status
from app_mcp.tools.choose_layout import register_choose_layout
from app_mcp.services.state_machine.machine import PresentationStateMachine
from app_mcp.services.state_machine.context import StateContext
from unittest.mock import patch, MagicMock
@pytest.fixture
def mcp_server():
    """Build a FastMCP test server with every tool wired to a mocked orchestrator.

    ``WorkflowOrchestrator`` is patched while the server is assembled. The
    mocked orchestrator's ``get_session`` returns a ``MagicMock`` state
    machine whose ``context`` is a real ``StateContext`` with empty metadata.
    """
    orchestrator_path = "app_mcp.services.workflow_orchestrator.WorkflowOrchestrator"
    with patch(orchestrator_path) as orchestrator_cls:
        orchestrator = orchestrator_cls.return_value
        server = FastMCP("TestServer")

        # Session lookups hand back a spec'd state machine with a real context.
        context = StateContext()
        context.metadata = {}
        state_machine = MagicMock(spec=PresentationStateMachine)
        state_machine.context = context
        orchestrator.get_session.return_value = state_machine

        # Register all tool functions, in this order, with the mocked orchestrator.
        registrars = (
            register_start_presentation,
            register_help_me,
            register_continue_workflow,
            register_export_presentation,
            register_show_layouts,
            register_get_status,
            register_choose_layout,
        )
        for register in registrars:
            register(mcp=server, orchestrator=orchestrator)
        return server
# Grouped test classes for each tool
class TestStartPresentation:
    """Checks for the start_presentation tool."""

    @staticmethod
    def _call(server, arguments):
        """Call start_presentation via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("start_presentation", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success(self, mcp_server):
        """A fully specified request succeeds and echoes its parameters."""
        data = self._call(
            mcp_server,
            {
                "session_id": "test_session",
                "prompt": "Test Presentation",
                "files": None,
                "n_slides": 5,
                "language": "English"
            },
        )
        assert data["status"] == "success"
        assert data["session_id"] == "test_session"
        for field in ("message", "suggestion", "next_step", "parameters"):
            assert field in data
        assert data["parameters"]["n_slides"] == 5
        assert data["parameters"]["language"] == "English"

    def test_missing_session_id(self, mcp_server):
        """An empty session_id is rejected with a session-id error."""
        data = self._call(
            mcp_server, {"prompt": "Test Presentation", "session_id": ""}
        )
        assert data["status"] == "error"
        assert "Session ID is required" in data["error"]

    def test_missing_prompt(self, mcp_server):
        """An empty prompt is rejected with a prompt error."""
        data = self._call(mcp_server, {"session_id": "test_session", "prompt": ""})
        assert data["status"] == "error"
        assert "Prompt is required" in data["error"]

    def test_invalid_prompt_type(self, mcp_server):
        """An empty-string prompt is rejected with a prompt error.

        Despite its name, this test sends ``""`` (not ``None``), so it
        covers the same case as ``test_missing_prompt``.
        """
        data = self._call(mcp_server, {"session_id": "test_session", "prompt": ""})
        assert data["status"] == "error"
        assert "Prompt is required" in data["error"]
class TestHelp:
    """Checks for the help tool."""

    def test_help(self, mcp_server):
        """Calling help without arguments returns an info payload with all help sections."""

        async def _roundtrip():
            async with Client(mcp_server) as client:
                reply = await client.call_tool("help", {})
                return reply.data

        data = asyncio.run(_roundtrip())
        assert data["status"] == "info"
        for section in ("message", "workflow", "helpful_commands", "quick_start", "tips"):
            assert section in data
        assert "step_1" in data["workflow"]
        assert "get_status" in data["helpful_commands"]
        assert isinstance(data["tips"], list)
class TestContinueWorkflow:
    """Checks for the continue_workflow tool."""

    @staticmethod
    def _call(server, arguments):
        """Call continue_workflow via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("continue_workflow", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success(self, mcp_server):
        """A valid session yields a known status with the fields that status requires."""
        data = self._call(mcp_server, {"session_id": "test_session"})
        assert "status" in data
        assert data["status"] in ["success", "error", "info"]
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            assert "next_step" in data
        if data["status"] == "error":
            assert "error" in data

    def test_missing_session_id(self, mcp_server):
        """An empty session_id is rejected with a session-id error."""
        data = self._call(mcp_server, {"session_id": ""})
        assert data["status"] == "error"
        assert "Valid session_id is required" in data["error"]
class TestExportPresentation:
    """Checks for the export_presentation tool."""

    @staticmethod
    def _call(server, arguments):
        """Call export_presentation via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("export_presentation", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success_pptx(self, mcp_server):
        """A 'pptx' export reports either a complete success payload or an error."""
        data = self._call(mcp_server, {"session_id": "test_session", "format": "pptx"})
        assert "status" in data
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            assert data["message"].endswith("PPTX!")
            for field in ("path", "suggestion", "available_actions"):
                assert field in data
        if data["status"] == "error":
            assert "error" in data

    def test_success_pdf(self, mcp_server):
        """A 'pdf' export reports either a success payload with a path or an error."""
        data = self._call(mcp_server, {"session_id": "test_session", "format": "pdf"})
        assert "status" in data
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            assert data["message"].endswith("PDF!")
            assert "path" in data
        if data["status"] == "error":
            assert "error" in data

    def test_invalid_format(self, mcp_server):
        """A format other than 'pdf' or 'pptx' is rejected."""
        data = self._call(mcp_server, {"session_id": "test_session", "format": "docx"})
        assert data["status"] == "error"
        assert "Please choose either 'pdf' or 'pptx' format" in data["error"]

    def test_missing_session_id(self, mcp_server):
        """An empty session_id yields an error payload that carries session_id."""
        data = self._call(mcp_server, {"session_id": "", "format": "pptx"})
        assert data["status"] == "error"
        assert "session_id" in data
class TestShowLayouts:
    """Checks for the show_layouts tool."""

    @staticmethod
    def _call(server, arguments):
        """Call show_layouts via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("show_layouts", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success(self, mcp_server):
        """A valid session reports either layouts with guidance or an error."""
        data = self._call(mcp_server, {"session_id": "test_session"})
        assert "status" in data
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            assert "layouts" in data
            layouts = data["layouts"]
            assert isinstance(layouts, list) or layouts is not None
            assert "message" in data
            assert "suggestion" in data
        if data["status"] == "error":
            assert "error" in data

    def test_missing_session_id(self, mcp_server):
        """An empty session_id yields an error payload that carries session_id."""
        data = self._call(mcp_server, {"session_id": ""})
        assert data["status"] == "error"
        assert "session_id" in data
class TestGetStatus:
    """Checks for the get_status tool."""

    @staticmethod
    def _call(server, arguments):
        """Call get_status via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("get_status", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success(self, mcp_server):
        """A valid session reports either a full status payload or an error."""
        data = self._call(mcp_server, {"session_id": "test_session"})
        assert "status" in data
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            for field in ("current_step", "progress", "message", "next_action", "context"):
                assert field in data
        if data["status"] == "error":
            assert "error" in data

    def test_missing_session_id(self, mcp_server):
        """An empty session_id is rejected with a session-id error."""
        data = self._call(mcp_server, {"session_id": ""})
        assert data["status"] == "error"
        assert "Valid session_id is required" in data["error"]
class TestChooseLayout:
    """Checks for the choose_layout tool."""

    @staticmethod
    def _call(server, arguments):
        """Call choose_layout via an in-memory client and return its data."""

        async def _roundtrip():
            async with Client(server) as client:
                reply = await client.call_tool("choose_layout", arguments)
                return reply.data

        return asyncio.run(_roundtrip())

    def test_success(self, mcp_server):
        """A valid session and layout report either a success payload or an error."""
        data = self._call(
            mcp_server, {"session_id": "test_session", "layout_name": "default"}
        )
        assert "status" in data
        if data["status"] == "success":
            assert data["session_id"] == "test_session"
            for field in ("message", "suggestion", "available_actions"):
                assert field in data
        if data["status"] == "error":
            assert "error" in data

    def test_missing_session_id(self, mcp_server):
        """An empty session_id yields an error payload that carries session_id."""
        data = self._call(mcp_server, {"session_id": "", "layout_name": "default"})
        assert data["status"] == "error"
        assert "session_id" in data

    def test_missing_layout_name(self, mcp_server):
        """An empty layout_name returns a status; an error status includes an error."""
        data = self._call(mcp_server, {"session_id": "test_session", "layout_name": ""})
        assert "status" in data
        if data["status"] == "error":
            assert "error" in data

View file

@ -0,0 +1,21 @@
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
from google import genai
async def list_available_openai_compatible_models(url: str, api_key: str) -> list[str]:
    """List model ids exposed by an OpenAI-compatible endpoint.

    Args:
        url: Base URL of the endpoint.
        api_key: API key sent with the request.

    Returns:
        The ``id`` of every listed model, or an empty list when the
        listing carries no data.
    """
    openai_client = AsyncOpenAI(api_key=api_key, base_url=url)
    listing = await openai_client.models.list()
    entries = listing.data
    if not entries:
        return []
    return [entry.id for entry in entries]
async def list_available_anthropic_models(api_key: str) -> list[str]:
    """List the ids of up to 50 models available to the given Anthropic key."""
    anthropic_client = AsyncAnthropic(api_key=api_key)
    page = await anthropic_client.models.list(limit=50)
    return [entry.id for entry in page.data]
async def list_available_google_models(api_key: str) -> list[str]:
    """List the names of the models available to the given Google API key.

    ``client.models.list`` is a synchronous call, and iterating its result
    is part of the same blocking work, so both run in a worker thread
    instead of on the event loop.

    Args:
        api_key: Google API key used to build the client.

    Returns:
        The ``name`` of every listed model (requested with page size 50).
    """
    import asyncio

    client = genai.Client(api_key=api_key)

    def _collect_names() -> list[str]:
        # Listing and iteration stay together inside the worker thread.
        return [model.name for model in client.models.list(config={"page_size": 50})]

    return await asyncio.to_thread(_collect_names)

View file

@ -1,18 +0,0 @@
from typing import Optional
from openai import AsyncOpenAI
from utils.llm_provider import get_llm_client
async def list_available_custom_models(
    url: Optional[str] = None, api_key: Optional[str] = None
) -> list[str]:
    """List model ids from a custom OpenAI-compatible endpoint.

    Args:
        url: Base URL of the endpoint. When falsy, the client returned by
            ``get_llm_client()`` is used instead.
        api_key: API key for ``url``; the placeholder ``"null"`` is sent
            when it is falsy.

    Returns:
        The ``id`` of every model yielded by the listing.
    """
    if not url:
        client = get_llm_client()
    else:
        client = AsyncOpenAI(api_key=api_key or "null", base_url=url)

    # The per-model debug print was dropped; only the ids are collected.
    return [model.id async for model in client.models.list()]

View file

@ -78,3 +78,12 @@ def deep_update(original: dict, updates: dict) -> dict:
if not isinstance(value, (dict, list)):
original[key] = value
return original
def has_more_than_n_keys(obj: dict[str, object], n: int) -> bool:
    """Return True when ``obj`` holds strictly more than ``n`` keys.

    Uses ``len`` directly rather than counting keys one by one. This also
    makes an empty dict compare correctly against a negative ``n``.
    """
    return len(obj) > n

View file

@ -1,35 +1,15 @@
from typing import List, Optional
from typing import List
from pydantic import Field
from models.presentation_outline_model import (
PresentationOutlineModel,
SlideOutlineModel,
)
from models.presentation_outline_model import PresentationOutlineModel
from models.presentation_structure_model import PresentationStructureModel
class SlideOutlineModelWithValidation(SlideOutlineModel):
title: str = Field(
description="Title of the slide in about 3 to 5 words",
min_length=10,
max_length=50,
)
def get_presentation_outline_model_with_n_slides(n_slides: int):
class PresentationOutlineModelWithNSlides(PresentationOutlineModel):
title: str = Field(
description="Title of the presentation in about 3 to 8 words",
min_length=10,
max_length=50,
)
notes: Optional[List[str]] = Field(
default=None,
description="Important notes for the presentation styling and formatting",
min_length=0,
max_length=10,
)
slides: List[SlideOutlineModelWithValidation] = Field(
description="List of slides", min_items=n_slides, max_items=n_slides
slides: List[str] = Field(
description="Markdown content for each slide",
min_items=n_slides,
max_items=n_slides,
)
return PresentationOutlineModelWithNSlides

View file

@ -25,6 +25,14 @@ def get_llm_provider_env():
return os.getenv("LLM")
def get_anthropic_api_key_env():
    """Return the ANTHROPIC_API_KEY environment variable, or None when unset."""
    return os.environ.get("ANTHROPIC_API_KEY")
def get_anthropic_model_env():
    """Return the ANTHROPIC_MODEL environment variable, or None when unset."""
    return os.environ.get("ANTHROPIC_MODEL")
def get_ollama_url_env():
    """Return the OLLAMA_URL environment variable, or None when unset."""
    return os.environ.get("OLLAMA_URL")
@ -37,10 +45,18 @@ def get_openai_api_key_env():
return os.getenv("OPENAI_API_KEY")
def get_openai_model_env():
    """Return the OPENAI_MODEL environment variable, or None when unset."""
    return os.environ.get("OPENAI_MODEL")
def get_google_api_key_env():
    """Return the GOOGLE_API_KEY environment variable, or None when unset."""
    return os.environ.get("GOOGLE_API_KEY")
def get_google_model_env():
    """Return the GOOGLE_MODEL environment variable, or None when unset."""
    return os.environ.get("GOOGLE_MODEL")
def get_custom_llm_api_key_env():
    """Return the CUSTOM_LLM_API_KEY environment variable, or None when unset."""
    return os.environ.get("CUSTOM_LLM_API_KEY")
@ -79,3 +95,15 @@ def get_redis_db_env():
def get_redis_password_env():
    """Return the REDIS_PASSWORD environment variable, or None when unset."""
    return os.environ.get("REDIS_PASSWORD")
def get_tool_calls_env():
    """Return the TOOL_CALLS environment variable, or None when unset."""
    return os.environ.get("TOOL_CALLS")
def get_disable_thinking_env():
    """Return the DISABLE_THINKING environment variable, or None when unset."""
    return os.environ.get("DISABLE_THINKING")
def get_extended_reasoning_env():
    """Return the EXTENDED_REASONING environment variable, or None when unset."""
    return os.environ.get("EXTENDED_REASONING")

View file

@ -1,15 +1,8 @@
import asyncio
import json
from models.llm_message import LLMMessage
from models.presentation_layout import SlideLayoutModel
from models.sql.slide import SlideModel
from google.genai.types import GenerateContentConfig
from utils.llm_provider import (
get_google_llm_client,
get_large_model,
get_llm_client,
is_google_selected,
)
from services.llm_client import LLMClient
from utils.llm_provider import get_model
from utils.schema_utils import remove_fields_from_schema
system_prompt = """
@ -42,64 +35,40 @@ def get_user_prompt(prompt: str, slide_data: dict, language: str):
"""
def get_prompt_to_edit_slide_content(
def get_messages(
prompt: str,
slide_data: dict,
language: str,
):
return [
{
"role": "system",
"content": system_prompt,
},
{
"role": "user",
"content": get_user_prompt(prompt, slide_data, language),
},
LLMMessage(
role="system",
content=system_prompt,
),
LLMMessage(
role="user",
content=get_user_prompt(prompt, slide_data, language),
),
]
async def get_edited_slide_content(
prompt: str,
slide_layout: SlideLayoutModel,
slide: SlideModel,
language: str,
slide_layout: SlideLayoutModel,
):
model = get_large_model()
model = get_model()
response_schema = remove_fields_from_schema(
slide_layout.json_schema, ["__image_url__", "__icon_url__"]
)
if is_google_selected():
client = get_google_llm_client()
response = await asyncio.to_thread(
client.models.generate_content,
model=model,
contents=[get_user_prompt(prompt, slide.content, language)],
config=GenerateContentConfig(
system_instruction=system_prompt,
response_mime_type="application/json",
response_json_schema=response_schema,
),
)
slide_content_json = json.loads(response.text)
else:
client = get_llm_client()
response = await client.beta.chat.completions.parse(
model=model,
messages=get_prompt_to_edit_slide_content(
prompt,
slide.content,
language,
),
response_format={
"type": "json_schema",
"json_schema": {
"name": "slide_content",
"schema": response_schema,
},
},
)
slide_content_json = json.loads(response.choices[0].message.content)
return slide_content_json
client = LLMClient()
response = await client.generate_structured(
model=model,
messages=get_messages(prompt, slide.content, language),
response_format=response_schema,
strict=False,
)
return response

View file

@ -1,12 +1,7 @@
import asyncio
from typing import Optional
from google.genai.types import GenerateContentConfig
from utils.llm_provider import (
get_google_llm_client,
get_large_model,
is_google_selected,
get_llm_client,
)
from models.llm_message import LLMMessage
from services.llm_client import LLMClient
from utils.llm_provider import get_model
system_prompt = """
You are an expert HTML slide editor. Your task is to modify slide HTML content based on user prompts while maintaining proper structure, styling, and functionality.
@ -52,35 +47,17 @@ def get_user_prompt(prompt: str, html: str):
async def get_edited_slide_html(prompt: str, html: str):
model = get_large_model()
llm_response = None
if is_google_selected():
client = get_google_llm_client()
response = await asyncio.to_thread(
client.models.generate_content,
model=model,
contents=[get_user_prompt(prompt, html)],
config=GenerateContentConfig(
system_instruction=system_prompt,
response_mime_type="text/plain",
),
)
llm_response = response.text
else:
client = get_llm_client()
response = await client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": get_user_prompt(prompt, html)},
],
)
llm_response = response.choices[0].message.content
model = get_model()
if not llm_response:
return html
return extract_html_from_response(llm_response) or html
client = LLMClient()
response = await client.generate(
model=model,
messages=[
LLMMessage(role="system", content=system_prompt),
LLMMessage(role="user", content=get_user_prompt(prompt, html)),
],
)
return extract_html_from_response(response) or html
def extract_html_from_response(response_text: str) -> Optional[str]:

View file

@ -1,45 +0,0 @@
import asyncio
from typing import List
from openai.types.chat.chat_completion import ChatCompletion
from utils.llm_provider import get_llm_client, get_nano_model
sysmte_prompt = """
Generate a blog-style summary of the provided document in **more than 2000 words**.
Maintain as much information as possible.
### Output Format
- Provide the summary in a **blog format** with an **engaging introduction** and a **clear structure**.
- Ensure the **logical flow** of the document is preserved.
### Notes
- **Retain the main ideas and essential details** from the document.
- **Show line-breaks** clearly.
- If **slides structure is mentioned** in document, structure the summary in the same way.
"""
async def generate_document_summary(
    documents: List[str], *, max_chars: int = 200000
) -> str:
    """Summarise each document with the nano model and join the summaries.

    One chat-completion request is issued per document and all requests are
    awaited concurrently with ``asyncio.gather``, so the summaries come back
    in the same order as ``documents``.

    Args:
        documents: Raw text of each document to summarise.
        max_chars: Maximum number of leading characters of each document that
            is sent to the model. Defaults to the previously hard-coded
            200000.

    Returns:
        The per-document summaries separated by four newlines, or an empty
        string when there are no documents.
    """
    # Nothing to summarise: skip creating a client for an empty request set.
    if not documents:
        return ""

    client = get_llm_client()
    model = get_nano_model()

    # Build every request first so they can run concurrently.
    # NOTE: `sysmte_prompt` (sic) is the spelling of the module-level constant.
    pending = [
        client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": sysmte_prompt},
                {"role": "user", "content": document[:max_chars]},
            ],
        )
        for document in documents
    ]
    completions = await asyncio.gather(*pending)

    # A completion's message content may be None (e.g. a refusal); joining
    # None would raise TypeError, so treat it as an empty summary.
    return "\n\n\n\n".join(
        completion.choices[0].message.content or "" for completion in completions
    )

View file

@ -1,54 +1,19 @@
import asyncio
from typing import Optional
from google.genai.types import GenerateContentConfig
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from utils.async_iterator import iterator_to_async
from models.llm_message import LLMMessage
from services.llm_client import LLMClient
from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides
from utils.llm_provider import (
get_google_llm_client,
get_large_model,
get_llm_client,
is_google_selected,
)
from pydantic import BaseModel
from utils.llm_provider import get_model
system_prompt = """
You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content.
You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content.
## Core Requirements
### Input Processing
1. **Extract key information** from the user's prompt:
- Main topic/subject matter
- Required number of slides
- Target language for output
- Specific content requirements or focus areas
- Target audience (if specified)
- Presentation style or tone preferences
## Content Generation Guidelines
### Presentation Title
- Create a **concise, descriptive title** that captures the essence of the topic
- Use **plain text format** (no markdown formatting)
- Make it **engaging and professional**
- Ensure it reflects the main theme and target audience
### Slide Titles
- Generate **clear, specific titles** for each slide
- Use **plain text format** (no markdown, no "Slide 1", "Slide 2" prefixes)
- Make each title **descriptive and informative**
- Ensure titles create a **logical flow** through the presentation
- Keep titles **concise but meaningful**
## Special Considerations
### Slide Count Compliance
- Generate **exactly** the number of slides requested
- Distribute content **evenly** across slides
- Create **balanced information flow**
- Provide content for each slide in markdown format.
- Make sure that flow of the presentation is logical and consistent.
- Place greater emphasis on numerical data.
- If Additional Information is provided, divide it into slides.
- Make sure that content follows language guidelines.
"""
@ -62,61 +27,34 @@ def get_user_prompt(prompt: str, n_slides: int, language: str, content: str):
"""
def get_prompt_template(prompt: str, n_slides: int, language: str, content: str):
def get_messages(prompt: str, n_slides: int, language: str, content: str):
return [
{
"role": "system",
"content": system_prompt,
},
{
"role": "user",
"content": get_user_prompt(prompt, n_slides, language, content),
},
LLMMessage(
role="system",
content=system_prompt,
),
LLMMessage(
role="user",
content=get_user_prompt(prompt, n_slides, language, content),
),
]
def get_response_format(
    response_model: "type[BaseModel]", name: str = "PresentationOutlineModel"
):
    """Build a ``json_schema`` response-format payload for a model.

    Args:
        response_model: Object exposing ``model_json_schema()``; presumably a
            pydantic model class, since the visible caller passes the result
            of a model factory (verify against callers).
        name: Schema name placed in the payload. Defaults to the previously
            hard-coded ``"PresentationOutlineModel"``.

    Returns:
        A dict with ``"type": "json_schema"`` and a nested ``"json_schema"``
        entry holding the schema name and the model's JSON schema.
    """
    return {
        "type": "json_schema",
        "json_schema": {
            "name": name,
            "schema": response_model.model_json_schema(),
        },
    }
async def generate_ppt_outline(
prompt: Optional[str],
n_slides: int,
language: Optional[str] = None,
content: Optional[str] = None,
):
model = get_large_model()
model = get_model()
response_model = get_presentation_outline_model_with_n_slides(n_slides)
if not is_google_selected():
client = get_llm_client()
async for response in await client.chat.completions.create(
model=model,
messages=get_prompt_template(prompt, n_slides, language, content),
stream=True,
response_format=get_response_format(response_model),
):
delta: ChoiceDelta = response.choices[0].delta
if delta.content:
yield delta.content
client = LLMClient()
else:
client = get_google_llm_client()
generate_stream = iterator_to_async(client.models.generate_content_stream)
async for event in generate_stream(
model=model,
contents=[get_user_prompt(prompt, n_slides, language, content)],
config=GenerateContentConfig(
system_instruction=system_prompt,
response_mime_type="application/json",
response_json_schema=response_model.model_json_schema(),
),
):
if event.text:
yield event.text
async for chunk in client.stream_structured(
model,
get_messages(prompt, n_slides, language, content),
response_model.model_json_schema(),
strict=True,
):
yield chunk

View file

@ -1,24 +1,19 @@
from models.llm_message import LLMMessage
from models.presentation_layout import PresentationLayoutModel
from models.presentation_outline_model import PresentationOutlineModel
from utils.llm_provider import (
get_large_model,
get_llm_client,
get_nano_model,
get_small_model,
)
from utils.get_dynamic_models import (
get_presentation_structure_model_with_n_slides,
)
from models.presentation_structure_model import (
PresentationStructureModel,
)
from services.llm_client import LLMClient
from utils.llm_provider import get_model
from utils.get_dynamic_models import get_presentation_structure_model_with_n_slides
from models.presentation_structure_model import PresentationStructureModel
def get_prompt(presentation_layout: PresentationLayoutModel, n_slides: int, data: str):
def get_messages(
presentation_layout: PresentationLayoutModel, n_slides: int, data: str
):
return [
{
"role": "system",
"content": f"""
LLMMessage(
role="system",
content=f"""
You're a professional presentation designer with creative freedom to design engaging presentations.
{presentation_layout.to_string()}
@ -51,13 +46,13 @@ def get_prompt(presentation_layout: PresentationLayoutModel, n_slides: int, data
Select layout index for each of the {n_slides} slides based on what will best serve the presentation's goals.
""",
},
{
"role": "user",
"content": f"""
),
LLMMessage(
role="user",
content=f"""
{data}
""",
},
),
]
@ -66,20 +61,20 @@ async def generate_presentation_structure(
presentation_layout: PresentationLayoutModel,
) -> PresentationStructureModel:
client = get_llm_client()
model = get_large_model()
client = LLMClient()
model = get_model()
response_model = get_presentation_structure_model_with_n_slides(
len(presentation_outline.slides)
)
response = await client.beta.chat.completions.parse(
response = await client.generate_structured(
model=model,
messages=get_prompt(
messages=get_messages(
presentation_layout,
len(presentation_outline.slides),
presentation_outline.to_string(),
),
response_format=response_model,
response_format=response_model.model_json_schema(),
strict=True,
)
print(response.choices[0].message.parsed)
return response.choices[0].message.parsed
return PresentationStructureModel(**response)

Some files were not shown because too many files have changed in this diff Show more