Merge branch 'main' into feat/docker-release-electron-sync

This commit is contained in:
Sudip Parajuli 2026-04-20 20:56:20 +05:45 committed by GitHub
commit 7f4d6acdd8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 1056 additions and 97 deletions

View file

@ -43,6 +43,8 @@ services:
- MEM0_EMBEDDING_DIMS=${MEM0_EMBEDDING_DIMS:-384}
- LITEPARSE_DPI=${LITEPARSE_DPI:-120}
- LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1}
- OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL}
- OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY}
production-gpu:
# image: ghcr.io/presenton/presenton:latest
@ -96,6 +98,9 @@ services:
- LITEPARSE_DPI=${LITEPARSE_DPI:-120}
- LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1}
- OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL}
- OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY}
development:
build:
context: .
@ -143,6 +148,8 @@ services:
- MEM0_EMBEDDING_DIMS=${MEM0_EMBEDDING_DIMS:-384}
- LITEPARSE_DPI=${LITEPARSE_DPI:-120}
- LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1}
- OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL}
- OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY}
development-gpu:
build:
@ -201,3 +208,5 @@ services:
volumes:
presenton_root_node_modules:
presenton_document_extraction_liteparse:
- OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL}
- OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY}

View file

@ -63,6 +63,7 @@ export function setupExportHandlers() {
const exportTaskProcess = spawn(process.execPath, [exportScriptPath, exportTaskPath], {
stdio: ["ignore", "pipe", "pipe"],
cwd: baseDir,
windowsHide: process.platform === "win32",
env: {
...process.env,
ELECTRON_RUN_AS_NODE: "1",

View file

@ -173,7 +173,7 @@ export function getImageMagickBinaryPath(): string {
export function getImageMagickDownloadUrl(): string {
if (process.platform === "win32") {
return "https://imagemagick.org/archive/binaries/ImageMagick-7.1.2-18-Q16-HDRI-x64-dll.exe";
return "https://github.com/ImageMagick/ImageMagick/releases/download/7.1.2-18/ImageMagick-7.1.2-18-Q16-HDRI-x64-dll.exe";
}
if (process.platform === "darwin") {
return "https://brew.sh/";

View file

@ -1,12 +1,12 @@
{
"name": "presenton",
"version": "0.7.2-beta",
"version": "0.7.3-beta",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "presenton",
"version": "0.7.2-beta",
"version": "0.7.3-beta",
"hasInstallScript": true,
"dependencies": {
"@llamaindex/liteparse": "^1.4.0",

View file

@ -1,8 +1,8 @@
{
"name": "presenton",
"productName": "Presenton Open Source",
"version": "0.7.2-beta",
"exportVersion": "v0.2.0",
"version": "0.7.3-beta",
"exportVersion": "v0.2.2",
"main": "app_dist/main.js",
"description": "Open-Source AI Presentation Generator",
"homepage": "https://presenton.ai",

View file

@ -4,7 +4,7 @@ import os
from fastapi import FastAPI
from migrations import migrate_database_on_startup
from services.database import create_db_and_tables
from services.database import create_db_and_tables, dispose_engines
from utils.get_env import get_app_data_directory_env
from utils.model_availability import (
check_llm_and_image_provider_api_or_model_availability,
@ -24,3 +24,5 @@ async def app_lifespan(_: FastAPI):
await create_db_and_tables()
await check_llm_and_image_provider_api_or_model_availability()
yield
# Shutdown: release all database connections to prevent stale/leaked pools.
await dispose_engines()

View file

@ -1,4 +1,4 @@
from typing import Any, Callable, Coroutine, Optional
from typing import Any, Callable, Coroutine
from pydantic import BaseModel, Field

View file

@ -17,6 +17,7 @@ dependencies = [
"google-genai>=1.28.0",
# Platform-specific: greenlet for macOS only (critical for SQLAlchemy async)
"greenlet>=3.0.0; sys_platform == 'darwin'",
"jsonschema>=4.25.0",
"nltk>=3.9.1",
"openai>=1.98.0",
"pathvalidate>=3.3.1",

View file

@ -20,14 +20,20 @@ from models.sql.template import TemplateModel
from models.sql.template_create_info import TemplateCreateInfoModel
from models.sql.slide import SlideModel
from models.sql.webhook_subscription import WebhookSubscription
from utils.db_utils import get_database_url_and_connect_args
from utils.db_utils import get_database_url_and_connect_args, get_pool_kwargs
from utils.get_env import get_app_data_directory_env
from utils.get_env import get_migrate_database_on_startup_env
database_url, connect_args = get_database_url_and_connect_args()
sql_engine: AsyncEngine = create_async_engine(database_url, connect_args=connect_args)
# Apply connection-pool settings for server-class databases (PostgreSQL, MySQL).
# SQLite uses a file-lock model and ignores pool configuration, so we skip it.
_pool_kwargs = get_pool_kwargs() if "sqlite" not in database_url else {}
sql_engine: AsyncEngine = create_async_engine(
database_url, connect_args=connect_args, **_pool_kwargs
)
async_session_maker = async_sessionmaker(sql_engine, expire_on_commit=False)
@ -81,3 +87,14 @@ async def create_db_and_tables():
tables=[OllamaPullStatus.__table__],
)
)
async def dispose_engines():
    """Dispose the connection pools of both module-level engines.

    Call this during application shutdown (e.g. in a FastAPI ``shutdown``
    event or lifespan context) so pooled connections are released instead
    of being left stale or leaked.

    The second disposal runs in a ``finally`` block: if disposing
    ``sql_engine`` raises, ``container_db_engine`` is still disposed and the
    original exception then propagates to the caller.
    """
    try:
        await sql_engine.dispose()
    finally:
        # Always attempt this, even when the first dispose failed.
        await container_db_engine.dispose()

View file

@ -1,6 +1,7 @@
import asyncio
import dirtyjson
import json
import logging
from typing import AsyncGenerator, List, Optional, Dict, Any
from fastapi import HTTPException
from openai import APIStatusError, AsyncOpenAI, OpenAIError
@ -69,11 +70,15 @@ from utils.schema_utils import (
ensure_array_schemas_have_items,
ensure_strict_json_schema,
flatten_json_schema,
get_schema_validation_errors,
remove_titles_from_schema,
)
LOGGER = logging.getLogger(__name__)
class LLMClient:
def __init__(self):
self.llm_provider = get_llm_provider()
@ -95,6 +100,59 @@ class LLMClient:
return False
return parse_bool_or_none(get_web_grounding_env()) or False
def web_search_enabled_for_request(self, web_search: bool) -> bool:
    """Tell whether SearchWebTool should be attached for this request.

    The decision depends only on the per-request ``web_search`` flag and the
    configured provider; ``enable_web_grounding`` is not consulted here, so a
    stored "off" value there cannot switch off per-deck web search.

    Returns ``False`` when ``web_search`` is falsy or when the provider is
    Ollama, Custom or Codex; ``True`` otherwise.
    """
    if web_search and self.llm_provider not in (
        LLMProvider.OLLAMA,
        LLMProvider.CUSTOM,
        LLMProvider.CODEX,
    ):
        return True
    return False
def outline_uses_prefetched_web_facts(self, web_search: bool) -> bool:
    """Tell whether outline generation should prefetch web facts up front.

    Chat Completions combined with json_schema rarely invokes custom
    function tools, so for OpenAI the facts are fetched beforehand and
    attached as context rather than relying on ``SearchWebTool`` in the same
    call.

    Returns ``True`` only when web search is enabled for the request (see
    ``web_search_enabled_for_request``) and the provider is OpenAI.
    """
    if self.web_search_enabled_for_request(web_search):
        return self.llm_provider == LLMProvider.OPENAI
    return False
async def prefetch_outline_web_facts(
    self,
    content: str,
    additional_context: Optional[str] = None,
) -> Optional[str]:
    """Fetch a web-research summary to ground an outline (best effort).

    Args:
        content: Main presentation content; may be empty or ``None``.
        additional_context: Optional extra context appended to the topic.

    Returns:
        The stripped search summary, or ``None`` when the provider is not
        OpenAI/Codex, the search produced no text, or the search failed.

    Failures never propagate to the caller. They are logged at warning level
    with the traceback so a broken prefetch can be diagnosed.
    """
    if self.llm_provider not in (LLMProvider.OPENAI, LLMProvider.CODEX):
        return None

    parts = [(content or "").strip(), (additional_context or "").strip()]
    topic = "\n\n".join(p for p in parts if p)
    if not topic:
        topic = "general presentation topic"
    # Cap the topic so the search query stays bounded in size.
    topic = topic[:12000]

    query = (
        "Search the web and summarize the most relevant current facts, statistics, "
        "and notable recent developments for this presentation topic. Use concise "
        "bullet points; include approximate dates or time ranges when known.\n\n"
        f"Topic:\n{topic}"
    )
    try:
        text = await self._search_openai(query)
        out = (text or "").strip()
        return out or None
    except Exception:
        # Deliberately best-effort: record the failure, then carry on
        # without web research instead of failing outline generation.
        LOGGER.warning(
            "Outline web facts prefetch failed; continuing without web research",
            exc_info=True,
        )
        return None
# ? Disable thinking
def disable_thinking(self) -> bool:
    """Return the parsed disable-thinking environment flag, or ``False`` when it is unset or falsy."""
    flag = parse_bool_or_none(get_disable_thinking_env())
    return flag if flag else False
@ -1067,6 +1125,101 @@ class LLMClient:
depth=depth,
)
async def _generate_structured_once(
    self,
    model: str,
    messages: List[LLMMessage],
    response_format: dict,
    strict: bool = False,
    tools: Optional[List[dict]] = None,
    max_tokens: Optional[int] = None,
) -> dict | None:
    """Run one structured-generation call against the configured provider.

    Dispatches on ``self.llm_provider`` to the matching
    ``_generate_<provider>_structured`` method. ``strict`` is forwarded to
    OpenAI, Codex, Ollama and Custom; ``tools`` is forwarded to OpenAI,
    Codex, Google and Anthropic.

    Returns:
        Whatever the provider method returns, or ``None`` when the provider
        matches none of the handled cases.
    """
    # Arguments every provider-specific method receives.
    shared: Dict[str, Any] = dict(
        model=model,
        messages=messages,
        response_format=response_format,
        max_tokens=max_tokens,
    )
    provider = self.llm_provider

    if provider == LLMProvider.OPENAI:
        return await self._generate_openai_structured(
            strict=strict, tools=tools, **shared
        )
    if provider == LLMProvider.CODEX:
        return await self._generate_codex_structured(
            strict=strict, tools=tools, **shared
        )
    if provider == LLMProvider.GOOGLE:
        return await self._generate_google_structured(tools=tools, **shared)
    if provider == LLMProvider.ANTHROPIC:
        return await self._generate_anthropic_structured(tools=tools, **shared)
    if provider == LLMProvider.OLLAMA:
        return await self._generate_ollama_structured(strict=strict, **shared)
    if provider == LLMProvider.CUSTOM:
        return await self._generate_custom_structured(strict=strict, **shared)
    return None
def _get_structured_validation_feedback_message(
    self,
    content: dict,
    validation_errors: List[str],
) -> LLMUserMessage:
    """Build the user message that asks the model to repair invalid JSON.

    Args:
        content: The previous response that failed schema validation.
        validation_errors: Human-readable validation error strings.

    Returns:
        An ``LLMUserMessage`` listing at most 10 errors (plus a count of the
        rest) and the previous JSON, cut off after 6000 characters.
    """
    error_limit = 10
    json_char_limit = 6000

    shown_errors = validation_errors[:error_limit]
    if len(validation_errors) > error_limit:
        shown_errors.append(
            f"...and {len(validation_errors) - error_limit} more validation errors."
        )

    serialized = json.dumps(content, ensure_ascii=False, indent=2, default=str)
    if len(serialized) > json_char_limit:
        serialized = serialized[:json_char_limit] + "\n... (truncated)"

    bullet_list = "\n".join(f"- {error}" for error in shown_errors)
    message_text = (
        "The previous JSON response did not match the required response schema.\n\n"
        "Validation errors:\n"
        + bullet_list
        + "\n\nPrevious invalid JSON:\n"
        + f"```json\n{serialized}\n```\n\n"
        + "Return corrected JSON only. Make sure it fully matches the required schema."
    )
    return LLMUserMessage(content=message_text)
async def generate_structured(
self,
model: str,
@ -1075,68 +1228,69 @@ class LLMClient:
strict: bool = False,
tools: Optional[List[type[LLMTool] | LLMDynamicTool]] = None,
max_tokens: Optional[int] = None,
validate_schema: bool = False,
validate_schema_max_loop_count: int = 5,
) -> dict:
parsed_tools = self.tool_calls_handler.parse_tools(tools)
max_validation_loops = max(1, validate_schema_max_loop_count)
working_messages = [*messages]
for attempt in range(3):
for validation_attempt in range(max_validation_loops):
content = None
match self.llm_provider:
case LLMProvider.OPENAI:
content = await self._generate_openai_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.CODEX:
content = await self._generate_codex_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.GOOGLE:
content = await self._generate_google_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.ANTHROPIC:
content = await self._generate_anthropic_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.OLLAMA:
content = await self._generate_ollama_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
case LLMProvider.CUSTOM:
content = await self._generate_custom_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
for attempt in range(3):
content = await self._generate_structured_once(
model=model,
messages=working_messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
if content is not None:
if content is not None:
break
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
if content is None:
raise HTTPException(
status_code=400,
detail="LLM did not return any content",
)
if not validate_schema:
return content
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
validation_errors = get_schema_validation_errors(
response_format,
content,
strict=strict,
)
if not validation_errors:
return content
formatted_validation_errors = " | ".join(validation_errors)
if validation_attempt == max_validation_loops - 1:
LOGGER.warning(
"Validation error after max fixes, returning last response: %s",
formatted_validation_errors,
)
return content
LOGGER.warning(
"Validation error, attempting fix %s/%s: %s",
validation_attempt + 1,
max_validation_loops - 1,
formatted_validation_errors,
)
working_messages.append(
self._get_structured_validation_feedback_message(
content,
validation_errors,
)
)
raise HTTPException(
status_code=400,
@ -1652,7 +1806,7 @@ class LLMClient:
current_arguments = None
has_response_schema_tool_call = False
async for event in await client.chat.completions.create(
completion_kwargs: Dict[str, Any] = dict(
model=model,
messages=[message.model_dump() for message in messages],
max_completion_tokens=max_tokens,
@ -1673,7 +1827,11 @@ class LLMClient:
),
extra_body=extra_body,
stream=True,
):
)
if all_tools:
completion_kwargs["tool_choice"] = "auto"
completion_kwargs["parallel_tool_calls"] = True
async for event in await client.chat.completions.create(**completion_kwargs):
event: OpenAIChatCompletionChunk = event
if not event.choices:
continue
@ -1754,8 +1912,6 @@ class LLMClient:
):
yield event
async def _stream_codex_structured(
self,
model: str,

View file

@ -55,7 +55,7 @@ class LLMToolCallsHandler:
self.dynamic_tools.append(tool)
match self.client.llm_provider:
case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM:
case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM | LLMProvider.CODEX:
return self.parse_tool_openai(tool, strict)
case LLMProvider.ANTHROPIC:
return self.parse_tool_anthropic(tool)
@ -63,7 +63,7 @@ class LLMToolCallsHandler:
return self.parse_tool_google(tool)
case _:
raise ValueError(
f"LLM provider must be either openai, anthropic, or google"
"LLM provider must be one of: openai, anthropic, google, codex, ollama, custom"
)
def parse_tool_openai(
@ -181,7 +181,7 @@ class LLMToolCallsHandler:
# Search web tool call handler
async def search_web_tool_call_handler(self, arguments: str) -> str:
match self.client.llm_provider:
case LLMProvider.OPENAI:
case LLMProvider.OPENAI | LLMProvider.CODEX:
return await self.search_web_tool_call_handler_openai(arguments)
case LLMProvider.ANTHROPIC:
return await self.search_web_tool_call_handler_anthropic(arguments)

View file

@ -0,0 +1,338 @@
import asyncio
import uuid
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch
from enums.llm_provider import LLMProvider
from models.llm_message import LLMUserMessage
from models.presentation_outline_model import PresentationOutlineModel, SlideOutlineModel
from models.sql.slide import SlideModel
from services.llm_client import LLMClient
from templates.presentation_layout import PresentationLayoutModel, SlideLayoutModel
from utils.llm_calls.edit_slide import get_edited_slide_content
from utils.llm_calls.generate_presentation_structure import (
generate_presentation_structure,
)
from utils.llm_calls.generate_slide_content import get_slide_content_from_type_and_outline
from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt
def _build_client() -> LLMClient:
    """Create an ``LLMClient`` without running ``__init__``.

    The instance is set up as an OpenAI client whose tool-call handler's
    ``parse_tools`` always returns ``None``.
    """
    stub_handler = SimpleNamespace(parse_tools=lambda tools: None)
    bare_client = object.__new__(LLMClient)
    bare_client.llm_provider = LLMProvider.OPENAI
    bare_client.tool_calls_handler = stub_handler
    return bare_client
def _build_layout() -> PresentationLayoutModel:
    """Return a presentation layout holding a single title-only slide layout."""
    title_only_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
        },
        "required": ["title"],
        "additionalProperties": False,
    }
    title_slide = SlideLayoutModel(
        id="layout-1",
        name="Title Slide",
        description="Single title layout",
        json_schema=title_only_schema,
    )
    return PresentationLayoutModel(name="Test Layout", slides=[title_slide])
def _build_slide() -> SlideModel:
    """Return a slide at index 0 using ``layout-1`` with a fresh random presentation id."""
    slide_fields = dict(
        presentation=uuid.uuid4(),
        layout_group="default",
        layout="layout-1",
        index=0,
        content={"title": "Current title"},
    )
    return SlideModel(**slide_fields)
def test_generate_structured_skips_validation_when_disabled():
    """With ``validate_schema=False`` a schema-violating payload is returned after one call."""
    llm = _build_client()
    seen_messages = []

    async def record_and_reply(**kwargs):
        seen_messages.append(kwargs["messages"])
        return {"title": 123}

    llm._generate_structured_once = AsyncMock(side_effect=record_and_reply)
    title_schema = {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
        "additionalProperties": False,
    }

    result = asyncio.run(
        llm.generate_structured(
            model="test-model",
            messages=[LLMUserMessage(content="Generate JSON")],
            response_format=title_schema,
            validate_schema=False,
        )
    )

    assert result == {"title": 123}
    # Exactly one provider call, carrying only the original message.
    assert len(seen_messages) == 1
    assert len(seen_messages[0]) == 1
def test_generate_structured_retries_with_validation_feedback():
    """An invalid first reply triggers one retry that carries a validation feedback message."""
    llm = _build_client()
    seen_messages = []
    canned_replies = [
        {"title": 123},
        {"title": "Valid title"},
    ]

    async def record_and_reply(**kwargs):
        seen_messages.append(kwargs["messages"])
        # Reply N corresponds to the N-th recorded call.
        return canned_replies[len(seen_messages) - 1]

    llm._generate_structured_once = AsyncMock(side_effect=record_and_reply)
    title_schema = {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
        "additionalProperties": False,
    }

    with patch("services.llm_client.LOGGER.warning") as mock_warning:
        result = asyncio.run(
            llm.generate_structured(
                model="test-model",
                messages=[LLMUserMessage(content="Generate JSON")],
                response_format=title_schema,
                validate_schema=True,
            )
        )

    assert result == {"title": "Valid title"}
    assert len(seen_messages) == 2

    feedback = seen_messages[1][-1]
    assert isinstance(feedback, LLMUserMessage)
    assert "Validation errors:" in feedback.content
    assert "$.title" in feedback.content
    assert '"title": 123' in feedback.content

    mock_warning.assert_called_once()
    # Positional arg 3 of the warning call is the joined validation errors.
    assert "$.title" in mock_warning.call_args.args[3]
def test_generate_structured_returns_last_invalid_response_at_max_loop_count():
    """When every attempt is invalid, the reply from the final allowed loop is returned."""
    llm = _build_client()
    seen_messages = []
    canned_replies = [
        {"title": 123},
        {"title": False},
        {"title": "should not be used"},
    ]

    async def record_and_reply(**kwargs):
        seen_messages.append(kwargs["messages"])
        return canned_replies[len(seen_messages) - 1]

    llm._generate_structured_once = AsyncMock(side_effect=record_and_reply)
    title_schema = {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
        "additionalProperties": False,
    }

    result = asyncio.run(
        llm.generate_structured(
            model="test-model",
            messages=[LLMUserMessage(content="Generate JSON")],
            response_format=title_schema,
            validate_schema=True,
            validate_schema_max_loop_count=2,
        )
    )

    # Two loops allowed: the second (still invalid) reply comes back and the
    # third canned reply is never requested.
    assert result == {"title": False}
    assert len(seen_messages) == 2
def test_generate_structured_uses_strict_schema_for_validation():
    """With ``strict=True`` a reply missing ``subtitle`` is rejected and retried."""
    llm = _build_client()
    seen_messages = []
    canned_replies = [
        {"title": "Only title"},
        {"title": "Valid title", "subtitle": "Valid subtitle"},
    ]

    async def record_and_reply(**kwargs):
        seen_messages.append(kwargs["messages"])
        return canned_replies[len(seen_messages) - 1]

    llm._generate_structured_once = AsyncMock(side_effect=record_and_reply)
    # The schema declares no "required" list of its own.
    two_field_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "subtitle": {"type": "string"},
        },
    }

    result = asyncio.run(
        llm.generate_structured(
            model="test-model",
            messages=[LLMUserMessage(content="Generate JSON")],
            response_format=two_field_schema,
            strict=True,
            validate_schema=True,
        )
    )

    assert result == {"title": "Valid title", "subtitle": "Valid subtitle"}
    assert len(seen_messages) == 2

    feedback = seen_messages[1][-1]
    assert "required property" in feedback.content
    assert "subtitle" in feedback.content
def test_generate_structured_preserves_no_content_retries():
    """Two empty (``None``) replies are retried and the third, non-empty reply is returned."""
    llm = _build_client()
    once_mock = AsyncMock(side_effect=[None, None, {"title": "Valid title"}])
    llm._generate_structured_once = once_mock
    title_schema = {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
        "additionalProperties": False,
    }

    result = asyncio.run(
        llm.generate_structured(
            model="test-model",
            messages=[LLMUserMessage(content="Generate JSON")],
            response_format=title_schema,
        )
    )

    assert result == {"title": "Valid title"}
    assert llm._generate_structured_once.await_count == 3
def test_edit_slide_enables_schema_validation():
    """``get_edited_slide_content`` must pass ``validate_schema=True`` to ``generate_structured``."""
    structured_mock = AsyncMock(
        return_value={
            "title": "Edited title",
            "__speaker_note__": "x" * 120,
        }
    )
    fake_client = SimpleNamespace(generate_structured=structured_mock)

    client_patch = patch(
        "utils.llm_calls.edit_slide.LLMClient", return_value=fake_client
    )
    model_patch = patch(
        "utils.llm_calls.edit_slide.get_model",
        return_value="test-model",
    )
    with client_patch, model_patch:
        edited = asyncio.run(
            get_edited_slide_content(
                prompt="Update the title",
                slide=_build_slide(),
                language="English",
                slide_layout=_build_layout().slides[0],
            )
        )

    assert edited["title"] == "Edited title"
    assert fake_client.generate_structured.await_args.kwargs["validate_schema"] is True
def test_generate_presentation_structure_enables_schema_validation():
    """``generate_presentation_structure`` must pass ``validate_schema=True`` to ``generate_structured``."""
    fake_client = SimpleNamespace(
        generate_structured=AsyncMock(return_value={"slides": [0]})
    )
    slides_schema = {
        "type": "object",
        "properties": {
            "slides": {
                "type": "array",
                "items": {"type": "integer"},
            }
        },
        "required": ["slides"],
        "additionalProperties": False,
    }
    fake_response_model = SimpleNamespace(model_json_schema=lambda: slides_schema)
    outline = PresentationOutlineModel(
        slides=[SlideOutlineModel(content="Outline content")]
    )

    client_patch = patch(
        "utils.llm_calls.generate_presentation_structure.LLMClient",
        return_value=fake_client,
    )
    model_patch = patch(
        "utils.llm_calls.generate_presentation_structure.get_model",
        return_value="test-model",
    )
    response_model_patch = patch(
        "utils.llm_calls.generate_presentation_structure.get_presentation_structure_model_with_n_slides",
        return_value=fake_response_model,
    )
    with client_patch, model_patch, response_model_patch:
        structure = asyncio.run(
            generate_presentation_structure(
                presentation_outline=outline,
                presentation_layout=_build_layout(),
            )
        )

    assert structure.slides == [0]
    assert fake_client.generate_structured.await_args.kwargs["validate_schema"] is True
def test_generate_slide_content_enables_schema_validation():
    """``get_slide_content_from_type_and_outline`` must pass ``validate_schema=True``."""
    structured_mock = AsyncMock(
        return_value={
            "title": "Slide title",
            "__speaker_note__": "x" * 120,
        }
    )
    fake_client = SimpleNamespace(generate_structured=structured_mock)

    client_patch = patch(
        "utils.llm_calls.generate_slide_content.LLMClient",
        return_value=fake_client,
    )
    model_patch = patch(
        "utils.llm_calls.generate_slide_content.get_model",
        return_value="test-model",
    )
    with client_patch, model_patch:
        generated = asyncio.run(
            get_slide_content_from_type_and_outline(
                slide_layout=_build_layout().slides[0],
                outline=SlideOutlineModel(content="Slide outline"),
                language="English",
            )
        )

    assert generated["title"] == "Slide title"
    assert fake_client.generate_structured.await_args.kwargs["validate_schema"] is True
def test_select_slide_type_on_edit_enables_schema_validation():
    """``get_slide_layout_from_prompt`` must pass ``validate_schema=True`` and return the indexed layout."""
    fake_client = SimpleNamespace(
        generate_structured=AsyncMock(return_value={"index": 0})
    )
    test_layout = _build_layout()

    client_patch = patch(
        "utils.llm_calls.select_slide_type_on_edit.LLMClient",
        return_value=fake_client,
    )
    model_patch = patch(
        "utils.llm_calls.select_slide_type_on_edit.get_model",
        return_value="test-model",
    )
    with client_patch, model_patch:
        chosen = asyncio.run(
            get_slide_layout_from_prompt(
                prompt="Use the first layout",
                layout=test_layout,
                slide=_build_slide(),
            )
        )

    assert chosen.id == "layout-1"
    assert fake_client.generate_structured.await_args.kwargs["validate_schema"] is True

View file

@ -4,6 +4,40 @@ from urllib.parse import urlsplit, urlunsplit, parse_qsl
import ssl
def _int_env(name: str, default: int) -> int:
"""Read an integer from an environment variable, falling back to *default*."""
raw = os.getenv(name)
if raw is None:
return default
try:
return int(raw)
except ValueError:
return default
def get_pool_kwargs() -> dict:
    """Build SQLAlchemy engine pool keyword arguments from environment variables.

    Supported variables (all optional):
        DB_POOL_SIZE      max persistent connections (default 5)
        DB_MAX_OVERFLOW   extra connections above pool_size (default 10)
        DB_POOL_TIMEOUT   seconds to wait for a connection (default 30)
        DB_POOL_RECYCLE   seconds before a connection is recycled (default 1800)
        DB_POOL_PRE_PING  enable connection liveness check (default true)

    Integer variables that are unset or unparsable fall back to their
    defaults. ``DB_POOL_PRE_PING`` is treated as enabled unless its value,
    ignoring case and surrounding whitespace, is ``false``, ``0`` or ``no``.

    This function does not inspect the database URL and always returns all
    five settings. Callers that must not apply pool options (for example
    when connecting to SQLite) have to skip the result themselves.
    """
    # Strip whitespace so values such as "false " (common in .env files) are
    # honoured, matching the tolerance int() gives the integer settings.
    pre_ping_raw = os.getenv("DB_POOL_PRE_PING", "true").strip().lower()
    return {
        "pool_size": _int_env("DB_POOL_SIZE", 5),
        "max_overflow": _int_env("DB_MAX_OVERFLOW", 10),
        "pool_timeout": _int_env("DB_POOL_TIMEOUT", 30),
        "pool_recycle": _int_env("DB_POOL_RECYCLE", 1800),
        "pool_pre_ping": pre_ping_raw not in ("false", "0", "no"),
    }
def _ensure_sqlite_parent_dir(database_url: str) -> None:
if not database_url.startswith("sqlite://"):
return

View file

@ -108,7 +108,7 @@ async def get_edited_slide_content(
"__speaker_note__": {
"type": "string",
"minLength": 100,
"maxLength": 250,
"maxLength": 500,
"description": "Speaker note for the slide",
}
},
@ -124,6 +124,7 @@ async def get_edited_slide_content(
),
response_format=response_schema,
strict=False,
validate_schema=True,
)
return response

View file

@ -16,6 +16,7 @@ def get_system_prompt(
instructions: Optional[str] = None,
include_title_slide: bool = True,
include_table_of_contents: bool = False,
web_search: bool = False,
):
verbosity_instruction = (
"Slide content should be abound 20 words but detailed enough to generate a good slide."
@ -40,6 +41,27 @@ def get_system_prompt(
)
toc_block = f"{toc_instruction}\n" if toc_instruction else ""
if web_search:
tools_hint = "Try to use available tools when they improve accuracy.\n"
web_block = (
"Web search is enabled: use any \"## Web research (current sources)\" section in Context when present, "
"and call SearchWebTool when it is available for fresh facts.\n"
)
else:
tools_hint = ""
web_block = "Do not use web search for this outline; rely on Content and Context only.\n"
url_line = (
"Only include URLs if they appear in Content, Context, or a \"## Web research (current sources)\" block.\n"
if web_search
else "Only include URLs if they appear in the provided content/context.\n"
)
data_line = (
"Ground slide data in Content and Context, and in \"## Web research (current sources)\" when that block is present.\n"
if web_search
else "Make sure data used is strictly from the provided content/context.\n"
)
slide_outline_structure = (
"Each slide content:\n"
" - Must have a ## title.\n"
@ -60,11 +82,13 @@ def get_system_prompt(
"If 'auto-detect' is used, figure it out from the content/context.\n"
f"{title_slide_instruction}\n"
f"{toc_block}"
f"{tools_hint}"
f"{web_block}"
f"{slide_outline_structure}\n"
"Slide content must not contain any presentation branding/styling information.\n"
"Title slide must only contain title, presenter name, date and overview.\n"
"Only include URLs if they appear in the provided content/context.\n"
"Make sure data used is strictly from the provided content/context.\n"
f"{url_line}"
f"{data_line}"
"Make sure data is consistent across all slides."
)
@ -124,6 +148,7 @@ def get_messages(
instructions: Optional[str] = None,
include_title_slide: bool = True,
include_table_of_contents: bool = False,
web_search: bool = False,
):
return [
LLMSystemMessage(
@ -133,6 +158,7 @@ def get_messages(
instructions,
include_title_slide,
include_table_of_contents,
web_search=web_search,
),
),
LLMUserMessage(
@ -170,6 +196,21 @@ async def generate_ppt_outline(
)
client = LLMClient()
web_search_enabled = client.web_search_enabled_for_request(web_search)
merged_context = additional_context
if client.outline_uses_prefetched_web_facts(web_search):
facts = await client.prefetch_outline_web_facts(content, additional_context)
if facts:
merged_context = (
f"{(additional_context or '').strip()}\n\n## Web research (current sources)\n{facts}"
if (additional_context or "").strip()
else f"## Web research (current sources)\n{facts}"
)
use_search_tool = web_search_enabled and not client.outline_uses_prefetched_web_facts(
web_search
)
try:
async for chunk in client.stream_structured(
@ -178,20 +219,17 @@ async def generate_ppt_outline(
content,
n_slides,
language,
additional_context,
merged_context,
tone,
verbosity,
instructions,
include_title_slide,
include_table_of_contents,
web_search=web_search_enabled,
),
response_model.model_json_schema(),
strict=True,
tools=(
[SearchWebTool]
if (client.enable_web_grounding() and web_search)
else None
),
tools=([SearchWebTool] if use_search_tool else None),
):
yield chunk
except Exception as e:

View file

@ -167,6 +167,7 @@ async def generate_presentation_structure(
),
response_format=response_model.model_json_schema(),
strict=True,
validate_schema=True,
)
return PresentationStructureModel(**response)
except Exception as e:

View file

@ -24,7 +24,7 @@ You need to generate structured content json based on the schema.
# General Rules
- Make sure to follow language guidelines.
- Speaker note should be normal text, not markdown.
- Never ever go over the max character limit.
- Never ever go over the max character limit but don't clip the sentence to satisfy character limit instead rephrase it.
- Do not add emoji in the content.
- Don't provide $schema field in content json.
{markdown_emphasis_rules}
@ -167,7 +167,7 @@ async def get_slide_content_from_type_and_outline(
"__speaker_note__": {
"type": "string",
"minLength": 100,
"maxLength": 250,
"maxLength": 500,
"description": "Speaker note for the slide",
}
},
@ -187,6 +187,7 @@ async def get_slide_content_from_type_and_outline(
),
response_format=response_schema,
strict=False,
validate_schema=True,
)
return response

View file

@ -58,6 +58,7 @@ async def get_slide_layout_from_prompt(
),
response_format=SlideLayoutIndex.model_json_schema(),
strict=True,
validate_schema=True,
)
index = SlideLayoutIndex(**response).index
return layout.slides[index]

View file

@ -1,6 +1,7 @@
from copy import deepcopy
from typing import Any, List
from jsonschema.validators import validator_for
from openai import NOT_GIVEN
from utils.dict_utils import (
@ -323,6 +324,53 @@ def ensure_array_schemas_have_items(schema: dict) -> dict[str, Any]:
return _ensure(result)
def prepare_schema_for_validation(
    schema: dict,
    strict: bool = False,
) -> dict[str, Any]:
    """Return a validation-ready copy of *schema*.

    The schema is deep-copied first. When *strict* is true the copy is passed
    through ``ensure_strict_json_schema``. In both cases the result is then
    passed through ``ensure_array_schemas_have_items``.
    """
    working_copy = deepcopy(schema)
    if not strict:
        return ensure_array_schemas_have_items(working_copy)
    strict_schema = ensure_strict_json_schema(
        working_copy,
        path=(),
        root=working_copy,
    )
    return ensure_array_schemas_have_items(strict_schema)
def format_json_path(path: List[Any]) -> str:
    """Render a list of path parts as a ``$``-rooted path string.

    Integer parts become ``[n]`` and every other part becomes ``.part``;
    an empty path yields just ``$``.
    """
    segments = ["$"]
    for part in path:
        segments.append(f"[{part}]" if isinstance(part, int) else f".{part}")
    return "".join(segments)
def get_schema_validation_errors(
    schema: dict,
    instance: Any,
    strict: bool = False,
) -> List[str]:
    """Validate *instance* against *schema* and describe each violation.

    The schema is first prepared with ``prepare_schema_for_validation`` and
    checked with the validator class's ``check_schema``.

    Returns:
        One ``"<path>: <message>"`` string per validation error, ordered by
        formatted path and then by message. Empty when the instance is valid.
    """
    prepared = prepare_schema_for_validation(schema, strict=strict)
    validator_cls = validator_for(prepared)
    validator_cls.check_schema(prepared)
    checker = validator_cls(prepared)

    # Pair each error with its formatted path once, then order the pairs.
    located = [
        (format_json_path(list(err.path)), err.message)
        for err in checker.iter_errors(instance)
    ]
    located.sort()
    return [f"{where}: {what}" for where, what in located]
def remove_titles_from_schema(schema: dict) -> dict[str, Any]:
def _strip_titles(node: Any) -> Any:

View file

@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = "==3.11.*"
resolution-markers = [
"platform_machine == 'aarch64' and sys_platform == 'linux'",
@ -624,6 +624,7 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" },
{ url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size = 630383, upload-time = "2025-06-05T16:38:51.785Z" },
{ url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" },
{ url = "https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" },
{ url = "https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" },
{ url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" },
{ url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" },
@ -1302,6 +1303,7 @@ dependencies = [
{ name = "fastmcp" },
{ name = "google-genai" },
{ name = "greenlet", marker = "sys_platform == 'darwin'" },
{ name = "jsonschema" },
{ name = "nltk" },
{ name = "openai" },
{ name = "pathvalidate" },
@ -1329,6 +1331,7 @@ requires-dist = [
{ name = "fastmcp", specifier = ">=2.11.0" },
{ name = "google-genai", specifier = ">=1.28.0" },
{ name = "greenlet", marker = "sys_platform == 'darwin'", specifier = ">=3.0.0" },
{ name = "jsonschema", specifier = ">=4.25.0" },
{ name = "nltk", specifier = ">=3.9.1" },
{ name = "openai", specifier = ">=1.98.0" },
{ name = "pathvalidate", specifier = ">=3.3.1" },

View file

@ -1,9 +1,9 @@
{
"version": "0.7.2-beta",
"message": "What's New\n\n:repeat: Smarter Streaming & Retries\n- Outline and slide generation now retries automatically on failure - fewer interrupted generations, smoother experience end to end\n\n:frame_photo: ComfyUI Fix\n- Image generation via ComfyUI is back to working correctly - mid-generation failures resolved\n\n:art: UI & Template Polish\n- Continued refinements to UI components - tighter layouts, cleaner interactions\n- Template improvements - more consistent rendering across providers\n\n:bar_chart: Better Analytics & Error Tracking\n- Sentry integrated for crash and error monitoring - helps us catch and fix issues faster\n\n:wrench: Fixes\n- Download URL and version message corrected in version.json\n- Various stability and content fixes across the board",
"version": "0.7.3-beta",
"message": "Presenton Desktop electron-v0.7.3-beta\n\nSmarter content generation, reliable web search, no more shady Windows popups, and a round of minor fixes under the hood. Clean update. 🙌\n\nWhat's New\n\n🧠 Smarter Slide Content Generation\n• Overflow mitigation loop added — slides no longer clip or overflow when content runs long\n• Improved system prompt for slide content generation — cleaner, better-fitting output every time\n\n🔍 Web Search Fixed\n• Web search is back to working reliably during presentation generation\n\n🪟 Windows Fix\n• Export tasks no longer flash a console window on Windows — cleaner, more polished experience\n\n🔧 Minor Fixes\n• Various small fixes and stability improvements across the app\n\n---\nView full diff: electron-v0.7.2-beta → electron-v0.7.3-beta\nhttps://github.com/presenton/presenton/compare/electron-v0.7.2-beta...electron-v0.7.3-beta\n\nInstallation\nDownload Link: https://presenton.ai/download\nLove the app? Star us on GitHub → github.com/presenton/presenton",
"downloads": {
"linux": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.deb",
"mac": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.dmg",
"windows": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.exe"
"linux": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.deb",
"mac": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.dmg",
"windows": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.exe"
}
}
}

View file

@ -4,7 +4,7 @@ import os
from fastapi import FastAPI
from migrations import migrate_database_on_startup
from services.database import create_db_and_tables
from services.database import create_db_and_tables, dispose_engines
from utils.get_env import get_app_data_directory_env
from utils.model_availability import (
check_llm_and_image_provider_api_or_model_availability,
@ -24,3 +24,5 @@ async def app_lifespan(_: FastAPI):
await create_db_and_tables()
await check_llm_and_image_provider_api_or_model_availability()
yield
# Shutdown: release all database connections to prevent stale/leaked pools.
await dispose_engines()

View file

@ -9,3 +9,4 @@ class ImageProvider(Enum):
DALLE3 = "dall-e-3"
GPT_IMAGE_1_5 = "gpt-image-1.5"
COMFYUI = "comfyui"
OPEN_WEBUI = "open_webui"

View file

@ -36,6 +36,10 @@ class UserConfig(BaseModel):
COMFYUI_URL: Optional[str] = None
COMFYUI_WORKFLOW: Optional[str] = None
# Open WebUI Image Provider
OPEN_WEBUI_IMAGE_URL: Optional[str] = None
OPEN_WEBUI_IMAGE_API_KEY: Optional[str] = None
# Dalle 3 Quality
DALL_E_3_QUALITY: Optional[str] = None
# Gpt Image 1.5 Quality

View file

@ -20,14 +20,20 @@ from models.sql.template import TemplateModel
from models.sql.template_create_info import TemplateCreateInfoModel
from models.sql.slide import SlideModel
from models.sql.webhook_subscription import WebhookSubscription
from utils.db_utils import get_database_url_and_connect_args
from utils.get_env import get_app_data_directory_env
from utils.get_env import get_migrate_database_on_startup_env
from utils.db_utils import get_database_url_and_connect_args, get_pool_kwargs
database_url, connect_args = get_database_url_and_connect_args()
sql_engine: AsyncEngine = create_async_engine(database_url, connect_args=connect_args)
# Apply connection-pool settings for server-class databases (PostgreSQL, MySQL).
# SQLite uses a file-lock model and ignores pool configuration, so we skip it.
_pool_kwargs = get_pool_kwargs() if "sqlite" not in database_url else {}
sql_engine: AsyncEngine = create_async_engine(
database_url, connect_args=connect_args, **_pool_kwargs
)
async_session_maker = async_sessionmaker(sql_engine, expire_on_commit=False)
@ -81,3 +87,14 @@ async def create_db_and_tables():
tables=[OllamaPullStatus.__table__],
)
)
async def dispose_engines():
    """Dispose the connection pools of both module-level engines.

    Call this during application shutdown (e.g. in a FastAPI ``shutdown``
    event or lifespan context) to release every connection back to the
    database and prevent stale / leaked connections.

    ``container_db_engine`` is disposed in a ``finally`` clause so that a
    failure while disposing ``sql_engine`` does not leave the second pool
    open; the original error still propagates to the caller afterwards.
    """
    try:
        await sql_engine.dispose()
    finally:
        await container_db_engine.dispose()

View file

@ -13,6 +13,8 @@ from utils.get_env import (
get_dall_e_3_quality_env,
get_gpt_image_1_5_quality_env,
get_pexels_api_key_env,
get_open_webui_image_url_env,
get_open_webui_image_api_key_env,
)
from utils.get_env import get_pixabay_api_key_env
from utils.get_env import get_comfyui_url_env
@ -26,6 +28,7 @@ from utils.image_provider import (
is_nanobanana_pro_selected,
is_dalle3_selected,
is_comfyui_selected,
is_open_webui_selected,
)
import uuid
@ -54,6 +57,8 @@ class ImageGenerationService:
return self.generate_image_openai_gpt_image_1_5
elif is_comfyui_selected():
return self.generate_image_comfyui
elif is_open_webui_selected():
return self.generate_image_open_webui
return None
def is_stock_provider_selected(self):
@ -146,6 +151,88 @@ class ImageGenerationService:
get_gpt_image_1_5_quality_env() or "medium",
)
async def generate_image_open_webui(
    self, prompt: str, output_directory: str
) -> str:
    """Generate one image through an Open WebUI server and save it as a PNG file.

    The server base URL comes from ``get_open_webui_image_url_env()`` and the
    optional API key from ``get_open_webui_image_api_key_env()``. A request is
    POSTed to ``<base_url>/images/generations``; the first returned item is
    written to ``<output_directory>/<uuid4>.png``, either by decoding its
    ``b64_json`` field or by downloading its ``url``.

    Args:
        prompt: Text prompt sent to the image endpoint.
        output_directory: Directory in which the image file is created.

    Returns:
        Path of the written image file.

    Raises:
        ValueError: If the Open WebUI URL is not configured.
        Exception: On a non-200 response, an unrecognized or empty response
            body, a failed image download, or an item carrying neither
            ``b64_json`` nor ``url``.
    """
    configured_url = get_open_webui_image_url_env()
    if not configured_url:
        raise ValueError("OPEN_WEBUI_IMAGE_URL environment variable is not set")
    endpoint_root = configured_url.rstrip("/")
    token = get_open_webui_image_api_key_env() or ""

    from urllib.parse import urlparse

    # Scheme + host of the server, used to absolutize relative image URLs.
    url_parts = urlparse(endpoint_root)
    server_origin = f"{url_parts.scheme}://{url_parts.netloc}"

    # The bearer token is only attached when an API key is configured.
    auth_headers = {"Authorization": f"Bearer {token}"} if token else {}
    request_headers = {"Content-Type": "application/json", **auth_headers}

    request_body = {
        "prompt": prompt,
        "n": 1,
        "size": "1024x1024",
    }

    async with aiohttp.ClientSession(trust_env=True) as session:
        generation = await session.post(
            f"{endpoint_root}/images/generations",
            json=request_body,
            headers=request_headers,
            timeout=aiohttp.ClientTimeout(total=300),
        )
        if generation.status != 200:
            error_text = await generation.text()
            raise Exception(
                f"Open WebUI image generation returned {generation.status}: {error_text}"
            )

        body = await generation.json()

        # Open WebUI returns a bare [...] array instead of {"data": [...]}.
        if isinstance(body, list):
            results = body
        elif isinstance(body, dict) and "data" in body:
            results = body["data"]
        else:
            raise Exception(f"Unexpected response format: {type(body)}")

        if not results:
            raise Exception("Open WebUI returned empty results")

        first_result = results[0]
        image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")

        if first_result.get("b64_json"):
            # Inline payload: decode straight into the output file.
            with open(image_path, "wb") as out_file:
                out_file.write(base64.b64decode(first_result["b64_json"]))
            return image_path

        if not first_result.get("url"):
            raise Exception("Open WebUI returned no image data")

        download_url = first_result["url"]
        # Open WebUI returns relative URLs like /api/v1/files/.../content
        if download_url.startswith("/"):
            download_url = server_origin + download_url

        download = await session.get(
            download_url,
            headers=dict(auth_headers),
            timeout=aiohttp.ClientTimeout(total=120),
        )
        if download.status != 200:
            raise Exception(
                f"Failed to download image: {download.status}"
            )
        with open(image_path, "wb") as out_file:
            out_file.write(await download.read())

        return image_path
async def _generate_image_google(
self, prompt: str, output_directory: str, model: str
) -> str:

View file

@ -91,6 +91,7 @@ class LLMClient:
if (
self.llm_provider == LLMProvider.OLLAMA
or self.llm_provider == LLMProvider.CUSTOM
or self.llm_provider == LLMProvider.CODEX
):
return False
return parse_bool_or_none(get_web_grounding_env()) or False

View file

@ -20,6 +20,38 @@ def _ensure_sqlite_parent_dir(database_url: str) -> None:
parent = os.path.dirname(db_path)
if parent:
os.makedirs(parent, exist_ok=True)
def _int_env(name: str, default: int) -> int:
"""Read an integer from an environment variable, falling back to *default*."""
raw = os.getenv(name)
if raw is None:
return default
try:
return int(raw)
except ValueError:
return default
def get_pool_kwargs() -> dict:
    """Build SQLAlchemy engine pool keyword arguments from environment variables.

    Supported variables (all optional):

        DB_POOL_SIZE      -> ``pool_size``      (default 5)
        DB_MAX_OVERFLOW   -> ``max_overflow``   (default 10)
        DB_POOL_TIMEOUT   -> ``pool_timeout``   (default 30)
        DB_POOL_RECYCLE   -> ``pool_recycle``   (default 1800)
        DB_POOL_PRE_PING  -> ``pool_pre_ping``  (default true)

    ``DB_POOL_PRE_PING`` is treated as enabled unless its lower-cased value
    is ``false``, ``0`` or ``no``.

    This function always returns the full set of keys and does not inspect
    the database URL; callers that must not pass pool settings (e.g. for
    SQLite) have to skip it themselves.
    """
    integer_settings = (
        ("pool_size", "DB_POOL_SIZE", 5),
        ("max_overflow", "DB_MAX_OVERFLOW", 10),
        ("pool_timeout", "DB_POOL_TIMEOUT", 30),
        ("pool_recycle", "DB_POOL_RECYCLE", 1800),
    )
    pool_kwargs = {
        keyword: _int_env(variable, fallback)
        for keyword, variable, fallback in integer_settings
    }

    pre_ping_text = os.getenv("DB_POOL_PRE_PING", "true").lower()
    pool_kwargs["pool_pre_ping"] = pre_ping_text not in ("false", "0", "no")
    return pool_kwargs
def get_database_url_and_connect_args() -> tuple[str, dict]:

View file

@ -166,3 +166,10 @@ def get_sentry_traces_sample_rate_env():
def get_sentry_send_default_pii_env():
return os.getenv("SENTRY_SEND_DEFAULT_PII")
# Open WebUI Image Provider
def get_open_webui_image_url_env():
    """Return the ``OPEN_WEBUI_IMAGE_URL`` environment variable, or ``None`` if unset."""
    return os.environ.get("OPEN_WEBUI_IMAGE_URL")
def get_open_webui_image_api_key_env():
    """Return the ``OPEN_WEBUI_IMAGE_API_KEY`` environment variable, or ``None`` if unset."""
    return os.environ.get("OPEN_WEBUI_IMAGE_API_KEY")

View file

@ -38,6 +38,10 @@ def is_comfyui_selected() -> bool:
return ImageProvider.COMFYUI == get_selected_image_provider()
def is_open_webui_selected() -> bool:
    """Report whether the selected image provider is ``ImageProvider.OPEN_WEBUI``."""
    selected_provider = get_selected_image_provider()
    return ImageProvider.OPEN_WEBUI == selected_provider
def get_selected_image_provider() -> ImageProvider | None:
"""
Get the selected image provider from environment variables.

View file

@ -1,6 +1,7 @@
from datetime import datetime
from typing import Optional
from enums.llm_provider import LLMProvider
from models.llm_message import LLMSystemMessage, LLMUserMessage
from models.presentation_outline_model import PresentationOutlineModel
from models.llm_tools import SearchWebTool
@ -170,6 +171,16 @@ async def generate_ppt_outline(
)
client = LLMClient()
providers_with_search_tool = {
LLMProvider.OPENAI,
LLMProvider.ANTHROPIC,
LLMProvider.GOOGLE,
}
use_search_tool = (
web_search
and client.enable_web_grounding()
and client.llm_provider in providers_with_search_tool
)
try:
async for chunk in client.stream_structured(
@ -187,11 +198,7 @@ async def generate_ppt_outline(
),
response_model.model_json_schema(),
strict=True,
tools=(
[SearchWebTool]
if (client.enable_web_grounding() and web_search)
else None
),
tools=([SearchWebTool] if use_search_tool else None),
):
yield chunk
except Exception as e:

View file

@ -136,3 +136,12 @@ def set_codex_is_pro_env(value: str):
def set_codex_model_env(value: str):
os.environ["CODEX_MODEL"] = value
# Open WebUI Image Provider
def set_open_webui_image_url_env(value: str):
    """Store ``value`` in the ``OPEN_WEBUI_IMAGE_URL`` environment variable."""
    os.environ.update({"OPEN_WEBUI_IMAGE_URL": value})
def set_open_webui_image_api_key_env(value: str):
    """Store ``value`` in the ``OPEN_WEBUI_IMAGE_API_KEY`` environment variable."""
    os.environ.update({"OPEN_WEBUI_IMAGE_API_KEY": value})

View file

@ -36,6 +36,8 @@ from utils.get_env import (
get_codex_email_env,
get_codex_is_pro_env,
get_codex_model_env,
get_open_webui_image_url_env,
get_open_webui_image_api_key_env,
)
from utils.parsers import parse_bool_or_none
from utils.set_env import (
@ -71,6 +73,8 @@ from utils.set_env import (
set_codex_email_env,
set_codex_is_pro_env,
set_codex_model_env,
set_open_webui_image_url_env,
set_open_webui_image_api_key_env,
)
@ -146,6 +150,8 @@ def get_user_config():
if existing_config.CODEX_IS_PRO is not None
else parse_bool_or_none(get_codex_is_pro_env())
),
OPEN_WEBUI_IMAGE_URL=existing_config.OPEN_WEBUI_IMAGE_URL or get_open_webui_image_url_env(),
OPEN_WEBUI_IMAGE_API_KEY=existing_config.OPEN_WEBUI_IMAGE_API_KEY or get_open_webui_image_api_key_env(),
)
@ -215,6 +221,10 @@ def update_env_with_user_config():
set_codex_email_env(user_config.CODEX_EMAIL)
if user_config.CODEX_IS_PRO is not None:
set_codex_is_pro_env(str(user_config.CODEX_IS_PRO))
if user_config.OPEN_WEBUI_IMAGE_URL:
set_open_webui_image_url_env(user_config.OPEN_WEBUI_IMAGE_URL)
if user_config.OPEN_WEBUI_IMAGE_API_KEY:
set_open_webui_image_api_key_env(user_config.OPEN_WEBUI_IMAGE_API_KEY)
def save_codex_tokens_to_user_config() -> None:

View file

@ -262,6 +262,33 @@ const ImageProvider = ({ llmConfig, setLlmConfig }: { llmConfig: LLMConfig, setL
);
}
// Show Open WebUI configuration
if (provider.value === "open_webui") {
return (
<div className="space-y-4">
<div className='w-[205px]'>
<label className="block text-sm font-medium text-gray-700 mb-2">
Open WebUI URL
</label>
<div className="relative">
<input
type="text"
placeholder="http://localhost:3000/api/v1"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.OPEN_WEBUI_IMAGE_URL || ""}
onChange={(e) => {
input_field_changed(
e.target.value,
"OPEN_WEBUI_IMAGE_URL"
);
}}
/>
</div>
</div>
</div>
);
}
// Show API key input for other providers
return (
<div className=" w-[205px]">
@ -300,6 +327,31 @@ const ImageProvider = ({ llmConfig, setLlmConfig }: { llmConfig: LLMConfig, setL
{!isImageGenerationDisabled && <div className='flex justify-end items-center mt-[18px]'>
{renderQualitySelector(llmConfig, input_field_changed)}
{llmConfig.IMAGE_PROVIDER === "open_webui" && (
<div className='w-[205px]'>
<label className="block text-sm font-medium text-gray-700 mb-2">
API Key (optional)
</label>
<div className="relative">
<input
type={showApiKey ? 'text' : 'password'}
placeholder="API key"
className="w-full px-4 py-2.5 h-12 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.OPEN_WEBUI_IMAGE_API_KEY || ""}
onChange={(e) => {
input_field_changed(e.target.value, "OPEN_WEBUI_IMAGE_API_KEY");
}}
/>
<button
type="button"
onClick={() => setShowApiKey((prev) => !prev)}
className='absolute right-2 top-1/2 -translate-y-1/2 bg-white px-2 py-1 cursor-pointer'
>
{showApiKey ? <Eye className='w-4 h-4 text-gray-500' /> : <EyeOff className='w-4 h-4 text-gray-500' />}
</button>
</div>
</div>
)}
{llmConfig.IMAGE_PROVIDER === "comfyui" && <div className='w-full'>
<label className="block text-sm font-medium text-gray-700 mb-2">
Workflow JSON

View file

@ -52,6 +52,11 @@ export async function POST(request: Request) {
userConfig.USE_CUSTOM_URL === undefined
? existingConfig.USE_CUSTOM_URL
: userConfig.USE_CUSTOM_URL,
OPEN_WEBUI_IMAGE_URL:
userConfig.OPEN_WEBUI_IMAGE_URL || existingConfig.OPEN_WEBUI_IMAGE_URL,
OPEN_WEBUI_IMAGE_API_KEY:
userConfig.OPEN_WEBUI_IMAGE_API_KEY || existingConfig.OPEN_WEBUI_IMAGE_API_KEY,
CODEX_MODEL: userConfig.CODEX_MODEL || existingConfig.CODEX_MODEL,
CODEX_ACCESS_TOKEN: existingConfig.CODEX_ACCESS_TOKEN,
CODEX_REFRESH_TOKEN: existingConfig.CODEX_REFRESH_TOKEN,
CODEX_TOKEN_EXPIRES: existingConfig.CODEX_TOKEN_EXPIRES,

View file

@ -266,6 +266,56 @@ const ImageSelectionConfig = ({ isImageGenerationDisabled, openImageProviderSele
return <></>;
}
// Show Open WebUI configuration
if (provider.value === "open_webui") {
return (
<div className="space-y-4 w-[295px]">
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Open WebUI URL
</label>
<div className="relative">
<input
type="text"
placeholder="http://localhost:3000/api/v1"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.OPEN_WEBUI_IMAGE_URL || ""}
onChange={(e) => {
input_field_changed(
e.target.value,
"open_webui_image_url"
);
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
Image model is configured in Open WebUI admin settings
</p>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
API Key (optional)
</label>
<div className="relative">
<input
type="text"
placeholder="Open WebUI API key"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.OPEN_WEBUI_IMAGE_API_KEY || ""}
onChange={(e) => {
input_field_changed(
e.target.value,
"open_webui_image_api_key"
);
}}
/>
</div>
</div>
</div>
);
}
// Show ComfyUI configuration
if (provider.value === "comfyui") {
return (

View file

@ -32,6 +32,10 @@ export interface LLMConfig {
COMFYUI_URL?: string;
COMFYUI_WORKFLOW?: string;
// Open WebUI Image Provider
OPEN_WEBUI_IMAGE_URL?: string;
OPEN_WEBUI_IMAGE_API_KEY?: string;
// Dalle 3 Quality
DALL_E_3_QUALITY?: string;
// GPT Image 1.5 Quality

View file

@ -98,6 +98,15 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
apiKeyField: "COMFYUI_URL",
apiKeyFieldLabel: "ComfyUI Server URL",
},
open_webui: {
value: "open_webui",
label: "Open WebUI",
description: "Use your Open WebUI server for image generation",
icon: "/icons/open-webui.png",
requiresApiKey: false,
apiKeyField: "OPEN_WEBUI_IMAGE_URL",
apiKeyFieldLabel: "Open WebUI URL",
},
};
export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {

View file

@ -54,6 +54,8 @@ export const updateLLMConfig = (
comfyui_workflow: "COMFYUI_WORKFLOW",
dall_e_3_quality: "DALL_E_3_QUALITY",
gpt_image_1_5_quality: "GPT_IMAGE_1_5_QUALITY",
open_webui_image_url: "OPEN_WEBUI_IMAGE_URL",
open_webui_image_api_key: "OPEN_WEBUI_IMAGE_API_KEY",
codex_model: "CODEX_MODEL",
};

View file

@ -102,6 +102,11 @@ export const getLLMConfigValidationError = (
return "ComfyUI server URL is required.";
}
break;
case "open_webui":
if (!isProvided(llmConfig.OPEN_WEBUI_IMAGE_URL)) {
return "Open WebUI URL is required.";
}
break;
default:
return "Select a valid image provider.";
}