diff --git a/docker-compose.yml b/docker-compose.yml index 78861cce..6def2521 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,6 +43,8 @@ services: - MEM0_EMBEDDING_DIMS=${MEM0_EMBEDDING_DIMS:-384} - LITEPARSE_DPI=${LITEPARSE_DPI:-120} - LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1} + - OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL} + - OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY} production-gpu: # image: ghcr.io/presenton/presenton:latest @@ -96,6 +98,9 @@ services: - LITEPARSE_DPI=${LITEPARSE_DPI:-120} - LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1} + - OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL} + - OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY} + development: build: context: . @@ -143,6 +148,8 @@ services: - MEM0_EMBEDDING_DIMS=${MEM0_EMBEDDING_DIMS:-384} - LITEPARSE_DPI=${LITEPARSE_DPI:-120} - LITEPARSE_NUM_WORKERS=${LITEPARSE_NUM_WORKERS:-1} + - OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL} + - OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY} development-gpu: build: @@ -201,3 +208,5 @@ services: volumes: presenton_root_node_modules: presenton_document_extraction_liteparse: + - OPEN_WEBUI_IMAGE_URL=${OPEN_WEBUI_IMAGE_URL} + - OPEN_WEBUI_IMAGE_API_KEY=${OPEN_WEBUI_IMAGE_API_KEY} diff --git a/electron/app/ipc/export_handlers.ts b/electron/app/ipc/export_handlers.ts index d0691dda..f1d4271c 100644 --- a/electron/app/ipc/export_handlers.ts +++ b/electron/app/ipc/export_handlers.ts @@ -63,6 +63,7 @@ export function setupExportHandlers() { const exportTaskProcess = spawn(process.execPath, [exportScriptPath, exportTaskPath], { stdio: ["ignore", "pipe", "pipe"], cwd: baseDir, + windowsHide: process.platform === "win32", env: { ...process.env, ELECTRON_RUN_AS_NODE: "1", diff --git a/electron/app/utils/imagemagick-check.ts b/electron/app/utils/imagemagick-check.ts index 34985a74..bb3e3e60 100644 --- a/electron/app/utils/imagemagick-check.ts +++ b/electron/app/utils/imagemagick-check.ts @@ -173,7 +173,7 @@ export function 
getImageMagickBinaryPath(): string { export function getImageMagickDownloadUrl(): string { if (process.platform === "win32") { - return "https://imagemagick.org/archive/binaries/ImageMagick-7.1.2-18-Q16-HDRI-x64-dll.exe"; + return "https://github.com/ImageMagick/ImageMagick/releases/download/7.1.2-18/ImageMagick-7.1.2-18-Q16-HDRI-x64-dll.exe"; } if (process.platform === "darwin") { return "https://brew.sh/"; diff --git a/electron/package-lock.json b/electron/package-lock.json index 75616258..86b36cd1 100644 --- a/electron/package-lock.json +++ b/electron/package-lock.json @@ -1,12 +1,12 @@ { "name": "presenton", - "version": "0.7.2-beta", + "version": "0.7.3-beta", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "presenton", - "version": "0.7.2-beta", + "version": "0.7.3-beta", "hasInstallScript": true, "dependencies": { "@llamaindex/liteparse": "^1.4.0", diff --git a/electron/package.json b/electron/package.json index ffef3c6e..47c3c16a 100644 --- a/electron/package.json +++ b/electron/package.json @@ -1,8 +1,8 @@ { "name": "presenton", "productName": "Presenton Open Source", - "version": "0.7.2-beta", - "exportVersion": "v0.2.0", + "version": "0.7.3-beta", + "exportVersion": "v0.2.2", "main": "app_dist/main.js", "description": "Open-Source AI Presentation Generator", "homepage": "https://presenton.ai", diff --git a/electron/servers/fastapi/api/lifespan.py b/electron/servers/fastapi/api/lifespan.py index 6fe4e6c4..1ce3e26f 100644 --- a/electron/servers/fastapi/api/lifespan.py +++ b/electron/servers/fastapi/api/lifespan.py @@ -4,7 +4,7 @@ import os from fastapi import FastAPI from migrations import migrate_database_on_startup -from services.database import create_db_and_tables +from services.database import create_db_and_tables, dispose_engines from utils.get_env import get_app_data_directory_env from utils.model_availability import ( check_llm_and_image_provider_api_or_model_availability, @@ -24,3 +24,5 @@ async def app_lifespan(_: FastAPI): 
await create_db_and_tables() await check_llm_and_image_provider_api_or_model_availability() yield + # Shutdown: release all database connections to prevent stale/leaked pools. + await dispose_engines() diff --git a/electron/servers/fastapi/models/llm_tools.py b/electron/servers/fastapi/models/llm_tools.py index ccf64e67..4ef9ff20 100644 --- a/electron/servers/fastapi/models/llm_tools.py +++ b/electron/servers/fastapi/models/llm_tools.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Coroutine, Optional +from typing import Any, Callable, Coroutine from pydantic import BaseModel, Field diff --git a/electron/servers/fastapi/pyproject.toml b/electron/servers/fastapi/pyproject.toml index bcdc43f1..d96a65b0 100644 --- a/electron/servers/fastapi/pyproject.toml +++ b/electron/servers/fastapi/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "google-genai>=1.28.0", # Platform-specific: greenlet for macOS only (critical for SQLAlchemy async) "greenlet>=3.0.0; sys_platform == 'darwin'", + "jsonschema>=4.25.0", "nltk>=3.9.1", "openai>=1.98.0", "pathvalidate>=3.3.1", diff --git a/electron/servers/fastapi/services/database.py b/electron/servers/fastapi/services/database.py index dcc03b01..96a5c4bf 100644 --- a/electron/servers/fastapi/services/database.py +++ b/electron/servers/fastapi/services/database.py @@ -20,14 +20,20 @@ from models.sql.template import TemplateModel from models.sql.template_create_info import TemplateCreateInfoModel from models.sql.slide import SlideModel from models.sql.webhook_subscription import WebhookSubscription -from utils.db_utils import get_database_url_and_connect_args +from utils.db_utils import get_database_url_and_connect_args, get_pool_kwargs from utils.get_env import get_app_data_directory_env from utils.get_env import get_migrate_database_on_startup_env database_url, connect_args = get_database_url_and_connect_args() -sql_engine: AsyncEngine = create_async_engine(database_url, connect_args=connect_args) +# Apply connection-pool settings 
for server-class databases (PostgreSQL, MySQL). +# SQLite uses a file-lock model and ignores pool configuration, so we skip it. +_pool_kwargs = get_pool_kwargs() if "sqlite" not in database_url else {} + +sql_engine: AsyncEngine = create_async_engine( + database_url, connect_args=connect_args, **_pool_kwargs +) async_session_maker = async_sessionmaker(sql_engine, expire_on_commit=False) @@ -81,3 +87,14 @@ async def create_db_and_tables(): tables=[OllamaPullStatus.__table__], ) ) + + +async def dispose_engines(): + """Dispose all engine connection pools. + + Call this during application shutdown (e.g. in a FastAPI ``shutdown`` + event or lifespan context) to release every connection back to the + database and prevent stale / leaked connections. + """ + await sql_engine.dispose() + await container_db_engine.dispose() diff --git a/electron/servers/fastapi/services/llm_client.py b/electron/servers/fastapi/services/llm_client.py index 11f29dd5..63e39bb9 100644 --- a/electron/servers/fastapi/services/llm_client.py +++ b/electron/servers/fastapi/services/llm_client.py @@ -1,6 +1,7 @@ import asyncio import dirtyjson import json +import logging from typing import AsyncGenerator, List, Optional, Dict, Any from fastapi import HTTPException from openai import APIStatusError, AsyncOpenAI, OpenAIError @@ -69,11 +70,15 @@ from utils.schema_utils import ( ensure_array_schemas_have_items, ensure_strict_json_schema, flatten_json_schema, + get_schema_validation_errors, remove_titles_from_schema, ) +LOGGER = logging.getLogger(__name__) + + class LLMClient: def __init__(self): self.llm_provider = get_llm_provider() @@ -95,6 +100,59 @@ class LLMClient: return False return parse_bool_or_none(get_web_grounding_env()) or False + def web_search_enabled_for_request(self, web_search: bool) -> bool: + """Attach SearchWebTool only when the user enabled web search for this request. + + Controlled solely by the presentation ``web_search`` flag (Advanced settings). 
+ Legacy ``WEB_GROUNDING`` / settings toggles are not consulted here so a saved + false there cannot disable per-deck web search. + """ + if not web_search: + return False + if self.llm_provider in ( + LLMProvider.OLLAMA, + LLMProvider.CUSTOM, + LLMProvider.CODEX, + ): + return False + return True + + def outline_uses_prefetched_web_facts(self, web_search: bool) -> bool: + """Chat Completions + json_schema rarely invoke custom function tools. + + For OpenAI we can prefetch via the Responses API (``web_search_preview``) + and attach the result as context so Advanced settings **Web search** still + grounds outlines without relying on ``SearchWebTool`` in the same call. + """ + if not self.web_search_enabled_for_request(web_search): + return False + return self.llm_provider == LLMProvider.OPENAI + + async def prefetch_outline_web_facts( + self, + content: str, + additional_context: Optional[str] = None, + ) -> Optional[str]: + if self.llm_provider not in (LLMProvider.OPENAI, LLMProvider.CODEX): + return None + parts = [(content or "").strip(), (additional_context or "").strip()] + topic = "\n\n".join(p for p in parts if p) + if not topic: + topic = "general presentation topic" + topic = topic[:12000] + query = ( + "Search the web and summarize the most relevant current facts, statistics, " + "and notable recent developments for this presentation topic. Use concise " + "bullet points; include approximate dates or time ranges when known.\n\n" + f"Topic:\n{topic}" + ) + try: + text = await self._search_openai(query) + out = (text or "").strip() + return out or None + except Exception: + return None + # ? 
Disable thinking def disable_thinking(self) -> bool: return parse_bool_or_none(get_disable_thinking_env()) or False @@ -1067,6 +1125,101 @@ class LLMClient: depth=depth, ) + async def _generate_structured_once( + self, + model: str, + messages: List[LLMMessage], + response_format: dict, + strict: bool = False, + tools: Optional[List[dict]] = None, + max_tokens: Optional[int] = None, + ) -> dict | None: + match self.llm_provider: + case LLMProvider.OPENAI: + return await self._generate_openai_structured( + model=model, + messages=messages, + response_format=response_format, + strict=strict, + tools=tools, + max_tokens=max_tokens, + ) + case LLMProvider.CODEX: + return await self._generate_codex_structured( + model=model, + messages=messages, + response_format=response_format, + strict=strict, + tools=tools, + max_tokens=max_tokens, + ) + case LLMProvider.GOOGLE: + return await self._generate_google_structured( + model=model, + messages=messages, + response_format=response_format, + tools=tools, + max_tokens=max_tokens, + ) + case LLMProvider.ANTHROPIC: + return await self._generate_anthropic_structured( + model=model, + messages=messages, + response_format=response_format, + tools=tools, + max_tokens=max_tokens, + ) + case LLMProvider.OLLAMA: + return await self._generate_ollama_structured( + model=model, + messages=messages, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + ) + case LLMProvider.CUSTOM: + return await self._generate_custom_structured( + model=model, + messages=messages, + response_format=response_format, + strict=strict, + max_tokens=max_tokens, + ) + + def _get_structured_validation_feedback_message( + self, + content: dict, + validation_errors: List[str], + ) -> LLMUserMessage: + max_error_count = 10 + max_json_chars = 6000 + + formatted_errors = validation_errors[:max_error_count] + if len(validation_errors) > max_error_count: + formatted_errors.append( + f"...and {len(validation_errors) - max_error_count} more 
validation errors." + ) + + previous_response = json.dumps( + content, + ensure_ascii=False, + indent=2, + default=str, + ) + if len(previous_response) > max_json_chars: + previous_response = previous_response[:max_json_chars] + "\n... (truncated)" + + return LLMUserMessage( + content=( + "The previous JSON response did not match the required response schema.\n\n" + "Validation errors:\n" + + "\n".join(f"- {error}" for error in formatted_errors) + + "\n\nPrevious invalid JSON:\n" + + f"```json\n{previous_response}\n```\n\n" + + "Return corrected JSON only. Make sure it fully matches the required schema." + ) + ) + async def generate_structured( self, model: str, @@ -1075,68 +1228,69 @@ class LLMClient: strict: bool = False, tools: Optional[List[type[LLMTool] | LLMDynamicTool]] = None, max_tokens: Optional[int] = None, + validate_schema: bool = False, + validate_schema_max_loop_count: int = 5, ) -> dict: parsed_tools = self.tool_calls_handler.parse_tools(tools) + max_validation_loops = max(1, validate_schema_max_loop_count) + working_messages = [*messages] - for attempt in range(3): + for validation_attempt in range(max_validation_loops): content = None - match self.llm_provider: - case LLMProvider.OPENAI: - content = await self._generate_openai_structured( - model=model, - messages=messages, - response_format=response_format, - strict=strict, - tools=parsed_tools, - max_tokens=max_tokens, - ) - case LLMProvider.CODEX: - content = await self._generate_codex_structured( - model=model, - messages=messages, - response_format=response_format, - strict=strict, - tools=parsed_tools, - max_tokens=max_tokens, - ) - case LLMProvider.GOOGLE: - content = await self._generate_google_structured( - model=model, - messages=messages, - response_format=response_format, - tools=parsed_tools, - max_tokens=max_tokens, - ) - case LLMProvider.ANTHROPIC: - content = await self._generate_anthropic_structured( - model=model, - messages=messages, - response_format=response_format, - 
tools=parsed_tools, - max_tokens=max_tokens, - ) - case LLMProvider.OLLAMA: - content = await self._generate_ollama_structured( - model=model, - messages=messages, - response_format=response_format, - strict=strict, - max_tokens=max_tokens, - ) - case LLMProvider.CUSTOM: - content = await self._generate_custom_structured( - model=model, - messages=messages, - response_format=response_format, - strict=strict, - max_tokens=max_tokens, - ) + for attempt in range(3): + content = await self._generate_structured_once( + model=model, + messages=working_messages, + response_format=response_format, + strict=strict, + tools=parsed_tools, + max_tokens=max_tokens, + ) - if content is not None: + if content is not None: + break + + if attempt < 2: + await asyncio.sleep(0.5 * (attempt + 1)) + + if content is None: + raise HTTPException( + status_code=400, + detail="LLM did not return any content", + ) + + if not validate_schema: return content - if attempt < 2: - await asyncio.sleep(0.5 * (attempt + 1)) + validation_errors = get_schema_validation_errors( + response_format, + content, + strict=strict, + ) + + if not validation_errors: + return content + + formatted_validation_errors = " | ".join(validation_errors) + if validation_attempt == max_validation_loops - 1: + LOGGER.warning( + "Validation error after max fixes, returning last response: %s", + formatted_validation_errors, + ) + return content + + LOGGER.warning( + "Validation error, attempting fix %s/%s: %s", + validation_attempt + 1, + max_validation_loops - 1, + formatted_validation_errors, + ) + working_messages.append( + self._get_structured_validation_feedback_message( + content, + validation_errors, + ) + ) raise HTTPException( status_code=400, @@ -1652,7 +1806,7 @@ class LLMClient: current_arguments = None has_response_schema_tool_call = False - async for event in await client.chat.completions.create( + completion_kwargs: Dict[str, Any] = dict( model=model, messages=[message.model_dump() for message in messages], 
max_completion_tokens=max_tokens, @@ -1673,7 +1827,11 @@ class LLMClient: ), extra_body=extra_body, stream=True, - ): + ) + if all_tools: + completion_kwargs["tool_choice"] = "auto" + completion_kwargs["parallel_tool_calls"] = True + async for event in await client.chat.completions.create(**completion_kwargs): event: OpenAIChatCompletionChunk = event if not event.choices: continue @@ -1754,8 +1912,6 @@ class LLMClient: ): yield event - - async def _stream_codex_structured( self, model: str, diff --git a/electron/servers/fastapi/services/llm_tool_calls_handler.py b/electron/servers/fastapi/services/llm_tool_calls_handler.py index 63476028..f396112d 100644 --- a/electron/servers/fastapi/services/llm_tool_calls_handler.py +++ b/electron/servers/fastapi/services/llm_tool_calls_handler.py @@ -55,7 +55,7 @@ class LLMToolCallsHandler: self.dynamic_tools.append(tool) match self.client.llm_provider: - case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM: + case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM | LLMProvider.CODEX: return self.parse_tool_openai(tool, strict) case LLMProvider.ANTHROPIC: return self.parse_tool_anthropic(tool) @@ -63,7 +63,7 @@ class LLMToolCallsHandler: return self.parse_tool_google(tool) case _: raise ValueError( - f"LLM provider must be either openai, anthropic, or google" + "LLM provider must be one of: openai, anthropic, google, codex, ollama, custom" ) def parse_tool_openai( @@ -181,7 +181,7 @@ class LLMToolCallsHandler: # Search web tool call handler async def search_web_tool_call_handler(self, arguments: str) -> str: match self.client.llm_provider: - case LLMProvider.OPENAI: + case LLMProvider.OPENAI | LLMProvider.CODEX: return await self.search_web_tool_call_handler_openai(arguments) case LLMProvider.ANTHROPIC: return await self.search_web_tool_call_handler_anthropic(arguments) diff --git a/electron/servers/fastapi/tests/test_llm_client_structured_validation.py 
b/electron/servers/fastapi/tests/test_llm_client_structured_validation.py new file mode 100644 index 00000000..0b7e5a87 --- /dev/null +++ b/electron/servers/fastapi/tests/test_llm_client_structured_validation.py @@ -0,0 +1,338 @@ +import asyncio +import uuid +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch + +from enums.llm_provider import LLMProvider +from models.llm_message import LLMUserMessage +from models.presentation_outline_model import PresentationOutlineModel, SlideOutlineModel +from models.sql.slide import SlideModel +from services.llm_client import LLMClient +from templates.presentation_layout import PresentationLayoutModel, SlideLayoutModel +from utils.llm_calls.edit_slide import get_edited_slide_content +from utils.llm_calls.generate_presentation_structure import ( + generate_presentation_structure, +) +from utils.llm_calls.generate_slide_content import get_slide_content_from_type_and_outline +from utils.llm_calls.select_slide_type_on_edit import get_slide_layout_from_prompt + + +def _build_client() -> LLMClient: + client = object.__new__(LLMClient) + client.llm_provider = LLMProvider.OPENAI + client.tool_calls_handler = SimpleNamespace(parse_tools=lambda tools: None) + return client + + +def _build_layout() -> PresentationLayoutModel: + return PresentationLayoutModel( + name="Test Layout", + slides=[ + SlideLayoutModel( + id="layout-1", + name="Title Slide", + description="Single title layout", + json_schema={ + "type": "object", + "properties": { + "title": {"type": "string"}, + }, + "required": ["title"], + "additionalProperties": False, + }, + ) + ], + ) + + +def _build_slide() -> SlideModel: + return SlideModel( + presentation=uuid.uuid4(), + layout_group="default", + layout="layout-1", + index=0, + content={"title": "Current title"}, + ) + + +def test_generate_structured_skips_validation_when_disabled(): + client = _build_client() + call_messages = [] + + async def fake_generate(**kwargs): + 
call_messages.append(kwargs["messages"]) + return {"title": 123} + + client._generate_structured_once = AsyncMock(side_effect=fake_generate) + + response = asyncio.run( + client.generate_structured( + model="test-model", + messages=[LLMUserMessage(content="Generate JSON")], + response_format={ + "type": "object", + "properties": {"title": {"type": "string"}}, + "required": ["title"], + "additionalProperties": False, + }, + validate_schema=False, + ) + ) + + assert response == {"title": 123} + assert len(call_messages) == 1 + assert len(call_messages[0]) == 1 + + +def test_generate_structured_retries_with_validation_feedback(): + client = _build_client() + call_messages = [] + responses = [ + {"title": 123}, + {"title": "Valid title"}, + ] + + async def fake_generate(**kwargs): + call_messages.append(kwargs["messages"]) + return responses[len(call_messages) - 1] + + client._generate_structured_once = AsyncMock(side_effect=fake_generate) + + with patch("services.llm_client.LOGGER.warning") as mock_warning: + response = asyncio.run( + client.generate_structured( + model="test-model", + messages=[LLMUserMessage(content="Generate JSON")], + response_format={ + "type": "object", + "properties": {"title": {"type": "string"}}, + "required": ["title"], + "additionalProperties": False, + }, + validate_schema=True, + ) + ) + + assert response == {"title": "Valid title"} + assert len(call_messages) == 2 + feedback_message = call_messages[1][-1] + assert isinstance(feedback_message, LLMUserMessage) + assert "Validation errors:" in feedback_message.content + assert "$.title" in feedback_message.content + assert '"title": 123' in feedback_message.content + mock_warning.assert_called_once() + assert "$.title" in mock_warning.call_args.args[3] + + +def test_generate_structured_returns_last_invalid_response_at_max_loop_count(): + client = _build_client() + call_messages = [] + responses = [ + {"title": 123}, + {"title": False}, + {"title": "should not be used"}, + ] + + async def 
fake_generate(**kwargs): + call_messages.append(kwargs["messages"]) + return responses[len(call_messages) - 1] + + client._generate_structured_once = AsyncMock(side_effect=fake_generate) + + response = asyncio.run( + client.generate_structured( + model="test-model", + messages=[LLMUserMessage(content="Generate JSON")], + response_format={ + "type": "object", + "properties": {"title": {"type": "string"}}, + "required": ["title"], + "additionalProperties": False, + }, + validate_schema=True, + validate_schema_max_loop_count=2, + ) + ) + + assert response == {"title": False} + assert len(call_messages) == 2 + + +def test_generate_structured_uses_strict_schema_for_validation(): + client = _build_client() + call_messages = [] + responses = [ + {"title": "Only title"}, + {"title": "Valid title", "subtitle": "Valid subtitle"}, + ] + + async def fake_generate(**kwargs): + call_messages.append(kwargs["messages"]) + return responses[len(call_messages) - 1] + + client._generate_structured_once = AsyncMock(side_effect=fake_generate) + + response = asyncio.run( + client.generate_structured( + model="test-model", + messages=[LLMUserMessage(content="Generate JSON")], + response_format={ + "type": "object", + "properties": { + "title": {"type": "string"}, + "subtitle": {"type": "string"}, + }, + }, + strict=True, + validate_schema=True, + ) + ) + + assert response == {"title": "Valid title", "subtitle": "Valid subtitle"} + assert len(call_messages) == 2 + feedback_message = call_messages[1][-1] + assert "required property" in feedback_message.content + assert "subtitle" in feedback_message.content + + +def test_generate_structured_preserves_no_content_retries(): + client = _build_client() + client._generate_structured_once = AsyncMock( + side_effect=[None, None, {"title": "Valid title"}] + ) + + response = asyncio.run( + client.generate_structured( + model="test-model", + messages=[LLMUserMessage(content="Generate JSON")], + response_format={ + "type": "object", + "properties": 
{"title": {"type": "string"}}, + "required": ["title"], + "additionalProperties": False, + }, + ) + ) + + assert response == {"title": "Valid title"} + assert client._generate_structured_once.await_count == 3 + + +def test_edit_slide_enables_schema_validation(): + mock_client = SimpleNamespace( + generate_structured=AsyncMock( + return_value={ + "title": "Edited title", + "__speaker_note__": "x" * 120, + } + ) + ) + + with patch("utils.llm_calls.edit_slide.LLMClient", return_value=mock_client), patch( + "utils.llm_calls.edit_slide.get_model", + return_value="test-model", + ): + response = asyncio.run( + get_edited_slide_content( + prompt="Update the title", + slide=_build_slide(), + language="English", + slide_layout=_build_layout().slides[0], + ) + ) + + assert response["title"] == "Edited title" + assert mock_client.generate_structured.await_args.kwargs["validate_schema"] is True + + +def test_generate_presentation_structure_enables_schema_validation(): + mock_client = SimpleNamespace( + generate_structured=AsyncMock(return_value={"slides": [0]}) + ) + mock_response_model = SimpleNamespace( + model_json_schema=lambda: { + "type": "object", + "properties": { + "slides": { + "type": "array", + "items": {"type": "integer"}, + } + }, + "required": ["slides"], + "additionalProperties": False, + } + ) + + with patch( + "utils.llm_calls.generate_presentation_structure.LLMClient", + return_value=mock_client, + ), patch( + "utils.llm_calls.generate_presentation_structure.get_model", + return_value="test-model", + ), patch( + "utils.llm_calls.generate_presentation_structure.get_presentation_structure_model_with_n_slides", + return_value=mock_response_model, + ): + response = asyncio.run( + generate_presentation_structure( + presentation_outline=PresentationOutlineModel( + slides=[SlideOutlineModel(content="Outline content")] + ), + presentation_layout=_build_layout(), + ) + ) + + assert response.slides == [0] + assert 
mock_client.generate_structured.await_args.kwargs["validate_schema"] is True + + +def test_generate_slide_content_enables_schema_validation(): + mock_client = SimpleNamespace( + generate_structured=AsyncMock( + return_value={ + "title": "Slide title", + "__speaker_note__": "x" * 120, + } + ) + ) + + with patch( + "utils.llm_calls.generate_slide_content.LLMClient", + return_value=mock_client, + ), patch( + "utils.llm_calls.generate_slide_content.get_model", + return_value="test-model", + ): + response = asyncio.run( + get_slide_content_from_type_and_outline( + slide_layout=_build_layout().slides[0], + outline=SlideOutlineModel(content="Slide outline"), + language="English", + ) + ) + + assert response["title"] == "Slide title" + assert mock_client.generate_structured.await_args.kwargs["validate_schema"] is True + + +def test_select_slide_type_on_edit_enables_schema_validation(): + mock_client = SimpleNamespace(generate_structured=AsyncMock(return_value={"index": 0})) + layout = _build_layout() + + with patch( + "utils.llm_calls.select_slide_type_on_edit.LLMClient", + return_value=mock_client, + ), patch( + "utils.llm_calls.select_slide_type_on_edit.get_model", + return_value="test-model", + ): + response = asyncio.run( + get_slide_layout_from_prompt( + prompt="Use the first layout", + layout=layout, + slide=_build_slide(), + ) + ) + + assert response.id == "layout-1" + assert mock_client.generate_structured.await_args.kwargs["validate_schema"] is True diff --git a/electron/servers/fastapi/utils/db_utils.py b/electron/servers/fastapi/utils/db_utils.py index 60b521fb..8976eb8b 100644 --- a/electron/servers/fastapi/utils/db_utils.py +++ b/electron/servers/fastapi/utils/db_utils.py @@ -4,6 +4,40 @@ from urllib.parse import urlsplit, urlunsplit, parse_qsl import ssl +def _int_env(name: str, default: int) -> int: + """Read an integer from an environment variable, falling back to *default*.""" + raw = os.getenv(name) + if raw is None: + return default + try: + return 
int(raw) + except ValueError: + return default + + +def get_pool_kwargs() -> dict: + """Build SQLAlchemy engine pool keyword arguments from environment variables. + + Supported variables (all optional): + DB_POOL_SIZE – max persistent connections (default 5) + DB_MAX_OVERFLOW – extra connections above pool_size (default 10) + DB_POOL_TIMEOUT – seconds to wait for a connection (default 30) + DB_POOL_RECYCLE – seconds before a connection is recycled (default 1800) + DB_POOL_PRE_PING – enable connection liveness check (default true) + + This helper never inspects the database URL and always returns all five + settings; callers building a SQLite engine are expected to leave them out. + """ + return { + "pool_size": _int_env("DB_POOL_SIZE", 5), + "max_overflow": _int_env("DB_MAX_OVERFLOW", 10), + "pool_timeout": _int_env("DB_POOL_TIMEOUT", 30), + "pool_recycle": _int_env("DB_POOL_RECYCLE", 1800), + "pool_pre_ping": os.getenv("DB_POOL_PRE_PING", "true").lower() + not in ("false", "0", "no"), + } + + def _ensure_sqlite_parent_dir(database_url: str) -> None: if not database_url.startswith("sqlite://"): return diff --git a/electron/servers/fastapi/utils/llm_calls/edit_slide.py b/electron/servers/fastapi/utils/llm_calls/edit_slide.py index 00d2f9b5..5066bbe4 100644 --- a/electron/servers/fastapi/utils/llm_calls/edit_slide.py +++ b/electron/servers/fastapi/utils/llm_calls/edit_slide.py @@ -108,7 +108,7 @@ async def get_edited_slide_content( "__speaker_note__": { "type": "string", "minLength": 100, - "maxLength": 250, + "maxLength": 500, "description": "Speaker note for the slide", } }, @@ -124,6 +124,7 @@ async def get_edited_slide_content( ), response_format=response_schema, strict=False, + validate_schema=True, ) return response diff --git a/electron/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/electron/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 13bf3a14..d9acb5b1 100644 ---
a/electron/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/electron/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -16,6 +16,7 @@ def get_system_prompt( instructions: Optional[str] = None, include_title_slide: bool = True, include_table_of_contents: bool = False, + web_search: bool = False, ): verbosity_instruction = ( "Slide content should be abound 20 words but detailed enough to generate a good slide." @@ -40,6 +41,27 @@ def get_system_prompt( ) toc_block = f"{toc_instruction}\n" if toc_instruction else "" + if web_search: + tools_hint = "Try to use available tools when they improve accuracy.\n" + web_block = ( + "Web search is enabled: use any \"## Web research (current sources)\" section in Context when present, " + "and call SearchWebTool when it is available for fresh facts.\n" + ) + else: + tools_hint = "" + web_block = "Do not use web search for this outline; rely on Content and Context only.\n" + + url_line = ( + "Only include URLs if they appear in Content, Context, or a \"## Web research (current sources)\" block.\n" + if web_search + else "Only include URLs if they appear in the provided content/context.\n" + ) + data_line = ( + "Ground slide data in Content and Context, and in \"## Web research (current sources)\" when that block is present.\n" + if web_search + else "Make sure data used is strictly from the provided content/context.\n" + ) + slide_outline_structure = ( "Each slide content:\n" " - Must have a ## title.\n" @@ -60,11 +82,13 @@ def get_system_prompt( "If 'auto-detect' is used, figure it out from the content/context.\n" f"{title_slide_instruction}\n" f"{toc_block}" + f"{tools_hint}" + f"{web_block}" f"{slide_outline_structure}\n" "Slide content must not contain any presentation branding/styling information.\n" "Title slide must only contain title, presenter name, date and overview.\n" - "Only include URLs if they appear in the provided content/context.\n" - "Make sure data used is strictly 
from the provided content/context.\n" + f"{url_line}" + f"{data_line}" "Make sure data is consistent across all slides." ) @@ -124,6 +148,7 @@ def get_messages( instructions: Optional[str] = None, include_title_slide: bool = True, include_table_of_contents: bool = False, + web_search: bool = False, ): return [ LLMSystemMessage( @@ -133,6 +158,7 @@ def get_messages( instructions, include_title_slide, include_table_of_contents, + web_search=web_search, ), ), LLMUserMessage( @@ -170,6 +196,21 @@ async def generate_ppt_outline( ) client = LLMClient() + web_search_enabled = client.web_search_enabled_for_request(web_search) + + merged_context = additional_context + if client.outline_uses_prefetched_web_facts(web_search): + facts = await client.prefetch_outline_web_facts(content, additional_context) + if facts: + merged_context = ( + f"{(additional_context or '').strip()}\n\n## Web research (current sources)\n{facts}" + if (additional_context or "").strip() + else f"## Web research (current sources)\n{facts}" + ) + + use_search_tool = web_search_enabled and not client.outline_uses_prefetched_web_facts( + web_search + ) try: async for chunk in client.stream_structured( @@ -178,20 +219,17 @@ async def generate_ppt_outline( content, n_slides, language, - additional_context, + merged_context, tone, verbosity, instructions, include_title_slide, include_table_of_contents, + web_search=web_search_enabled, ), response_model.model_json_schema(), strict=True, - tools=( - [SearchWebTool] - if (client.enable_web_grounding() and web_search) - else None - ), + tools=([SearchWebTool] if use_search_tool else None), ): yield chunk except Exception as e: diff --git a/electron/servers/fastapi/utils/llm_calls/generate_presentation_structure.py b/electron/servers/fastapi/utils/llm_calls/generate_presentation_structure.py index 65c623e2..c6db3af3 100644 --- a/electron/servers/fastapi/utils/llm_calls/generate_presentation_structure.py +++ 
b/electron/servers/fastapi/utils/llm_calls/generate_presentation_structure.py @@ -167,6 +167,7 @@ async def generate_presentation_structure( ), response_format=response_model.model_json_schema(), strict=True, + validate_schema=True, ) return PresentationStructureModel(**response) except Exception as e: diff --git a/electron/servers/fastapi/utils/llm_calls/generate_slide_content.py b/electron/servers/fastapi/utils/llm_calls/generate_slide_content.py index 773c54dc..7e462109 100644 --- a/electron/servers/fastapi/utils/llm_calls/generate_slide_content.py +++ b/electron/servers/fastapi/utils/llm_calls/generate_slide_content.py @@ -24,7 +24,7 @@ You need to generate structured content json based on the schema. # General Rules - Make sure to follow language guidelines. - Speaker note should be normal text, not markdown. -- Never ever go over the max character limit. +- Never ever go over the max character limit but don't clip the sentence to satisfy character limit instead rephrase it. - Do not add emoji in the content. - Don't provide $schema field in content json. 
{markdown_emphasis_rules} @@ -167,7 +167,7 @@ async def get_slide_content_from_type_and_outline( "__speaker_note__": { "type": "string", "minLength": 100, - "maxLength": 250, + "maxLength": 500, "description": "Speaker note for the slide", } }, @@ -187,6 +187,7 @@ async def get_slide_content_from_type_and_outline( ), response_format=response_schema, strict=False, + validate_schema=True, ) return response diff --git a/electron/servers/fastapi/utils/llm_calls/select_slide_type_on_edit.py b/electron/servers/fastapi/utils/llm_calls/select_slide_type_on_edit.py index 23bbc2f9..7c4c329e 100644 --- a/electron/servers/fastapi/utils/llm_calls/select_slide_type_on_edit.py +++ b/electron/servers/fastapi/utils/llm_calls/select_slide_type_on_edit.py @@ -58,6 +58,7 @@ async def get_slide_layout_from_prompt( ), response_format=SlideLayoutIndex.model_json_schema(), strict=True, + validate_schema=True, ) index = SlideLayoutIndex(**response).index return layout.slides[index] diff --git a/electron/servers/fastapi/utils/schema_utils.py b/electron/servers/fastapi/utils/schema_utils.py index 1e0241aa..0e5f886a 100644 --- a/electron/servers/fastapi/utils/schema_utils.py +++ b/electron/servers/fastapi/utils/schema_utils.py @@ -1,6 +1,7 @@ from copy import deepcopy from typing import Any, List +from jsonschema.validators import validator_for from openai import NOT_GIVEN from utils.dict_utils import ( @@ -323,6 +324,53 @@ def ensure_array_schemas_have_items(schema: dict) -> dict[str, Any]: return _ensure(result) +def prepare_schema_for_validation( + schema: dict, + strict: bool = False, +) -> dict[str, Any]: + prepared_schema = deepcopy(schema) + if strict: + prepared_schema = ensure_strict_json_schema( + prepared_schema, + path=(), + root=prepared_schema, + ) + return ensure_array_schemas_have_items(prepared_schema) + + +def format_json_path(path: List[Any]) -> str: + if not path: + return "$" + + formatted = "$" + for part in path: + if isinstance(part, int): + formatted += f"[{part}]" + 
else: + formatted += f".{part}" + return formatted + + +def get_schema_validation_errors( + schema: dict, + instance: Any, + strict: bool = False, +) -> List[str]: + prepared_schema = prepare_schema_for_validation(schema, strict=strict) + validator_cls = validator_for(prepared_schema) + validator_cls.check_schema(prepared_schema) + validator = validator_cls(prepared_schema) + + errors = sorted( + validator.iter_errors(instance), + key=lambda error: (format_json_path(list(error.path)), error.message), + ) + + return [ + f"{format_json_path(list(error.path))}: {error.message}" for error in errors + ] + + def remove_titles_from_schema(schema: dict) -> dict[str, Any]: def _strip_titles(node: Any) -> Any: diff --git a/electron/servers/fastapi/uv.lock b/electron/servers/fastapi/uv.lock index deb6f807..c59f127d 100644 --- a/electron/servers/fastapi/uv.lock +++ b/electron/servers/fastapi/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = "==3.11.*" resolution-markers = [ "platform_machine == 'aarch64' and sys_platform == 'linux'", @@ -624,6 +624,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" }, { url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size = 630383, upload-time = "2025-06-05T16:38:51.785Z" }, { url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = 
"sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" }, + { url = "https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" }, { url = "https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" }, { url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" }, { url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" }, @@ -1302,6 +1303,7 @@ dependencies = [ { name = "fastmcp" }, { name = "google-genai" }, { name = "greenlet", marker = "sys_platform == 'darwin'" }, + { name = "jsonschema" }, { name = "nltk" }, { name = "openai" }, { name = "pathvalidate" }, @@ -1329,6 +1331,7 @@ requires-dist = [ { name = "fastmcp", specifier = ">=2.11.0" }, { name = "google-genai", specifier = ">=1.28.0" }, { name = "greenlet", marker = "sys_platform == 'darwin'", specifier = ">=3.0.0" }, + { name = "jsonschema", specifier = ">=4.25.0" }, { name = "nltk", specifier = ">=3.9.1" 
}, { name = "openai", specifier = ">=1.98.0" }, { name = "pathvalidate", specifier = ">=3.3.1" }, diff --git a/electron/version.json b/electron/version.json index aedbe8c9..ffadbbe7 100644 --- a/electron/version.json +++ b/electron/version.json @@ -1,9 +1,9 @@ { - "version": "0.7.2-beta", - "message": "What's New\n\n:repeat: Smarter Streaming & Retries\n- Outline and slide generation now retries automatically on failure - fewer interrupted generations, smoother experience end to end\n\n:frame_photo: ComfyUI Fix\n- Image generation via ComfyUI is back to working correctly - mid-generation failures resolved\n\n:art: UI & Template Polish\n- Continued refinements to UI components - tighter layouts, cleaner interactions\n- Template improvements - more consistent rendering across providers\n\n:bar_chart: Better Analytics & Error Tracking\n- Sentry integrated for crash and error monitoring - helps us catch and fix issues faster\n\n:wrench: Fixes\n- Download URL and version message corrected in version.json\n- Various stability and content fixes across the board", + "version": "0.7.3-beta", + "message": "Presenton Desktop electron-v0.7.3-beta\n\nSmarter content generation, reliable web search, no more shady Windows popups, and a round of minor fixes under the hood. Clean update. 
🙌\n\nWhat's New\n\n🧠 Smarter Slide Content Generation\n• Overflow mitigation loop added — slides no longer clip or overflow when content runs long\n• Improved system prompt for slide content generation — cleaner, better-fitting output every time\n\n🔍 Web Search Fixed\n• Web search is back to working reliably during presentation generation\n\n🪟 Windows Fix\n• Export tasks no longer flash a console window on Windows — cleaner, more polished experience\n\n🔧 Minor Fixes\n• Various small fixes and stability improvements across the app\n\n---\nView full diff: electron-v0.7.2-beta → electron-v0.7.3-beta\nhttps://github.com/presenton/presenton/compare/electron-v0.7.2-beta...electron-v0.7.3-beta\n\nInstallation\nDownload Link: https://presenton.ai/download\nLove the app? Star us on GitHub → github.com/presenton/presenton", "downloads": { - "linux": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.deb", - "mac": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.dmg", - "windows": "https://github.com/presenton/presenton/releases/download/electron-v0.7.2-beta/Presenton-0.7.2-beta.exe" + "linux": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.deb", + "mac": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.dmg", + "windows": "https://github.com/presenton/presenton/releases/download/electron-v0.7.3-beta/Presenton-0.7.3-beta.exe" } -} \ No newline at end of file +} diff --git a/servers/fastapi/api/lifespan.py b/servers/fastapi/api/lifespan.py index 6fe4e6c4..1ce3e26f 100644 --- a/servers/fastapi/api/lifespan.py +++ b/servers/fastapi/api/lifespan.py @@ -4,7 +4,7 @@ import os from fastapi import FastAPI from migrations import migrate_database_on_startup -from services.database import create_db_and_tables +from services.database import create_db_and_tables, dispose_engines from 
utils.get_env import get_app_data_directory_env from utils.model_availability import ( check_llm_and_image_provider_api_or_model_availability, @@ -24,3 +24,5 @@ async def app_lifespan(_: FastAPI): await create_db_and_tables() await check_llm_and_image_provider_api_or_model_availability() yield + # Shutdown: release all database connections to prevent stale/leaked pools. + await dispose_engines() diff --git a/servers/fastapi/enums/image_provider.py b/servers/fastapi/enums/image_provider.py index 9d773ad5..76312b73 100644 --- a/servers/fastapi/enums/image_provider.py +++ b/servers/fastapi/enums/image_provider.py @@ -9,3 +9,4 @@ class ImageProvider(Enum): DALLE3 = "dall-e-3" GPT_IMAGE_1_5 = "gpt-image-1.5" COMFYUI = "comfyui" + OPEN_WEBUI = "open_webui" diff --git a/servers/fastapi/models/user_config.py b/servers/fastapi/models/user_config.py index 05b050d7..111c585d 100644 --- a/servers/fastapi/models/user_config.py +++ b/servers/fastapi/models/user_config.py @@ -36,6 +36,10 @@ class UserConfig(BaseModel): COMFYUI_URL: Optional[str] = None COMFYUI_WORKFLOW: Optional[str] = None + # Open WebUI Image Provider + OPEN_WEBUI_IMAGE_URL: Optional[str] = None + OPEN_WEBUI_IMAGE_API_KEY: Optional[str] = None + # Dalle 3 Quality DALL_E_3_QUALITY: Optional[str] = None # Gpt Image 1.5 Quality diff --git a/servers/fastapi/services/database.py b/servers/fastapi/services/database.py index dcc03b01..6bd6aaf1 100644 --- a/servers/fastapi/services/database.py +++ b/servers/fastapi/services/database.py @@ -20,14 +20,20 @@ from models.sql.template import TemplateModel from models.sql.template_create_info import TemplateCreateInfoModel from models.sql.slide import SlideModel from models.sql.webhook_subscription import WebhookSubscription -from utils.db_utils import get_database_url_and_connect_args from utils.get_env import get_app_data_directory_env from utils.get_env import get_migrate_database_on_startup_env +from utils.db_utils import get_database_url_and_connect_args, 
get_pool_kwargs database_url, connect_args = get_database_url_and_connect_args() -sql_engine: AsyncEngine = create_async_engine(database_url, connect_args=connect_args) +# Apply connection-pool settings for server-class databases (PostgreSQL, MySQL). +# SQLite uses a file-lock model and ignores pool configuration, so we skip it. +_pool_kwargs = get_pool_kwargs() if "sqlite" not in database_url else {} + +sql_engine: AsyncEngine = create_async_engine( + database_url, connect_args=connect_args, **_pool_kwargs +) async_session_maker = async_sessionmaker(sql_engine, expire_on_commit=False) @@ -81,3 +87,14 @@ async def create_db_and_tables(): tables=[OllamaPullStatus.__table__], ) ) + + +async def dispose_engines(): + """Dispose all engine connection pools. + + Call this during application shutdown (e.g. in a FastAPI ``shutdown`` + event or lifespan context) to release every connection back to the + database and prevent stale / leaked connections. + """ + await sql_engine.dispose() + await container_db_engine.dispose() diff --git a/servers/fastapi/services/image_generation_service.py b/servers/fastapi/services/image_generation_service.py index 8d0ea9cb..c7f06bd9 100644 --- a/servers/fastapi/services/image_generation_service.py +++ b/servers/fastapi/services/image_generation_service.py @@ -13,6 +13,8 @@ from utils.get_env import ( get_dall_e_3_quality_env, get_gpt_image_1_5_quality_env, get_pexels_api_key_env, + get_open_webui_image_url_env, + get_open_webui_image_api_key_env, ) from utils.get_env import get_pixabay_api_key_env from utils.get_env import get_comfyui_url_env @@ -26,6 +28,7 @@ from utils.image_provider import ( is_nanobanana_pro_selected, is_dalle3_selected, is_comfyui_selected, + is_open_webui_selected, ) import uuid @@ -54,6 +57,8 @@ class ImageGenerationService: return self.generate_image_openai_gpt_image_1_5 elif is_comfyui_selected(): return self.generate_image_comfyui + elif is_open_webui_selected(): + return self.generate_image_open_webui return None 
def is_stock_provider_selected(self): @@ -146,6 +151,88 @@ class ImageGenerationService: get_gpt_image_1_5_quality_env() or "medium", ) + async def generate_image_open_webui( + self, prompt: str, output_directory: str + ) -> str: + base_url = get_open_webui_image_url_env() + if not base_url: + raise ValueError("OPEN_WEBUI_IMAGE_URL environment variable is not set") + + base_url = base_url.rstrip("/") + api_key = get_open_webui_image_api_key_env() or "" + + from urllib.parse import urlparse + + parsed = urlparse(base_url) + origin = f"{parsed.scheme}://{parsed.netloc}" + + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + payload = { + "prompt": prompt, + "n": 1, + "size": "1024x1024", + } + + async with aiohttp.ClientSession(trust_env=True) as session: + resp = await session.post( + f"{base_url}/images/generations", + json=payload, + headers=headers, + timeout=aiohttp.ClientTimeout(total=300), + ) + + if resp.status != 200: + error_text = await resp.text() + raise Exception( + f"Open WebUI image generation returned {resp.status}: {error_text}" + ) + + body = await resp.json() + + # Open WebUI returns a bare [...] array instead of {"data": [...]}. 
+ if isinstance(body, list): + items = body + elif isinstance(body, dict) and "data" in body: + items = body["data"] + else: + raise Exception(f"Unexpected response format: {type(body)}") + + if not items: + raise Exception("Open WebUI returned empty results") + + item = items[0] + image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png") + + if item.get("b64_json"): + with open(image_path, "wb") as f: + f.write(base64.b64decode(item["b64_json"])) + elif item.get("url"): + image_url = item["url"] + # Open WebUI returns relative URLs like /api/v1/files/.../content + if image_url.startswith("/"): + image_url = origin + image_url + dl_headers = {} + if api_key: + dl_headers["Authorization"] = f"Bearer {api_key}" + dl_resp = await session.get( + image_url, + headers=dl_headers, + timeout=aiohttp.ClientTimeout(total=120), + ) + if dl_resp.status != 200: + raise Exception( + f"Failed to download image: {dl_resp.status}" + ) + with open(image_path, "wb") as f: + f.write(await dl_resp.read()) + else: + raise Exception("Open WebUI returned no image data") + + return image_path + async def _generate_image_google( self, prompt: str, output_directory: str, model: str ) -> str: diff --git a/servers/fastapi/services/llm_client.py b/servers/fastapi/services/llm_client.py index 11f29dd5..a62e8098 100644 --- a/servers/fastapi/services/llm_client.py +++ b/servers/fastapi/services/llm_client.py @@ -91,6 +91,7 @@ class LLMClient: if ( self.llm_provider == LLMProvider.OLLAMA or self.llm_provider == LLMProvider.CUSTOM + or self.llm_provider == LLMProvider.CODEX ): return False return parse_bool_or_none(get_web_grounding_env()) or False diff --git a/servers/fastapi/utils/db_utils.py b/servers/fastapi/utils/db_utils.py index 60b521fb..a60009f9 100644 --- a/servers/fastapi/utils/db_utils.py +++ b/servers/fastapi/utils/db_utils.py @@ -20,6 +20,38 @@ def _ensure_sqlite_parent_dir(database_url: str) -> None: parent = os.path.dirname(db_path) if parent: os.makedirs(parent, 
exist_ok=True) +def _int_env(name: str, default: int) -> int: + """Read an integer from an environment variable, falling back to *default*.""" + raw = os.getenv(name) + if raw is None: + return default + try: + return int(raw) + except ValueError: + return default + + +def get_pool_kwargs() -> dict: + """Build SQLAlchemy engine pool keyword arguments from environment variables. + + Supported variables (all optional): + DB_POOL_SIZE – max persistent connections (default 5) + DB_MAX_OVERFLOW – extra connections above pool_size (default 10) + DB_POOL_TIMEOUT – seconds to wait for a connection (default 30) + DB_POOL_RECYCLE – seconds before a connection is recycled (default 1800) + DB_POOL_PRE_PING – enable connection liveness check (default true) + + For SQLite the pool settings are not applicable and an empty dict is + returned, since SQLite uses ``StaticPool`` / ``NullPool`` by default. + """ + return { + "pool_size": _int_env("DB_POOL_SIZE", 5), + "max_overflow": _int_env("DB_MAX_OVERFLOW", 10), + "pool_timeout": _int_env("DB_POOL_TIMEOUT", 30), + "pool_recycle": _int_env("DB_POOL_RECYCLE", 1800), + "pool_pre_ping": os.getenv("DB_POOL_PRE_PING", "true").lower() + not in ("false", "0", "no"), + } def get_database_url_and_connect_args() -> tuple[str, dict]: diff --git a/servers/fastapi/utils/get_env.py b/servers/fastapi/utils/get_env.py index 84d9b749..5a940f78 100644 --- a/servers/fastapi/utils/get_env.py +++ b/servers/fastapi/utils/get_env.py @@ -166,3 +166,10 @@ def get_sentry_traces_sample_rate_env(): def get_sentry_send_default_pii_env(): return os.getenv("SENTRY_SEND_DEFAULT_PII") +# Open WebUI Image Provider +def get_open_webui_image_url_env(): + return os.getenv("OPEN_WEBUI_IMAGE_URL") + + +def get_open_webui_image_api_key_env(): + return os.getenv("OPEN_WEBUI_IMAGE_API_KEY") diff --git a/servers/fastapi/utils/image_provider.py b/servers/fastapi/utils/image_provider.py index 15469709..cb3525e0 100644 --- a/servers/fastapi/utils/image_provider.py +++ 
b/servers/fastapi/utils/image_provider.py @@ -38,6 +38,10 @@ def is_comfyui_selected() -> bool: return ImageProvider.COMFYUI == get_selected_image_provider() +def is_open_webui_selected() -> bool: + return ImageProvider.OPEN_WEBUI == get_selected_image_provider() + + def get_selected_image_provider() -> ImageProvider | None: """ Get the selected image provider from environment variables. diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 13bf3a14..8ae47ae7 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -1,6 +1,7 @@ from datetime import datetime from typing import Optional +from enums.llm_provider import LLMProvider from models.llm_message import LLMSystemMessage, LLMUserMessage from models.presentation_outline_model import PresentationOutlineModel from models.llm_tools import SearchWebTool @@ -170,6 +171,16 @@ async def generate_ppt_outline( ) client = LLMClient() + providers_with_search_tool = { + LLMProvider.OPENAI, + LLMProvider.ANTHROPIC, + LLMProvider.GOOGLE, + } + use_search_tool = ( + web_search + and client.enable_web_grounding() + and client.llm_provider in providers_with_search_tool + ) try: async for chunk in client.stream_structured( @@ -187,11 +198,7 @@ async def generate_ppt_outline( ), response_model.model_json_schema(), strict=True, - tools=( - [SearchWebTool] - if (client.enable_web_grounding() and web_search) - else None - ), + tools=([SearchWebTool] if use_search_tool else None), ): yield chunk except Exception as e: diff --git a/servers/fastapi/utils/set_env.py b/servers/fastapi/utils/set_env.py index f626f4df..1a367735 100644 --- a/servers/fastapi/utils/set_env.py +++ b/servers/fastapi/utils/set_env.py @@ -136,3 +136,12 @@ def set_codex_is_pro_env(value: str): def set_codex_model_env(value: str): os.environ["CODEX_MODEL"] = value + + +# 
Open WebUI Image Provider +def set_open_webui_image_url_env(value: str): + os.environ["OPEN_WEBUI_IMAGE_URL"] = value + + +def set_open_webui_image_api_key_env(value: str): + os.environ["OPEN_WEBUI_IMAGE_API_KEY"] = value diff --git a/servers/fastapi/utils/user_config.py b/servers/fastapi/utils/user_config.py index ab1d91da..b7bfaab1 100644 --- a/servers/fastapi/utils/user_config.py +++ b/servers/fastapi/utils/user_config.py @@ -36,6 +36,8 @@ from utils.get_env import ( get_codex_email_env, get_codex_is_pro_env, get_codex_model_env, + get_open_webui_image_url_env, + get_open_webui_image_api_key_env, ) from utils.parsers import parse_bool_or_none from utils.set_env import ( @@ -71,6 +73,8 @@ from utils.set_env import ( set_codex_email_env, set_codex_is_pro_env, set_codex_model_env, + set_open_webui_image_url_env, + set_open_webui_image_api_key_env, ) @@ -146,6 +150,8 @@ def get_user_config(): if existing_config.CODEX_IS_PRO is not None else parse_bool_or_none(get_codex_is_pro_env()) ), + OPEN_WEBUI_IMAGE_URL=existing_config.OPEN_WEBUI_IMAGE_URL or get_open_webui_image_url_env(), + OPEN_WEBUI_IMAGE_API_KEY=existing_config.OPEN_WEBUI_IMAGE_API_KEY or get_open_webui_image_api_key_env(), ) @@ -215,6 +221,10 @@ def update_env_with_user_config(): set_codex_email_env(user_config.CODEX_EMAIL) if user_config.CODEX_IS_PRO is not None: set_codex_is_pro_env(str(user_config.CODEX_IS_PRO)) + if user_config.OPEN_WEBUI_IMAGE_URL: + set_open_webui_image_url_env(user_config.OPEN_WEBUI_IMAGE_URL) + if user_config.OPEN_WEBUI_IMAGE_API_KEY: + set_open_webui_image_api_key_env(user_config.OPEN_WEBUI_IMAGE_API_KEY) def save_codex_tokens_to_user_config() -> None: diff --git a/servers/nextjs/app/(presentation-generator)/(dashboard)/settings/ImageProvider.tsx b/servers/nextjs/app/(presentation-generator)/(dashboard)/settings/ImageProvider.tsx index 1ad7983a..4f0f521e 100644 --- a/servers/nextjs/app/(presentation-generator)/(dashboard)/settings/ImageProvider.tsx +++ 
b/servers/nextjs/app/(presentation-generator)/(dashboard)/settings/ImageProvider.tsx @@ -262,6 +262,33 @@ const ImageProvider = ({ llmConfig, setLlmConfig }: { llmConfig: LLMConfig, setL ); } + // Show Open WebUI configuration + if (provider.value === "open_webui") { + return ( +
+
+ +
+ { + input_field_changed( + e.target.value, + "OPEN_WEBUI_IMAGE_URL" + ); + }} + /> +
+
+
+ ); + } + // Show API key input for other providers return (
@@ -300,6 +327,31 @@ const ImageProvider = ({ llmConfig, setLlmConfig }: { llmConfig: LLMConfig, setL {!isImageGenerationDisabled &&
{renderQualitySelector(llmConfig, input_field_changed)} + {llmConfig.IMAGE_PROVIDER === "open_webui" && ( +
+ +
+ { + input_field_changed(e.target.value, "OPEN_WEBUI_IMAGE_API_KEY"); + }} + /> + +
+
+ )} {llmConfig.IMAGE_PROVIDER === "comfyui" &&