Merge pull request #532 from presenton/refactor/use-llmai

refactor: use llmai as llm client, removes old llm client and tool call handler
This commit is contained in:
Saurav Niraula 2026-04-23 13:46:11 +05:45 committed by GitHub
commit 6765897913
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 701 additions and 3370 deletions

View file

@ -214,7 +214,6 @@ Other optional variables exist in code (for example advanced Mem0 paths, LitePar
- **CUSTOM_LLM_URL**: OpenAI-compatible base URL if **LLM** is **custom**.
- **CUSTOM_LLM_API_KEY**: API key if **LLM** is **custom**.
- **CUSTOM_MODEL**: Model id if **LLM** is **custom**.
- **TOOL_CALLS**=[true/false]: If **true**, the custom LLM uses tool calls instead of JSON schema for structured output.
- **DISABLE_THINKING**=[true/false]: If **true**, disables “thinking” on the custom LLM.
- **WEB_GROUNDING**=[true/false]: If **true**, enables web search for OpenAI, Google, and Anthropic models.
- **EXTENDED_REASONING**=[true/false]: Enables extended reasoning where supported by the configured stack.

View file

@ -35,7 +35,6 @@ services:
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- TOOL_CALLS=${TOOL_CALLS}
- DISABLE_THINKING=${DISABLE_THINKING}
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}
@ -99,7 +98,6 @@ services:
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- TOOL_CALLS=${TOOL_CALLS}
- DISABLE_THINKING=${DISABLE_THINKING}
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}
@ -158,7 +156,6 @@ services:
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- TOOL_CALLS=${TOOL_CALLS}
- DISABLE_THINKING=${DISABLE_THINKING}
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}
@ -223,7 +220,6 @@ services:
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
- EXTENDED_REASONING=${EXTENDED_REASONING}
- TOOL_CALLS=${TOOL_CALLS}
- DISABLE_THINKING=${DISABLE_THINKING}
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}

View file

@ -21,6 +21,7 @@ from services.documents_loader import DocumentsLoader
from services.mem0_presentation_memory_service import (
MEM0_PRESENTATION_MEMORY_SERVICE,
)
from utils.llm_utils import message_content_to_text
from utils.outline_utils import (
get_no_of_outlines_to_generate_for_n_slides,
get_presentation_title_from_presentation_outline,
@ -85,12 +86,12 @@ async def stream_outlines(
await MEM0_PRESENTATION_MEMORY_SERVICE.store_generation_context(
presentation_id=presentation.id,
system_prompt=(
outline_messages[0].content
message_content_to_text(outline_messages[0].content)
if len(outline_messages) > 0
else None
),
user_prompt=(
outline_messages[1].content
message_content_to_text(outline_messages[1].content)
if len(outline_messages) > 1
else None
),

View file

@ -75,6 +75,7 @@ from utils.process_slides import (
process_slide_and_fetch_assets,
)
from utils.get_layout_by_name import get_layout_by_name
from utils.llm_utils import message_content_to_text
from models.presentation_layout import PresentationLayoutModel
import uuid
@ -666,12 +667,12 @@ async def generate_presentation_handler(
await MEM0_PRESENTATION_MEMORY_SERVICE.store_generation_context(
presentation_id=presentation_id,
system_prompt=(
outline_messages[0].content
message_content_to_text(outline_messages[0].content)
if len(outline_messages) > 0
else None
),
user_prompt=(
outline_messages[1].content
message_content_to_text(outline_messages[1].content)
if len(outline_messages) > 1
else None
),

View file

@ -1,59 +0,0 @@
from typing import Any, List, Literal, Optional
from pydantic import BaseModel
from google.genai.types import Content as GoogleContent
from models.llm_tool_call import AnthropicToolCall
# Common base class for every chat-message model declared in this module.
# It declares no fields of its own; subclasses add role/content fields.
class LLMMessage(BaseModel):
pass
# Plain-text message authored by the user.
class LLMUserMessage(LLMMessage):
# Discriminator; always "user".
role: Literal["user"] = "user"
# Message text.
content: str
# Plain-text system (instruction) message.
class LLMSystemMessage(LLMMessage):
# Discriminator; always "system".
role: Literal["system"] = "system"
# Instruction text.
content: str
# Assistant turn in OpenAI chat format; may carry text, tool calls, or both.
class OpenAIAssistantMessage(LLMMessage):
# Discriminator; always "assistant".
role: Literal["assistant"] = "assistant"
# Assistant text; optional and defaults to None.
content: str | None = None
# Tool calls requested by the assistant, stored as plain dicts; optional.
tool_calls: Optional[List[dict]] = None
# Assistant turn whose payload is a google-genai `Content` object.
class GoogleAssistantMessage(LLMMessage):
# Discriminator; always "assistant".
role: Literal["assistant"] = "assistant"
# google.genai.types.Content instance (imported here as GoogleContent).
content: GoogleContent
# Assistant turn in Anthropic format: a list of tool-use requests.
class AnthropicAssistantMessage(LLMMessage):
# Discriminator; always "assistant".
role: Literal["assistant"] = "assistant"
# Tool calls issued by the assistant in this turn.
content: List[AnthropicToolCall]
# Result of one Anthropic tool call ("tool_result" block).
class AnthropicToolCallMessage(LLMMessage):
# Block type discriminator; always "tool_result".
type: Literal["tool_result"] = "tool_result"
# Id of the tool-use request this result answers.
tool_use_id: str
# Tool output as text.
content: str
# User turn in Anthropic format that carries tool results.
class AnthropicUserMessage(LLMMessage):
# Discriminator; always "user".
role: Literal["user"] = "user"
# Tool-result blocks included in this turn.
content: List[AnthropicToolCallMessage]
# Result of one OpenAI-style tool call ("tool" role message).
class OpenAIToolCallMessage(LLMMessage):
# Discriminator; always "tool".
role: Literal["tool"] = "tool"
# Tool output as text.
content: str
# Id of the tool call this message answers.
tool_call_id: str
# Result of one Google-style function call.
class GoogleToolCallMessage(LLMMessage):
# Discriminator; always "tool".
role: Literal["tool"] = "tool"
# Id of the originating call; optional and defaults to None.
id: Optional[str] = None
# Name of the function that was called.
name: str
# Function response payload as a dict.
response: dict

View file

@ -1,30 +0,0 @@
from typing import Literal, Optional
from pydantic import BaseModel
# Common base class for the provider-specific tool-call models in this module.
# It declares no fields of its own.
class LLMToolCall(BaseModel):
pass
# Function payload nested inside an OpenAI-style tool call.
class OpenAIToolCallFunction(BaseModel):
# Name of the function to invoke.
name: str
# Arguments kept as a string (not a parsed dict).
arguments: str
# Tool call in OpenAI chat format.
class OpenAIToolCall(LLMToolCall):
# Identifier of this call.
id: str
# Call type discriminator; always "function".
type: Literal["function"] = "function"
# Function name and string-encoded arguments.
function: OpenAIToolCallFunction
# Tool call in Google format.
class GoogleToolCall(LLMToolCall):
# Identifier of this call; optional and defaults to None.
id: Optional[str] = None
# Name of the function to invoke.
name: str
# Arguments as a dict; optional and defaults to None.
arguments: Optional[dict] = None
# Tool call in Anthropic format ("tool_use" block).
class AnthropicToolCall(LLMToolCall):
# Block type discriminator; always "tool_use".
type: Literal["tool_use"] = "tool_use"
# Identifier of this call.
id: str
# Name of the tool to invoke.
name: str
# Tool input; typed as `object`, so no structure is enforced here.
input: object

View file

@ -1,29 +0,0 @@
from typing import Any, Callable, Coroutine, Optional
from pydantic import BaseModel, Field
# Common base class for tool definitions declared in this module.
# It declares no fields of its own.
class LLMTool(BaseModel):
pass
# Tool described at runtime by explicit fields rather than by a model class.
class LLMDynamicTool(LLMTool):
# Tool name.
name: str
# Human-readable tool description.
description: str
# Parameter schema as a dict; defaults to an empty dict.
parameters: dict = {}
# Async callable that produces the tool's string result.
handler: Callable[..., Coroutine[Any, Any, str]]
# Argument model for the web-search tool.
# NOTE(review): the class docstring below appears to be consumed at runtime as
# the tool description (a tool parser in this change set reads `tool.__doc__`),
# so treat its text as data, not as free-form documentation.
class SearchWebTool(LLMTool):
"""
Search the web for information.
"""
# Search query text; the Field description is part of the generated schema.
query: str = Field(description="The query to search the web for")
# Argument model for the current-datetime tool; it takes no arguments.
# NOTE(review): the class docstring below appears to be consumed at runtime as
# the tool description (a tool parser in this change set reads `tool.__doc__`),
# so treat its text as data, not as free-form documentation.
class GetCurrentDatetimeTool(LLMTool):
"""
Get the current datetime.
"""
pass

View file

@ -46,7 +46,6 @@ class UserConfig(BaseModel):
GPT_IMAGE_1_5_QUALITY: Optional[str] = None
# Reasoning
TOOL_CALLS: Optional[bool] = None
DISABLE_THINKING: Optional[bool] = None
EXTENDED_REASONING: Optional[bool] = None

View file

@ -7,19 +7,17 @@ Requires-Dist: alembic>=1.14.0
Requires-Dist: aiohttp>=3.12.15
Requires-Dist: aiomysql>=0.2.0
Requires-Dist: aiosqlite>=0.21.0
Requires-Dist: anthropic>=0.60.0
Requires-Dist: asyncpg>=0.30.0
Requires-Dist: chromadb>=1.0.15
Requires-Dist: dirtyjson>=1.0.8
Requires-Dist: fastapi[standard]>=0.116.1
Requires-Dist: fastembed-vectorstore>=0.5.2
Requires-Dist: fastmcp>=2.11.0
Requires-Dist: google-genai>=1.28.0
Requires-Dist: mem0ai[nlp]>=0.1.115
Requires-Dist: nltk>=3.9.1
Requires-Dist: openai>=1.98.0
Requires-Dist: pathvalidate>=3.3.1
Requires-Dist: pdfplumber>=0.11.7
Requires-Dist: pytest>=8.4.1
Requires-Dist: python-pptx>=1.0.2
Requires-Dist: redis>=6.2.0
Requires-Dist: sqlmodel>=0.0.24
Requires-Dist: llmai==0.1.9

View file

@ -3,6 +3,7 @@ api/__init__.py
api/lifespan.py
api/main.py
api/middlewares.py
api/v1/auth/router.py
api/v1/mock/router.py
api/v1/ppt/background_tasks.py
api/v1/ppt/router.py
@ -46,9 +47,6 @@ models/document_chunk.py
models/generate_presentation_request.py
models/image_prompt.py
models/json_path_guide.py
models/llm_message.py
models/llm_tool_call.py
models/llm_tools.py
models/ollama_model_metadata.py
models/ollama_model_status.py
models/pptx_models.py
@ -78,7 +76,6 @@ presenton_backend.egg-info/dependency_links.txt
presenton_backend.egg-info/requires.txt
presenton_backend.egg-info/top_level.txt
services/__init__.py
services/codex_llm.py
services/concurrent_service.py
services/database.py
services/document_conversion_service.py
@ -88,8 +85,7 @@ services/html_to_text_runs_service.py
services/icon_finder_service.py
services/image_generation_service.py
services/liteparse_service.py
services/llm_client.py
services/llm_tool_calls_handler.py
services/mem0_presentation_memory_service.py
services/pptx_presentation_creator.py
services/score_based_chunker.py
services/temp_file_service.py
@ -106,7 +102,9 @@ templates/providers.py
templates/router.py
tests/test_gemini_schema_support.py
tests/test_image_generation.py
tests/test_liteparse_service.py
tests/test_mcp_server.py
tests/test_mem0_presentation_memory_service.py
tests/test_openai_schema_support.py
tests/test_pptx_creator.py
tests/test_pptx_slides_processing.py
@ -130,7 +128,9 @@ utils/get_layout_by_name.py
utils/image_provider.py
utils/image_utils.py
utils/llm_client_error_handler.py
utils/llm_config.py
utils/llm_provider.py
utils/llm_utils.py
utils/model_availability.py
utils/ocr_language.py
utils/ollama.py
@ -141,6 +141,7 @@ utils/ppt_utils.py
utils/process_slides.py
utils/schema_utils.py
utils/set_env.py
utils/simple_auth.py
utils/theme_utils.py
utils/user_config.py
utils/validators.py

View file

@ -2,19 +2,17 @@ alembic>=1.14.0
aiohttp>=3.12.15
aiomysql>=0.2.0
aiosqlite>=0.21.0
anthropic>=0.60.0
asyncpg>=0.30.0
chromadb>=1.0.15
dirtyjson>=1.0.8
fastapi[standard]>=0.116.1
fastembed-vectorstore>=0.5.2
fastmcp>=2.11.0
google-genai>=1.28.0
mem0ai[nlp]>=0.1.115
nltk>=3.9.1
openai>=1.98.0
pathvalidate>=3.3.1
pdfplumber>=0.11.7
pytest>=8.4.1
python-pptx>=1.0.2
redis>=6.2.0
sqlmodel>=0.0.24
llmai==0.1.9

View file

@ -12,7 +12,6 @@ dependencies = [
"aiohttp>=3.12.15",
"aiomysql>=0.2.0",
"aiosqlite>=0.21.0",
"anthropic>=0.60.0",
"asyncpg>=0.30.0",
"dirtyjson>=1.0.8",
"fastapi[standard]>=0.116.1",
@ -26,6 +25,7 @@ dependencies = [
"pdfplumber>=0.11.7",
"python-pptx>=1.0.2",
"sqlmodel>=0.0.24",
"llmai==0.1.9",
]
[tool.uv]
@ -33,4 +33,12 @@ index-strategy = "unsafe-best-match"
[tool.setuptools.packages.find]
where = ["."]
include = ["api*", "enums*", "models*", "services*", "constants*", "utils*", "templates*"]
include = [
"api*",
"enums*",
"models*",
"services*",
"constants*",
"utils*",
"templates*",
]

View file

@ -1,431 +0,0 @@
"""Codex (Responses API) adapter for structured and unstructured LLM calls.
Stateless adapter: receives AsyncOpenAI client and tool_calls_handler at call time.
Auth and client creation stay in LLMClient. Structure matches other providers:
generate = call API, collect content + tool_calls, recurse on tool_calls; stream = same but yield deltas.
Uses LLMToolCallsHandler directly: tools are parsed via parse_tools() in llm_client (handler supports
Codex and returns OpenAI-style dicts); this module flattens them for the Responses API. Tool execution
uses tool_calls_handler.handle_tool_calls_openai().
"""
import dirtyjson
from typing import Any, AsyncGenerator, List, Optional, Union
from fastapi import HTTPException
from openai import APIStatusError, AsyncOpenAI, OpenAIError
from models.llm_message import (
LLMMessage,
OpenAIAssistantMessage,
LLMSystemMessage,
LLMUserMessage,
)
from models.llm_tool_call import OpenAIToolCall, OpenAIToolCallFunction
from utils.schema_utils import ensure_strict_json_schema
# Responses API requires flat tool format: {"type":"function","name":...,"description":...,"parameters":...}
# Name of the internal function tool used to carry structured JSON output.
RESPONSE_SCHEMA_NAME = "ResponseSchema"
# Required tool choice for structured: force ResponseSchema (no plain-text fallback).
STRUCTURED_TOOL_CHOICE = {"type": "function", "name": RESPONSE_SCHEMA_NAME}
# Tool-call follow-up rounds are only started while depth is below this value.
MAX_RECURSION_DEPTH = 5
def _to_responses_tools(chat_tools: List[dict]) -> List[dict]:
"""Convert Chat Completions tool format to flat Responses API format."""
result = []
for tool in chat_tools:
if tool.get("type") != "function":
result.append(tool)
continue
fn = tool.get("function") or tool
result.append({
"type": "function",
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
})
return result
def _items_to_openai_calls(items_by_id: dict[str, dict]) -> List[OpenAIToolCall]:
    """Convert collected Responses API function-call items to ``OpenAIToolCall`` objects.

    Only the values of ``items_by_id`` are read. The call id is taken from
    ``call_id``, falling back to ``id`` and then to an empty string; missing
    arguments default to the string ``"{}"``.
    """
    calls: List[OpenAIToolCall] = []
    for entry in items_by_id.values():
        function = OpenAIToolCallFunction(
            name=entry.get("name", ""),
            arguments=entry.get("arguments", "{}"),
        )
        calls.append(
            OpenAIToolCall(
                id=entry.get("call_id", entry.get("id", "")),
                type="function",
                function=function,
            )
        )
    return calls
async def _messages_after_tool_turn(
    messages: List[LLMMessage],
    items_by_id: dict[str, dict],
    tool_calls_handler: Any,
) -> List[LLMMessage]:
    """Run the requested tools and return the conversation extended by that turn.

    The returned list is a new list: the original messages, one assistant
    message recording the tool calls (with ``content=None``), then the tool
    result messages from ``tool_calls_handler.handle_tool_calls_openai``.
    """
    requested_calls = _items_to_openai_calls(items_by_id)
    tool_results = await tool_calls_handler.handle_tool_calls_openai(requested_calls)

    assistant_turn = OpenAIAssistantMessage(
        role="assistant",
        content=None,
        tool_calls=[call.model_dump() for call in requested_calls],
    )

    history = list(messages)
    history.append(assistant_turn)
    history.extend(tool_results)
    return history
def _build_body(
    model: str,
    messages: List[LLMMessage],
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[Union[str, dict]] = None,
) -> dict:
    """Build Responses API request body.

    System messages become ``instructions`` (the last one wins). User messages
    are always added as ``input_text`` turns. Assistant messages are added as
    ``output_text`` turns only when they have text. Any other message type is
    added as a user ``input_text`` turn when it exposes non-empty ``content``.
    ``instructions``, ``input`` and ``tools`` are included only when truthy;
    ``tool_choice`` defaults to ``"auto"``.
    """

    def _text_turn(role: str, part_type: str, text: str) -> dict:
        return {"role": role, "content": [{"type": part_type, "text": text}]}

    system_text = None
    turns = []
    for message in messages:
        if isinstance(message, LLMSystemMessage):
            system_text = message.content
            continue
        if isinstance(message, LLMUserMessage):
            # User turns are forwarded even when their text is empty.
            turns.append(_text_turn("user", "input_text", message.content))
            continue
        if isinstance(message, OpenAIAssistantMessage):
            role, part_type = "assistant", "output_text"
            text = message.content or ""
        else:
            role, part_type = "user", "input_text"
            text = getattr(message, "content", "") or ""
        if text:
            turns.append(_text_turn(role, part_type, text))

    request: dict = {
        "model": model,
        "store": False,
        "stream": True,
        "text": {"verbosity": "medium"},
        "include": ["reasoning.encrypted_content"],
        "tool_choice": "auto" if tool_choice is None else tool_choice,
        "parallel_tool_calls": True,
    }
    for key, value in (
        ("instructions", system_text),
        ("input", turns),
        ("tools", tools),
    ):
        if value:
            request[key] = value
    return request
def _event_to_dict(event: Any) -> dict:
"""Convert SDK event to dict."""
if hasattr(event, "model_dump"):
return event.model_dump()
return {
"type": getattr(event, "type", None),
"delta": getattr(event, "delta", None),
"item": getattr(event, "item", None),
"message": getattr(event, "message", None),
"arguments": getattr(event, "arguments", None),
"name": getattr(event, "name", None),
}
async def _stream_raw(
    client: AsyncOpenAI,
    model: str,
    messages: List[LLMMessage],
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[Union[str, dict]] = None,
) -> AsyncGenerator[dict, None]:
    """Yield raw SSE event dicts from Codex Responses API.

    Raises:
        HTTPException: when creating the stream fails with an OpenAI SDK
            error. The status is the error's ``status_code`` when present,
            otherwise 502; the detail is truncated to 400 characters.
    """
    request = _build_body(model, messages, tools, tool_choice=tool_choice)
    # ``stream`` is passed explicitly below, so drop it from the keyword set.
    request.pop("stream", None)

    try:
        events = await client.responses.create(stream=True, **request)
    except (APIStatusError, OpenAIError) as exc:
        message = getattr(exc, "message", str(exc)) or str(exc)
        raise HTTPException(
            status_code=getattr(exc, "status_code", 502),
            detail=f"Codex API error: {message}"[:400],
        ) from exc

    async for raw_event in events:
        yield _event_to_dict(raw_event)
class CodexLLMAdapter:
    """Stateless adapter for Codex Responses API. Matches other providers: generate/stream + tool recursion.

    Every method is a ``@staticmethod`` that receives the ``AsyncOpenAI``
    client and the tool-calls handler at call time. Tool-call follow-up
    rounds are started only while ``depth < MAX_RECURSION_DEPTH``.

    ``max_tokens`` is accepted by every method for signature parity and is
    threaded through recursive calls, but it is not forwarded to the request
    body built by ``_build_body``.
    """

    @staticmethod
    async def generate_codex(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        tool_calls_handler: Any,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> Optional[str]:
        """Generate text; on tool_calls handle and recurse (like _generate_openai / _generate_anthropic).

        Returns:
            The concatenated text deltas of the final round, or ``None`` when
            no text was produced. When a round triggers tool calls, only the
            follow-up round's result is returned.

        Raises:
            HTTPException: 502 when the stream reports ``response.failed`` or
                ``error``.
        """
        print(
            f"Codex generate: model={model} depth={depth} tools_count={len(tools) if tools else 0}"
        )
        responses_tools = _to_responses_tools(tools) if tools else None

        text_parts: List[str] = []
        tool_calls_by_id: dict[str, dict] = {}
        async for event in _stream_raw(
            client, model, messages, responses_tools, tool_choice=None
        ):
            event_type = event.get("type", "")
            if event_type == "response.output_text.delta":
                delta = event.get("delta", "")
                if delta:
                    text_parts.append(delta)
            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(status_code=502, detail=f"Codex error: {msg_text}")

        if tool_calls_by_id and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex generate: tool calls detected depth={depth} count={len(tool_calls_by_id)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, tool_calls_by_id, tool_calls_handler
            )
            return await CodexLLMAdapter.generate_codex(
                client, model, new_messages, tool_calls_handler,
                max_tokens=max_tokens, tools=tools, depth=depth + 1,
            )
        return "".join(text_parts) or None

    @staticmethod
    async def stream_codex(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        tool_calls_handler: Any,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> AsyncGenerator[str, None]:
        """Stream text deltas; on tool_calls handle and recurse (like _stream_openai).

        Text deltas of the current round are yielded as they arrive; if the
        round ends with tool calls, the follow-up round's chunks are yielded
        afterwards.

        Raises:
            HTTPException: 502 when the stream reports ``response.failed`` or
                ``error``.
        """
        print(
            f"Codex stream: model={model} depth={depth} tools_count={len(tools) if tools else 0}"
        )
        responses_tools = _to_responses_tools(tools) if tools else None

        tool_calls_by_id: dict[str, dict] = {}
        async for event in _stream_raw(
            client, model, messages, responses_tools, tool_choice=None
        ):
            event_type = event.get("type", "")
            if event_type == "response.output_text.delta":
                delta = event.get("delta", "")
                if delta:
                    yield delta
            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(
                    status_code=502, detail=f"Codex stream error: {msg_text}"
                )

        if tool_calls_by_id and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex stream: tool calls detected depth={depth} count={len(tool_calls_by_id)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, tool_calls_by_id, tool_calls_handler
            )
            async for chunk in CodexLLMAdapter.stream_codex(
                client, model, new_messages, tool_calls_handler,
                max_tokens=max_tokens, tools=tools, depth=depth + 1,
            ):
                yield chunk

    @staticmethod
    async def stream_codex_structured(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        response_format: dict,
        tool_calls_handler: Any,
        strict: bool = False,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> AsyncGenerator[str, None]:
        """Stream JSON chunks from ResponseSchema tool; recurse for other tool_calls.

        Structured output is achieved by always adding an internal ResponseSchema "tool"
        (with response_format as its parameters) and tool_choice=ResponseSchema. So
        user_tools=0 only means no extra tools like web search; we still use the
        ResponseSchema tool to receive the model's JSON.

        The ResponseSchema payload is yielded exactly once per call: either as
        streamed argument deltas, or (when no delta was received) as the full
        ``arguments`` string from the first ``*.done`` event that carries it.
        Concatenating the yielded chunks therefore gives a single JSON document.

        Raises:
            HTTPException: 502 when the stream reports ``response.failed`` or
                ``error``.
        """
        user_tools_count = len(tools) if tools else 0
        print(
            f"Codex stream_structured: model={model} depth={depth} strict={strict} "
            f"user_tools={user_tools_count} (always adding ResponseSchema tool for structured JSON)"
        )
        # Strict-schema conversion is applied on the first round only.
        schema = (
            ensure_strict_json_schema(response_format, path=(), root=response_format)
            if strict and depth == 0
            else response_format
        )
        response_schema_tool = {
            "type": "function",
            "name": RESPONSE_SCHEMA_NAME,
            "description": "Provide response to the user",
            "parameters": schema,
        }
        all_tools: List[dict] = [response_schema_tool]
        if tools:
            all_tools.extend(_to_responses_tools(tools))

        tool_calls_by_id: dict[str, dict] = {}
        current_call_id: Optional[str] = None
        # Output-item id of the ResponseSchema call, used to match delta events.
        schema_item_id: Optional[str] = None
        # True once any part of the current ResponseSchema payload was yielded;
        # guards against re-emitting the same JSON from the ``*.done`` events.
        payload_emitted = False

        async for event in _stream_raw(
            client, model, messages, all_tools, tool_choice=STRUCTURED_TOOL_CHOICE
        ):
            event_type = event.get("type", "")
            if event_type == "response.output_item.added":
                item = event.get("item") or {}
                if (
                    item.get("type") == "function_call"
                    and item.get("name") == RESPONSE_SCHEMA_NAME
                ):
                    current_call_id = item.get("call_id", item.get("id"))
                    schema_item_id = item.get("id")
                    payload_emitted = False
                    print(
                        f"Codex stream_structured: ResponseSchema call started call_id={current_call_id}"
                    )
            elif event_type == "response.function_call_arguments.delta":
                if current_call_id is None:
                    continue
                # Skip deltas that are known to belong to a different function
                # call; when either id is unavailable, keep the permissive path.
                event_item_id = event.get("item_id")
                if schema_item_id and event_item_id and event_item_id != schema_item_id:
                    continue
                delta = event.get("delta", "")
                if delta:
                    # No per-chunk logging here: it would flood the logs.
                    payload_emitted = True
                    yield delta
            elif event_type == "response.function_call_arguments.done":
                if event.get("name") == RESPONSE_SCHEMA_NAME:
                    arguments = event.get("arguments", "")
                    if arguments and not payload_emitted:
                        payload_emitted = True
                        print(
                            f"Codex stream_structured: ResponseSchema arguments.done len={len(arguments)}"
                        )
                        yield arguments
            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
                    if item.get("name") == RESPONSE_SCHEMA_NAME:
                        arguments = item.get("arguments", "")
                        if arguments and not payload_emitted:
                            payload_emitted = True
                            print(
                                f"Codex stream_structured: ResponseSchema output_item.done len={len(arguments)}"
                            )
                            yield arguments
            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(
                    status_code=502, detail=f"Codex structured error: {msg_text}"
                )

        other_tool_calls = {
            k: v for k, v in tool_calls_by_id.items()
            if v.get("name") != RESPONSE_SCHEMA_NAME
        }
        if other_tool_calls and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex stream_structured: recursing for non-ResponseSchema tool calls "
                f"depth={depth} count={len(other_tool_calls)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, other_tool_calls, tool_calls_handler
            )
            async for chunk in CodexLLMAdapter.stream_codex_structured(
                client, model, new_messages, response_format, tool_calls_handler,
                strict=strict, max_tokens=max_tokens, tools=tools, depth=depth + 1,
            ):
                yield chunk

    @staticmethod
    async def generate_codex_structured(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        response_format: dict,
        tool_calls_handler: Any,
        strict: bool = False,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> Optional[dict]:
        """Collect stream and parse JSON (like _generate_openai_structured).

        Returns:
            The parsed JSON object, or ``None`` when the stream produced no
            chunks or when called with ``depth != 0``.

        Raises:
            HTTPException: 502 when the collected text cannot be parsed as a
                JSON object, even after skipping to the first ``{``.
        """
        user_tools_count = len(tools) if tools else 0
        print(
            f"Codex generate_structured: model={model} depth={depth} strict={strict} "
            f"user_tools={user_tools_count} (using ResponseSchema tool for structured JSON)"
        )
        accumulated: List[str] = []
        async for chunk in CodexLLMAdapter.stream_codex_structured(
            client, model, messages, response_format, tool_calls_handler,
            strict=strict, max_tokens=max_tokens, tools=tools, depth=depth,
        ):
            accumulated.append(chunk)

        raw = "".join(accumulated)
        if not raw:
            return None
        if depth != 0:
            return None

        try:
            parsed = dict(dirtyjson.loads(raw))
        except Exception:
            # Deliberately broad: any parse or conversion failure moves on to
            # the fallback below, which skips leading noise before the first "{".
            pass
        else:
            print(
                f"Codex generate_structured: parsed JSON keys={list(parsed.keys())[:8]}"
            )
            return parsed

        start = raw.find("{")
        if start > 0:
            try:
                parsed = dict(dirtyjson.loads(raw[start:]))
            except Exception:
                # Fall through to the explicit 502 below.
                pass
            else:
                print(
                    "Codex generate_structured: parsed JSON from offset "
                    f"{start} keys={list(parsed.keys())[:8]}"
                )
                return parsed

        raise HTTPException(
            status_code=502,
            detail=(
                "Model did not return valid structured output (expected JSON from ResponseSchema). "
                "Please retry."
            ),
        )

File diff suppressed because it is too large Load diff

View file

@ -1,211 +0,0 @@
import asyncio
from datetime import datetime
import json
from typing import Any, Callable, Coroutine, List, Optional
from fastapi import HTTPException
from enums.llm_provider import LLMProvider
from models.llm_message import (
AnthropicToolCallMessage,
GoogleToolCallMessage,
OpenAIToolCallMessage,
)
from models.llm_tool_call import AnthropicToolCall, GoogleToolCall, OpenAIToolCall
from models.llm_tools import LLMDynamicTool, LLMTool, SearchWebTool
from utils.schema_utils import (
ensure_strict_json_schema,
flatten_json_schema,
remove_titles_from_schema,
)
class LLMToolCallsHandler:
def __init__(self, client):
from services.llm_client import LLMClient
self.client: LLMClient = client
self.tools_map: dict[str, Callable[..., Coroutine[Any, Any, str]]] = {
"SearchWebTool": self.search_web_tool_call_handler,
"GetCurrentDatetimeTool": self.get_current_datetime_tool_call_handler,
}
self.dynamic_tools: List[LLMDynamicTool] = []
def get_tool_handler(
self, tool_name: str
) -> Callable[..., Coroutine[Any, Any, str]]:
handler = self.tools_map.get(tool_name)
if handler:
return handler
else:
dynamic_tools = list(
filter(lambda tool: tool.name == tool_name, self.dynamic_tools)
)
if dynamic_tools:
return dynamic_tools[0].handler
raise HTTPException(status_code=500, detail=f"Tool {tool_name} not found")
def parse_tools(self, tools: Optional[List[type[LLMTool] | LLMDynamicTool]] = None):
if tools is None:
return None
parsed_tools = map(self.parse_tool, tools)
return list(parsed_tools)
def parse_tool(self, tool: type[LLMTool] | LLMDynamicTool, strict: bool = False):
if isinstance(tool, LLMDynamicTool):
self.dynamic_tools.append(tool)
match self.client.llm_provider:
case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM:
return self.parse_tool_openai(tool, strict)
case LLMProvider.ANTHROPIC:
return self.parse_tool_anthropic(tool)
case LLMProvider.GOOGLE:
return self.parse_tool_google(tool)
case _:
raise ValueError(
f"LLM provider must be either openai, anthropic, or google"
)
def parse_tool_openai(
self, tool: type[LLMTool] | LLMDynamicTool, strict: bool = False
):
if isinstance(tool, LLMDynamicTool):
name = tool.name
description = tool.description
parameters = tool.parameters
else:
name = tool.__name__
description = tool.__doc__ or ""
parameters = tool.model_json_schema()
if strict:
parameters = ensure_strict_json_schema(parameters, path=(), root=parameters)
return {
"type": "function",
"function": {
"name": name,
"description": description,
"strict": strict,
"parameters": parameters,
},
}
def parse_tool_google(self, tool: type[LLMTool] | LLMDynamicTool):
parsed = self.parse_tool_openai(tool)
parsed["function"]["parameters"] = (
remove_titles_from_schema(
flatten_json_schema(parsed["function"]["parameters"])
)
if parsed["function"]["parameters"]
else {}
)
return {
"name": parsed["function"]["name"],
"description": parsed["function"]["description"],
"parameters": parsed["function"]["parameters"],
}
def parse_tool_anthropic(self, tool: type[LLMTool] | LLMDynamicTool):
parsed = self.parse_tool_openai(tool)
input_schema = parsed["function"]["parameters"]
return {
"name": parsed["function"]["name"],
"description": parsed["function"]["description"],
"input_schema": {"type": "object"} if input_schema == {} else input_schema,
}
async def handle_tool_calls_openai(
self,
tool_calls: List[OpenAIToolCall],
) -> List[OpenAIToolCallMessage]:
async_tool_calls_tasks = []
for tool_call in tool_calls:
tool_name = tool_call.function.name
tool_handler = self.get_tool_handler(tool_name)
async_tool_calls_tasks.append(tool_handler(tool_call.function.arguments))
tool_call_results: List[str] = await asyncio.gather(*async_tool_calls_tasks)
tool_call_messages = [
OpenAIToolCallMessage(
content=result,
tool_call_id=tool_call.id,
)
for tool_call, result in zip(tool_calls, tool_call_results)
]
return tool_call_messages
async def handle_tool_calls_google(
self,
tool_calls: List[GoogleToolCall],
) -> List[GoogleToolCallMessage]:
async_tool_calls_tasks = []
for tool_call in tool_calls:
tool_name = tool_call.name
tool_handler = self.get_tool_handler(tool_name)
async_tool_calls_tasks.append(tool_handler(json.dumps(tool_call.arguments)))
tool_call_results: List[str] = await asyncio.gather(*async_tool_calls_tasks)
tool_call_messages = [
GoogleToolCallMessage(
id=tool_call.id,
name=tool_call.name,
response={"result": result},
)
for tool_call, result in zip(tool_calls, tool_call_results)
]
return tool_call_messages
async def handle_tool_calls_anthropic(
self,
tool_calls: List[AnthropicToolCall],
) -> List[AnthropicToolCallMessage]:
async_tool_calls_tasks = []
for tool_call in tool_calls:
tool_name = tool_call.name
tool_handler = self.get_tool_handler(tool_name)
async_tool_calls_tasks.append(tool_handler(json.dumps(tool_call.input)))
tool_call_results: List[str] = await asyncio.gather(*async_tool_calls_tasks)
tool_call_messages = [
AnthropicToolCallMessage(
content=result,
tool_use_id=tool_call.id,
)
for tool_call, result in zip(tool_calls, tool_call_results)
]
return tool_call_messages
# ? Tool call handlers
# Search web tool call handler
async def search_web_tool_call_handler(self, arguments: str) -> str:
match self.client.llm_provider:
case LLMProvider.OPENAI:
return await self.search_web_tool_call_handler_openai(arguments)
case LLMProvider.ANTHROPIC:
return await self.search_web_tool_call_handler_anthropic(arguments)
case LLMProvider.GOOGLE:
return await self.search_web_tool_call_handler_google(arguments)
case _:
return (
"Web search tool call handler not implemented for this LLM provider: "
+ self.client.llm_provider.value
)
async def search_web_tool_call_handler_openai(self, arguments: str) -> str:
args = SearchWebTool.model_validate_json(arguments)
return await self.client._search_openai(args.query)
async def search_web_tool_call_handler_google(self, arguments: str) -> str:
args = SearchWebTool.model_validate_json(arguments)
return await self.client._search_google(args.query)
async def search_web_tool_call_handler_anthropic(self, arguments: str) -> str:
args = SearchWebTool.model_validate_json(arguments)
return await self.client._search_anthropic(args.query)
# Get current datetime tool call handler
async def get_current_datetime_tool_call_handler(self, _) -> str:
current_time = datetime.now()
return f"{current_time.strftime('%A, %B %d, %Y')} at {current_time.strftime('%I:%M:%S %p')}"

View file

@ -4,10 +4,17 @@ from dataclasses import dataclass
import time
from typing import Any, Awaitable, Callable, Optional
from anthropic import AsyncAnthropic
from fastapi import HTTPException
from google import genai
from google.genai import types as google_types
from llmai import AnthropicClient
from llmai.shared import (
AnthropicClientConfig,
ImageContentPart,
SystemMessage,
TextResponse,
UserMessage,
)
from openai import AsyncOpenAI
from enums.llm_provider import LLMProvider
@ -160,11 +167,28 @@ def _get_google_client() -> genai.Client:
return genai.Client(api_key=api_key)
def _get_anthropic_client() -> AsyncAnthropic:
def _get_anthropic_client() -> AnthropicClient:
api_key = get_anthropic_api_key_env()
if not api_key:
raise HTTPException(status_code=400, detail="ANTHROPIC_API_KEY is not set")
return AsyncAnthropic(api_key=api_key)
return AnthropicClient(config=AnthropicClientConfig(api_key=api_key))
def _read_llmai_response_text(response: Any) -> str:
content = getattr(response, "content", None)
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for part in content:
if isinstance(part, str):
parts.append(part)
continue
text = getattr(part, "text", None)
if isinstance(text, str):
parts.append(text)
return "".join(parts)
return getattr(content, "text", None) or ""
async def _call_openai_like(
@ -308,28 +332,24 @@ async def _call_anthropic(
media_type: str = "image/png",
) -> str:
client = _get_anthropic_client()
content = [{"type": "text", "text": user_text}]
content: str | list[object] = user_text
if image_bytes:
content.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": base64.b64encode(image_bytes).decode("utf-8"),
},
}
)
content = [
user_text,
ImageContentPart(data=image_bytes, mime_type=media_type),
]
response = await client.messages.create(
response = await asyncio.to_thread(
client.generate,
model=model,
messages=[
SystemMessage(content=system_prompt),
UserMessage(content=content),
],
response_format=TextResponse(),
max_tokens=8192,
system=system_prompt,
messages=[{"role": "user", "content": content}],
)
output_text = "".join(
block.text for block in response.content if getattr(block, "type", None) == "text"
)
output_text = _read_llmai_response_text(response)
if not output_text:
raise HTTPException(status_code=500, detail="No output from template provider")
return output_text

View file

@ -1,4 +1,4 @@
from anthropic import AsyncAnthropic
import aiohttp
from openai import AsyncOpenAI
from google import genai
@ -12,8 +12,21 @@ async def list_available_openai_compatible_models(url: str, api_key: str) -> lis
async def list_available_anthropic_models(api_key: str) -> list[str]:
client = AsyncAnthropic(api_key=api_key)
return list(map(lambda x: x.id, (await client.models.list(limit=50)).data))
async with aiohttp.ClientSession(
headers={
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
}
) as session:
async with session.get(
"https://api.anthropic.com/v1/models",
params={"limit": 50},
) as response:
response.raise_for_status()
data = await response.json()
models = data.get("data", [])
return [model.get("id") for model in models if model.get("id")]
async def list_available_google_models(api_key: str) -> list[str]:

View file

@ -85,10 +85,6 @@ def get_pixabay_api_key_env():
return os.getenv("PIXABAY_API_KEY")
def get_tool_calls_env():
return os.getenv("TOOL_CALLS")
def get_disable_thinking_env():
return os.getenv("DISABLE_THINKING")

View file

@ -1,10 +1,14 @@
import asyncio
from datetime import datetime
from typing import Optional
from models.llm_message import LLMSystemMessage, LLMUserMessage
from fastapi import HTTPException
from llmai import get_client
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
from models.presentation_layout import SlideLayoutModel
from models.sql.slide import SlideModel
from services.llm_client import LLMClient
from utils.llm_config import get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_utils import extract_structured_content, get_generate_kwargs
from utils.llm_provider import get_model
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
@ -89,12 +93,12 @@ def get_messages(
verbosity: Optional[str] = None,
instructions: Optional[str] = None,
memory_context: Optional[str] = None,
):
) -> list[Message]:
return [
LLMSystemMessage(
SystemMessage(
content=get_system_prompt(tone, verbosity, instructions, memory_context),
),
LLMUserMessage(
UserMessage(
content=get_user_prompt(prompt, slide_data, language),
),
]
@ -128,23 +132,40 @@ async def get_edited_slide_content(
True,
)
client = LLMClient()
client = get_client(config=get_llm_config())
try:
response = await client.generate_structured(
model=model,
messages=get_messages(
prompt,
slide.content,
language,
tone,
verbosity,
instructions,
memory_context,
),
response_format=response_schema,
response_format = JSONSchemaResponse(
name="response",
json_schema=response_schema,
strict=False,
)
return response
messages = get_messages(
prompt,
slide.content,
language,
tone,
verbosity,
instructions,
memory_context,
)
for attempt in range(3):
response = await asyncio.to_thread(
client.generate,
**get_generate_kwargs(
model=model,
messages=messages,
response_format=response_format,
),
)
content = extract_structured_content(response.content)
if content is not None:
return content
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(status_code=400, detail="LLM did not return any content")
except Exception as e:
raise handle_llm_client_exceptions(e)

View file

@ -1,7 +1,11 @@
import asyncio
from typing import Optional
from models.llm_message import LLMSystemMessage, LLMUserMessage
from services.llm_client import LLMClient
from fastapi import HTTPException
from llmai import get_client
from llmai.shared import SystemMessage, UserMessage
from utils.llm_config import get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_utils import extract_text, get_generate_kwargs
from utils.llm_provider import get_model
system_prompt = """
@ -59,18 +63,24 @@ async def get_edited_slide_html(
):
model = get_model()
client = LLMClient()
client = get_client(config=get_llm_config())
try:
response = await client.generate(
model=model,
messages=[
LLMSystemMessage(content=system_prompt),
LLMUserMessage(
content=get_user_prompt(prompt, html, memory_context)
),
],
response = await asyncio.to_thread(
client.generate,
**get_generate_kwargs(
model=model,
messages=[
SystemMessage(content=system_prompt),
UserMessage(
content=get_user_prompt(prompt, html, memory_context)
),
],
),
)
return extract_html_from_response(response) or html
response_text = extract_text(response.content)
if response_text is None:
raise HTTPException(status_code=400, detail="LLM did not return any content")
return extract_html_from_response(response_text) or html
except Exception as e:
raise handle_llm_client_exceptions(e)

View file

@ -1,14 +1,26 @@
from datetime import datetime
from typing import Optional
from enums.llm_provider import LLMProvider
from models.llm_message import LLMSystemMessage, LLMUserMessage
from llmai import get_client
from llmai.shared import (
JSONSchemaResponse,
Message,
ResponseStreamCompletionChunk,
SystemMessage,
UserMessage,
WebSearchTool,
)
from models.presentation_outline_model import PresentationOutlineModel
from models.llm_tools import SearchWebTool
from services.llm_client import LLMClient
from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides
from utils.llm_config import enable_web_grounding, get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_provider import get_model
from utils.llm_utils import (
get_generate_kwargs,
serialize_structured_content,
stream_generate_events,
)
def get_system_prompt(
@ -125,9 +137,9 @@ def get_messages(
instructions: Optional[str] = None,
include_title_slide: bool = True,
include_table_of_contents: bool = False,
):
) -> list[Message]:
return [
LLMSystemMessage(
SystemMessage(
content=get_system_prompt(
tone,
verbosity,
@ -136,7 +148,7 @@ def get_messages(
include_table_of_contents,
),
),
LLMUserMessage(
UserMessage(
content=get_user_prompt(
content,
n_slides,
@ -170,36 +182,47 @@ async def generate_ppt_outline(
else PresentationOutlineModel
)
client = LLMClient()
providers_with_search_tool = {
LLMProvider.OPENAI,
LLMProvider.ANTHROPIC,
LLMProvider.GOOGLE,
}
use_search_tool = (
web_search
and client.enable_web_grounding()
and client.llm_provider in providers_with_search_tool
)
client = get_client(config=get_llm_config())
use_search_tool = web_search and enable_web_grounding()
try:
async for chunk in client.stream_structured(
model,
get_messages(
content,
n_slides,
language,
additional_context,
tone,
verbosity,
instructions,
include_title_slide,
include_table_of_contents,
),
response_model.model_json_schema(),
response_format = JSONSchemaResponse(
name="response",
json_schema=response_model.model_json_schema(),
strict=True,
tools=([SearchWebTool] if use_search_tool else None),
)
emitted_content = False
async for event in stream_generate_events(
client,
**get_generate_kwargs(
model=model,
messages=get_messages(
content,
n_slides,
language,
additional_context,
tone,
verbosity,
instructions,
include_title_slide,
include_table_of_contents,
),
response_format=response_format,
tools=([WebSearchTool()] if use_search_tool else None),
stream=True,
),
):
yield chunk
if getattr(event, "type", None) == "content":
chunk = getattr(event, "chunk", None)
if chunk:
emitted_content = True
yield chunk
elif (
isinstance(event, ResponseStreamCompletionChunk)
and not emitted_content
):
final_content = serialize_structured_content(event.content)
if final_content:
yield final_content
except Exception as e:
yield handle_llm_client_exceptions(e)

View file

@ -1,10 +1,14 @@
from typing import Optional, Dict
import asyncio
from typing import Optional
from models.llm_message import LLMSystemMessage, LLMUserMessage
from fastapi import HTTPException
from llmai import get_client
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
from models.presentation_layout import PresentationLayoutModel
from models.presentation_outline_model import PresentationOutlineModel
from services.llm_client import LLMClient
from utils.llm_config import get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_utils import extract_structured_content, get_generate_kwargs
from utils.llm_provider import get_model
from utils.get_dynamic_models import get_presentation_structure_model_with_n_slides
from models.presentation_structure_model import PresentationStructureModel
@ -97,19 +101,21 @@ def get_messages(
n_slides: int,
data: str,
instructions: Optional[str] = None,
):
) -> list[Message]:
system_prompt = GET_MESSAGES_SYSTEM_PROMPT.format(
user_instruction_header="# User Instruction:" if instructions else "",
n_slides=n_slides,
)
return [
LLMSystemMessage(content=system_prompt),
LLMUserMessage(content=(
f"{presentation_layout.to_string()}\n\n"
"--------------------------------------\n\n"
f"{data}"
)),
SystemMessage(content=system_prompt),
UserMessage(
content=(
f"{presentation_layout.to_string()}\n\n"
"--------------------------------------\n\n"
f"{data}"
)
),
]
@ -118,20 +124,13 @@ def get_messages_for_slides_markdown(
n_slides: int,
data: str,
instructions: Optional[str] = None,
):
) -> list[Message]:
system_prompt = STRUCTURE_FROM_SLIDES_MARKDOWN_SYSTEM_PROMPT.format(
user_instructions=instructions or "",
presentation_layout=presentation_layout.to_string(with_schema=True),
)
return [
LLMSystemMessage(
content=system_prompt
),
LLMUserMessage(
content=data
)
]
return [SystemMessage(content=system_prompt), UserMessage(content=data)]
async def generate_presentation_structure(
@ -140,34 +139,50 @@ async def generate_presentation_structure(
instructions: Optional[str] = None,
using_slides_markdown: bool = False,
) -> PresentationStructureModel:
client = LLMClient()
client = get_client(config=get_llm_config())
model = get_model()
response_model = get_presentation_structure_model_with_n_slides(
len(presentation_outline.slides)
)
try:
response = await client.generate_structured(
model=model,
messages=(
get_messages_for_slides_markdown(
presentation_layout,
len(presentation_outline.slides),
presentation_outline.to_string(),
instructions,
)
if using_slides_markdown
else get_messages(
presentation_layout,
len(presentation_outline.slides),
presentation_outline.to_string(),
instructions,
)
),
response_format=response_model.model_json_schema(),
messages = (
get_messages_for_slides_markdown(
presentation_layout,
len(presentation_outline.slides),
presentation_outline.to_string(),
instructions,
)
if using_slides_markdown
else get_messages(
presentation_layout,
len(presentation_outline.slides),
presentation_outline.to_string(),
instructions,
)
)
response_format = JSONSchemaResponse(
name="response",
json_schema=response_model.model_json_schema(),
strict=True,
)
return PresentationStructureModel(**response)
for attempt in range(3):
response = await asyncio.to_thread(
client.generate,
**get_generate_kwargs(
model=model,
messages=messages,
response_format=response_format,
),
)
content = extract_structured_content(response.content)
if content is not None:
return PresentationStructureModel(**content)
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(status_code=400, detail="LLM did not return any content")
except Exception as e:
raise handle_llm_client_exceptions(e)

View file

@ -1,11 +1,15 @@
import asyncio
from datetime import datetime
import json
from typing import Optional
from models.llm_message import LLMSystemMessage, LLMUserMessage
from fastapi import HTTPException
from llmai import get_client
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
from models.presentation_layout import SlideLayoutModel
from models.presentation_outline_model import SlideOutlineModel
from services.llm_client import LLMClient
from utils.llm_config import get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_utils import extract_structured_content, get_generate_kwargs
from utils.llm_provider import get_model
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
@ -130,10 +134,10 @@ def get_messages(
verbosity: Optional[str] = None,
instructions: Optional[str] = None,
response_schema: Optional[dict] = None,
):
) -> list[Message]:
return [
LLMSystemMessage(
SystemMessage(
content=get_system_prompt(
tone,
verbosity,
@ -141,7 +145,7 @@ def get_messages(
response_schema,
),
),
LLMUserMessage(
UserMessage(
content=get_user_prompt(outline, language),
),
]
@ -155,7 +159,7 @@ async def get_slide_content_from_type_and_outline(
verbosity: Optional[str] = None,
instructions: Optional[str] = None,
):
client = LLMClient()
client = get_client(config=get_llm_config())
model = get_model()
response_schema = remove_fields_from_schema(
@ -175,20 +179,37 @@ async def get_slide_content_from_type_and_outline(
)
try:
response = await client.generate_structured(
model=model,
messages=get_messages(
outline.content,
language,
tone,
verbosity,
instructions,
response_schema,
),
response_format=response_schema,
response_format = JSONSchemaResponse(
name="response",
json_schema=response_schema,
strict=False,
)
return response
messages = get_messages(
outline.content,
language,
tone,
verbosity,
instructions,
response_schema,
)
for attempt in range(3):
response = await asyncio.to_thread(
client.generate,
**get_generate_kwargs(
model=model,
messages=messages,
response_format=response_format,
),
)
content = extract_structured_content(response.content)
if content is not None:
return content
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(status_code=400, detail="LLM did not return any content")
except Exception as e:
raise handle_llm_client_exceptions(e)

View file

@ -1,9 +1,13 @@
from models.llm_message import LLMSystemMessage, LLMUserMessage
import asyncio
from fastapi import HTTPException
from llmai import get_client
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
from models.presentation_layout import PresentationLayoutModel, SlideLayoutModel
from models.slide_layout_index import SlideLayoutIndex
from models.sql.slide import SlideModel
from services.llm_client import LLMClient
from utils.llm_config import get_llm_config
from utils.llm_client_error_handler import handle_llm_client_exceptions
from utils.llm_utils import extract_structured_content, get_generate_kwargs
from utils.llm_provider import get_model
@ -13,7 +17,7 @@ def get_messages(
layout: PresentationLayoutModel,
current_slide_layout: int,
memory_context: str = "",
):
) -> list[Message]:
memory_block = (
f"\n # Retrieved Presentation Memory Context\n {memory_context}\n"
if memory_context
@ -21,7 +25,7 @@ def get_messages(
)
return [
LLMSystemMessage(
SystemMessage(
content=f"""
Select a Slide Layout index based on provided user prompt and current slide data.
{layout.to_string()}
@ -34,7 +38,7 @@ def get_messages(
**Go through all notes and steps and make sure they are followed, including mentioned constraints**
""",
),
LLMUserMessage(
UserMessage(
content=f"""
- User Prompt: {prompt}
- Current Slide Data: {slide_data}
@ -50,27 +54,43 @@ async def get_slide_layout_from_prompt(
slide: SlideModel,
memory_context: str = "",
) -> SlideLayoutModel:
client = LLMClient()
client = get_client(config=get_llm_config())
model = get_model()
slide_layout_index = layout.get_slide_layout_index(slide.layout)
try:
response = await client.generate_structured(
model=model,
messages=get_messages(
prompt,
slide.content,
layout,
slide_layout_index,
memory_context,
),
response_format=SlideLayoutIndex.model_json_schema(),
response_format = JSONSchemaResponse(
name="response",
json_schema=SlideLayoutIndex.model_json_schema(),
strict=True,
)
index = SlideLayoutIndex(**response).index
return layout.slides[index]
messages = get_messages(
prompt,
slide.content,
layout,
slide_layout_index,
memory_context,
)
for attempt in range(3):
response = await asyncio.to_thread(
client.generate,
**get_generate_kwargs(
model=model,
messages=messages,
response_format=response_format,
),
)
content = extract_structured_content(response.content)
if content is not None:
index = SlideLayoutIndex(**content).index
return layout.slides[index]
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(status_code=400, detail="LLM did not return any content")
except Exception as e:
raise handle_llm_client_exceptions(e)

View file

@ -1,18 +1,19 @@
from fastapi import HTTPException
from anthropic import APIError as AnthropicAPIError
from openai import APIError as OpenAIAPIError
from google.genai.errors import APIError as GoogleAPIError
import traceback
from llmai.shared.errors import BaseError as LLMAIBaseError
def handle_llm_client_exceptions(e: Exception) -> HTTPException:
traceback.print_exc()
if isinstance(e, HTTPException):
return e
if isinstance(e, LLMAIBaseError):
return HTTPException(status_code=e.status_code, detail=e.message)
if isinstance(e, OpenAIAPIError):
return HTTPException(status_code=500, detail=f"OpenAI API error: {e.message}")
if isinstance(e, GoogleAPIError):
return HTTPException(status_code=500, detail=f"Google API error: {e.message}")
if isinstance(e, AnthropicAPIError):
return HTTPException(
status_code=500, detail=f"Anthropic API error: {e.message}"
)
return HTTPException(status_code=500, detail=f"LLM API error: {e}")

View file

@ -0,0 +1,146 @@
import time
from typing import Optional
from fastapi import HTTPException
from llmai.shared import (
AnthropicClientConfig,
ChatGPTClientConfig,
ClientConfig,
GoogleClientConfig,
OpenAIApiType,
OpenAIClientConfig,
)
from enums.llm_provider import LLMProvider
from utils.get_env import (
get_anthropic_api_key_env,
get_codex_access_token_env,
get_codex_account_id_env,
get_codex_refresh_token_env,
get_codex_token_expires_env,
get_custom_llm_api_key_env,
get_custom_llm_url_env,
get_disable_thinking_env,
get_google_api_key_env,
get_ollama_url_env,
get_openai_api_key_env,
get_web_grounding_env,
)
from utils.llm_provider import get_llm_provider
from utils.parsers import parse_bool_or_none
from utils.set_env import (
set_codex_access_token_env,
set_codex_account_id_env,
set_codex_refresh_token_env,
set_codex_token_expires_env,
)
def enable_web_grounding() -> bool:
    """Return the parsed web-grounding setting, or ``False`` when it is unset or falsy."""
    flag = parse_bool_or_none(get_web_grounding_env())
    if flag:
        return flag
    return False
def disable_thinking() -> bool:
    """Return the parsed disable-thinking setting, or ``False`` when it is unset or falsy."""
    flag = parse_bool_or_none(get_disable_thinking_env())
    if flag:
        return flag
    return False
def _get_codex_access_token() -> str:
    """Return the Codex OAuth access token, refreshing it when close to expiry.

    The stored expiry is read as epoch milliseconds. When the current time is
    within 60 seconds of it (or past it) and a refresh token is stored, a
    refresh is attempted; on success the new access token, refresh token,
    expiry and (when available) account id are written back through the
    ``set_codex_*_env`` helpers.

    Returns:
        The refreshed access token when the refresh succeeded, otherwise the
        token that was already stored.

    Raises:
        HTTPException: 400 when no access token is stored.
    """
    token = get_codex_access_token_env()
    if not token:
        raise HTTPException(
            status_code=400,
            detail=(
                "Codex OAuth access token is not set. Please authenticate via "
                "/api/v1/ppt/codex/auth/initiate"
            ),
        )

    raw_expiry = get_codex_token_expires_env()
    if not raw_expiry:
        return token

    try:
        expiry_ms = int(raw_expiry)
        current_ms = int(time.time() * 1000)
        # Still valid for more than one minute: nothing to do.
        if current_ms < expiry_ms - 60_000:
            return token

        stored_refresh_token = get_codex_refresh_token_env()
        if not stored_refresh_token:
            return token

        # Imported lazily, only on the refresh path.
        from utils.oauth.openai_codex import (
            TokenSuccess,
            get_account_id,
            refresh_access_token,
        )

        refreshed = refresh_access_token(stored_refresh_token)
        if not isinstance(refreshed, TokenSuccess):
            return token

        set_codex_access_token_env(refreshed.access)
        set_codex_refresh_token_env(refreshed.refresh)
        set_codex_token_expires_env(str(refreshed.expires))
        new_account_id = get_account_id(refreshed.access)
        if new_account_id:
            set_codex_account_id_env(new_account_id)
        token = refreshed.access
    except (TypeError, ValueError):
        # Best effort: on these errors the previously stored token is returned.
        pass

    return token
def get_llm_config() -> ClientConfig:
llm_provider = get_llm_provider()
match llm_provider:
case LLMProvider.OPENAI:
api_key = get_openai_api_key_env()
if not api_key:
raise HTTPException(status_code=400, detail="OpenAI API Key is not set")
return OpenAIClientConfig(
api_key=api_key,
api_type=OpenAIApiType.RESPONSES,
)
case LLMProvider.GOOGLE:
api_key = get_google_api_key_env()
if not api_key:
raise HTTPException(status_code=400, detail="Google API Key is not set")
return GoogleClientConfig(api_key=api_key)
case LLMProvider.ANTHROPIC:
api_key = get_anthropic_api_key_env()
if not api_key:
raise HTTPException(
status_code=400,
detail="Anthropic API Key is not set",
)
return AnthropicClientConfig(api_key=api_key)
case LLMProvider.OLLAMA:
return OpenAIClientConfig(
base_url=(get_ollama_url_env() or "http://localhost:11434") + "/v1",
api_key="ollama",
)
case LLMProvider.CUSTOM:
base_url = get_custom_llm_url_env()
if not base_url:
raise HTTPException(
status_code=400,
detail="Custom LLM URL is not set",
)
return OpenAIClientConfig(
base_url=base_url,
api_key=get_custom_llm_api_key_env() or "null",
)
case LLMProvider.CODEX:
return ChatGPTClientConfig(
access_token=_get_codex_access_token(),
account_id=get_codex_account_id_env() or None,
)
case _:
raise HTTPException(
status_code=400,
detail=(
"LLM Provider must be either openai, google, anthropic, "
"ollama, custom, or codex"
),
)
def get_extra_body() -> Optional[dict]:
    """Return extra request-body fields for the current provider, if any.

    Returns:
        ``{"enable_thinking": False}`` when the provider is ``CUSTOM`` and
        thinking is disabled; ``None`` in every other case.
    """
    is_custom_provider = get_llm_provider() == LLMProvider.CUSTOM
    # ``disable_thinking()`` is only consulted for the custom provider.
    if not (is_custom_provider and disable_thinking()):
        return None
    return {"enable_thinking": False}

View file

@ -0,0 +1,134 @@
import asyncio
import json
from collections.abc import AsyncGenerator, Sequence
from typing import Any, Optional
import dirtyjson
from llmai.shared import (
LLMTool,
Message,
ResponseFormat,
normalize_content_parts,
)
from utils.llm_config import get_extra_body
def get_generate_kwargs(
    model: str,
    messages: Sequence[Message],
    max_tokens: Optional[int] = None,
    tools: Optional[list[LLMTool]] = None,
    response_format: Optional[ResponseFormat] = None,
    stream: bool = False,
) -> dict[str, Any]:
    """Assemble the keyword arguments passed to ``client.generate``.

    ``model``, ``messages`` (copied into a list) and ``stream`` are always
    present. ``max_tokens`` and ``response_format`` are added only when not
    ``None``, ``tools`` only when non-empty, and ``extra_body`` only when
    ``get_extra_body()`` returns something truthy.

    Returns:
        The keyword-argument dictionary.
    """
    request: dict[str, Any] = dict(
        model=model,
        messages=list(messages),
        stream=stream,
    )

    # (key, value, include?) for each optional field, in insertion order.
    optional_fields = (
        ("max_tokens", max_tokens, max_tokens is not None),
        ("tools", tools, bool(tools)),
        ("response_format", response_format, response_format is not None),
    )
    request.update({key: value for key, value, keep in optional_fields if keep})

    provider_extras = get_extra_body()
    if provider_extras:
        request["extra_body"] = provider_extras

    return request
def extract_text(content: Any) -> Optional[str]:
    """Extract plain text from response content of several possible shapes.

    Args:
        content: ``None``, a string, a sequence of parts (strings or objects
            with a ``text`` attribute), or a single object with ``text``.

    Returns:
        The string itself for string input (including ``""``); the joined
        text of a sequence, or ``None`` when that is empty; the ``text``
        attribute of any other object when it is a string; otherwise ``None``.
    """
    if content is None:
        return None
    if isinstance(content, str):
        return content

    # bytes/bytearray are Sequences too, but are not treated as part lists.
    is_part_sequence = isinstance(content, Sequence) and not isinstance(
        content, (bytes, bytearray)
    )
    if not is_part_sequence:
        single = getattr(content, "text", None)
        return single if isinstance(single, str) else None

    candidates = (
        piece if isinstance(piece, str) else getattr(piece, "text", None)
        for piece in content
    )
    combined = "".join(text for text in candidates if isinstance(text, str))
    return combined or None
def extract_structured_content(content: Any) -> Optional[dict]:
    """Coerce response content into a dictionary when possible.

    Resolution order:
      1. ``None`` and ``dict`` inputs are returned unchanged.
      2. Objects with ``model_dump`` are dumped with ``mode="json"``; the
         result is used when it is a dict.
      3. Otherwise the text from ``extract_text`` is parsed with
         ``dirtyjson``; a dict result is returned as a plain ``dict`` copy.

    Returns:
        The dictionary, or ``None`` when there is no text, parsing fails, or
        the parsed value is not a dict.
    """
    if content is None or isinstance(content, dict):
        return content

    if hasattr(content, "model_dump"):
        as_json = content.model_dump(mode="json")
        if isinstance(as_json, dict):
            return as_json

    text = extract_text(content)
    if not text:
        return None

    try:
        decoded = dirtyjson.loads(text)
    except Exception:
        # Any parse failure is reported to the caller as "no content".
        return None

    return dict(decoded) if isinstance(decoded, dict) else None
def serialize_structured_content(content: Any) -> Optional[str]:
    """Serialize response content to a string.

    Returns:
        JSON (non-ASCII characters preserved) when the content resolves to a
        dictionary via ``extract_structured_content``; otherwise the raw text
        from ``extract_text``; ``None`` when neither yields anything.
    """
    structured = extract_structured_content(content)
    if structured is None:
        # No dictionary available: fall back to the raw text, if any.
        return extract_text(content) or None
    return json.dumps(structured, ensure_ascii=False)
def message_content_to_text(content: Sequence[Any] | str | None) -> Optional[str]:
    """Join the text of all parts of a message's content.

    The content is passed through ``normalize_content_parts``; parts whose
    ``text`` attribute is not a string are ignored.

    Returns:
        The concatenated text, or ``None`` when it is empty.
    """
    texts: list[str] = []
    for part in normalize_content_parts(content):
        value = getattr(part, "text", None)
        if isinstance(value, str):
            texts.append(value)
    return "".join(texts) or None
async def stream_generate_events(client: Any, **kwargs) -> AsyncGenerator[Any, None]:
    """Expose a blocking ``client.generate`` stream as an async generator.

    ``client.generate(**kwargs)`` is iterated in a worker thread. Each event
    is handed to the event loop through a queue and yielded in order.

    Args:
        client: Object whose ``generate(**kwargs)`` returns an iterable.
        **kwargs: Forwarded unchanged to ``client.generate``.

    Yields:
        The events produced by ``client.generate``.

    Raises:
        Exception: Any exception raised while iterating in the worker thread
            is re-raised here, after the events that preceded it.
    """
    import threading

    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[Any] = asyncio.Queue()
    sentinel = object()
    # Set when the consumer stops, so the worker does not keep draining the
    # stream for nobody.
    stop_requested = threading.Event()

    def worker() -> None:
        try:
            for event in client.generate(**kwargs):
                if stop_requested.is_set():
                    break
                loop.call_soon_threadsafe(queue.put_nowait, event)
        except Exception as exc:
            # Ship the failure to the consumer instead of losing it in the thread.
            loop.call_soon_threadsafe(queue.put_nowait, exc)
        finally:
            # Always signal end-of-stream so the consumer loop can terminate.
            loop.call_soon_threadsafe(queue.put_nowait, sentinel)

    worker_task = asyncio.create_task(asyncio.to_thread(worker))

    try:
        while True:
            item = await queue.get()
            if item is sentinel:
                break
            if isinstance(item, Exception):
                raise item
            yield item
    finally:
        # Runs on normal completion, on errors, and when the generator is
        # closed early; the worker stops at its next event.
        stop_requested.set()
        await worker_task

View file

@ -73,10 +73,6 @@ def set_disable_image_generation_env(value):
os.environ["DISABLE_IMAGE_GENERATION"] = value
def set_tool_calls_env(value):
os.environ["TOOL_CALLS"] = value
def set_disable_thinking_env(value):
os.environ["DISABLE_THINKING"] = value

View file

@ -22,7 +22,6 @@ from utils.get_env import (
get_openai_api_key_env,
get_openai_model_env,
get_pexels_api_key_env,
get_tool_calls_env,
get_user_config_path_env,
get_image_provider_env,
get_pixabay_api_key_env,
@ -63,7 +62,6 @@ from utils.set_env import (
set_pexels_api_key_env,
set_image_provider_env,
set_pixabay_api_key_env,
set_tool_calls_env,
set_web_grounding_env,
set_codex_access_token_env,
set_codex_refresh_token_env,
@ -118,11 +116,6 @@ def get_user_config():
DALL_E_3_QUALITY=existing_config.DALL_E_3_QUALITY or get_dall_e_3_quality_env(),
GPT_IMAGE_1_5_QUALITY=existing_config.GPT_IMAGE_1_5_QUALITY
or get_gpt_image_1_5_quality_env(),
TOOL_CALLS=(
existing_config.TOOL_CALLS
if existing_config.TOOL_CALLS is not None
else (parse_bool_or_none(get_tool_calls_env()) or False)
),
DISABLE_THINKING=(
existing_config.DISABLE_THINKING
if existing_config.DISABLE_THINKING is not None
@ -197,8 +190,6 @@ def update_env_with_user_config():
set_dall_e_3_quality_env(user_config.DALL_E_3_QUALITY)
if user_config.GPT_IMAGE_1_5_QUALITY:
set_gpt_image_1_5_quality_env(user_config.GPT_IMAGE_1_5_QUALITY)
if user_config.TOOL_CALLS is not None:
set_tool_calls_env(str(user_config.TOOL_CALLS))
if user_config.DISABLE_THINKING is not None:
set_disable_thinking_env(str(user_config.DISABLE_THINKING))
if user_config.EXTENDED_REASONING is not None:

View file

@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = "==3.11.*"
[[package]]
@ -238,6 +238,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ea/44/b749f8777b020b420bceaaf60f66432fc30cc904ca5b69640ec9cbef11ed/blis-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:27f82b8633030f8d095d2b412dffa7eb6dbc8ee43813139909a20012e54422ea", size = 6171233, upload-time = "2025-11-17T12:27:41.921Z" },
]
[[package]]
name = "boto3"
version = "1.42.94"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "botocore" },
{ name = "jmespath" },
{ name = "s3transfer" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6a/6a/95302333208830de932ad1d0b69599ee13e936349a44981fb72632507861/boto3-1.42.94.tar.gz", hash = "sha256:5b6056a661c19e974aaea3cb97690ddbe30d10c31e4f887df3bff06574f34510", size = 113211, upload-time = "2026-04-22T20:36:19.167Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c4/6f/4e175604f3168befcb413c95bf45eada67d12042f92f76a9305d6a817ea9/boto3-1.42.94-py3-none-any.whl", hash = "sha256:56d53bce75629cc7c78a32da8b62de74cee3e2a3d54a2b60ba1a65f9f1b129da", size = 140555, upload-time = "2026-04-22T20:36:16.182Z" },
]
[[package]]
name = "botocore"
version = "1.42.94"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jmespath" },
{ name = "python-dateutil" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b7/90/1a4d0e81b325d38e37f81d907ceacac3b8f509ad38b495bb95086ecb609d/botocore-1.42.94.tar.gz", hash = "sha256:41c6b3b11b073221a41f52b222ba387be34459fb77cdc506e8b74cdaf24bdcce", size = 15260901, upload-time = "2026-04-22T20:36:00.853Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/61/73/313af9ee02ac0155247bcf3f04fcf54fcae2e33250bb437528c18aeefd81/botocore-1.42.94-py3-none-any.whl", hash = "sha256:a2143742132ed0f6cdb90204d667b89d0301068b1045e8bc099efa267bf1b348", size = 14942938, upload-time = "2026-04-22T20:35:55.663Z" },
]
[[package]]
name = "cachetools"
version = "7.0.6"
@ -783,7 +811,9 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/c6/dba32cab7e3a625b011aa5647486e2d28423a48845a2998c126dd69c85e1/greenlet-3.4.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:805bebb4945094acbab757d34d6e1098be6de8966009ab9ca54f06ff492def58", size = 285504, upload-time = "2026-04-08T15:52:14.071Z" },
{ url = "https://files.pythonhosted.org/packages/54/f4/7cb5c2b1feb9a1f50e038be79980dfa969aa91979e5e3a18fdbcfad2c517/greenlet-3.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:439fc2f12b9b512d9dfa681c5afe5f6b3232c708d13e6f02c845e0d9f4c2d8c6", size = 605476, upload-time = "2026-04-08T16:24:37.064Z" },
{ url = "https://files.pythonhosted.org/packages/d6/af/b66ab0b2f9a4c5a867c136bf66d9599f34f21a1bcca26a2884a29c450bd9/greenlet-3.4.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a70ed1cb0295bee1df57b63bf7f46b4e56a5c93709eea769c1fec1bb23a95875", size = 618336, upload-time = "2026-04-08T16:30:56.59Z" },
{ url = "https://files.pythonhosted.org/packages/6d/31/56c43d2b5de476f77d36ceeec436328533bff960a4cba9a07616e93063ab/greenlet-3.4.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c5696c42e6bb5cfb7c6ff4453789081c66b9b91f061e5e9367fa15792644e76", size = 625045, upload-time = "2026-04-08T16:40:37.111Z" },
{ url = "https://files.pythonhosted.org/packages/e5/5c/8c5633ece6ba611d64bf2770219a98dd439921d6424e4e8cf16b0ac74ea5/greenlet-3.4.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c660bce1940a1acae5f51f0a064f1bc785d07ea16efcb4bc708090afc4d69e83", size = 613515, upload-time = "2026-04-08T15:56:32.478Z" },
{ url = "https://files.pythonhosted.org/packages/80/ca/704d4e2c90acb8bdf7ae593f5cbc95f58e82de95cc540fb75631c1054533/greenlet-3.4.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:89995ce5ddcd2896d89615116dd39b9703bfa0c07b583b85b89bf1b5d6eddf81", size = 419745, upload-time = "2026-04-08T16:43:04.022Z" },
{ url = "https://files.pythonhosted.org/packages/a9/df/950d15bca0d90a0e7395eb777903060504cdb509b7b705631e8fb69ff415/greenlet-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ee407d4d1ca9dc632265aee1c8732c4a2d60adff848057cdebfe5fe94eb2c8a2", size = 1574623, upload-time = "2026-04-08T16:26:18.596Z" },
{ url = "https://files.pythonhosted.org/packages/1a/e7/0839afab829fcb7333c9ff6d80c040949510055d2d4d63251f0d1c7c804e/greenlet-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:956215d5e355fffa7c021d168728321fd4d31fd730ac609b1653b450f6a4bc71", size = 1639579, upload-time = "2026-04-08T15:57:29.231Z" },
{ url = "https://files.pythonhosted.org/packages/d9/2b/b4482401e9bcaf9f5c97f67ead38db89c19520ff6d0d6699979c6efcc200/greenlet-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:5cb614ace7c27571270354e9c9f696554d073f8aa9319079dcba466bbdead711", size = 238233, upload-time = "2026-04-08T17:02:54.286Z" },
@ -1057,6 +1087,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/99/8f/15e7741ff19e9bcd4d753f7ff22f988fd54592f134ca13701c13ea8c20e0/jiter-0.14.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52c076f187405fc21523c746c04399c9af8ece566077ed147b2126f2bcba577", size = 351445, upload-time = "2026-04-10T14:28:33.093Z" },
]
[[package]]
name = "jmespath"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
]
[[package]]
name = "joblib"
version = "1.5.3"
@ -1146,6 +1185,28 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" },
]
[[package]]
name = "llmai"
version = "0.1.9"
source = { url = "https://files.pythonhosted.org/packages/c6/86/5dcfd77b634947cd570680b13217b40bc72cd7d9e7f04cc1a52ff5f549a0/llmai-0.1.9-py3-none-any.whl" }
dependencies = [
{ name = "anthropic" },
{ name = "boto3" },
{ name = "google-genai" },
{ name = "openai" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/86/5dcfd77b634947cd570680b13217b40bc72cd7d9e7f04cc1a52ff5f549a0/llmai-0.1.9-py3-none-any.whl", hash = "sha256:dcd94502516586bbd6394fe2c9c610941ff4c19eae0f1316825435f35134cfb4" },
]
[package.metadata]
requires-dist = [
{ name = "anthropic", specifier = ">=0.79.0" },
{ name = "boto3", specifier = ">=1.42.89" },
{ name = "google-genai", specifier = ">=1.62.0" },
{ name = "openai", specifier = ">=2.18.0" },
]
[[package]]
name = "loguru"
version = "0.7.3"
@ -1604,13 +1665,13 @@ dependencies = [
{ name = "aiomysql" },
{ name = "aiosqlite" },
{ name = "alembic" },
{ name = "anthropic" },
{ name = "asyncpg" },
{ name = "dirtyjson" },
{ name = "fastapi", extra = ["standard"] },
{ name = "fastembed-vectorstore" },
{ name = "fastmcp" },
{ name = "google-genai" },
{ name = "llmai" },
{ name = "mem0ai", extra = ["nlp"] },
{ name = "nltk" },
{ name = "openai" },
@ -1626,13 +1687,13 @@ requires-dist = [
{ name = "aiomysql", specifier = ">=0.2.0" },
{ name = "aiosqlite", specifier = ">=0.21.0" },
{ name = "alembic", specifier = ">=1.14.0" },
{ name = "anthropic", specifier = ">=0.60.0" },
{ name = "asyncpg", specifier = ">=0.30.0" },
{ name = "dirtyjson", specifier = ">=1.0.8" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.116.1" },
{ name = "fastembed-vectorstore", specifier = ">=0.5.2" },
{ name = "fastmcp", specifier = ">=2.11.0" },
{ name = "google-genai", specifier = ">=1.28.0" },
{ name = "llmai", url = "https://files.pythonhosted.org/packages/c6/86/5dcfd77b634947cd570680b13217b40bc72cd7d9e7f04cc1a52ff5f549a0/llmai-0.1.9-py3-none-any.whl" },
{ name = "mem0ai", extras = ["nlp"], specifier = ">=0.1.115" },
{ name = "nltk", specifier = ">=3.9.1" },
{ name = "openai", specifier = ">=1.98.0" },
@ -2200,6 +2261,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" },
]
[[package]]
name = "s3transfer"
version = "0.16.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "botocore" },
]
sdist = { url = "https://files.pythonhosted.org/packages/46/29/af14f4ef3c11a50435308660e2cc68761c9a7742475e0585cd4396b91777/s3transfer-0.16.1.tar.gz", hash = "sha256:8e424355754b9ccb32467bdc568edf55be82692ef2002d934b1311dbb3b9e524", size = 154801, upload-time = "2026-04-22T20:36:06.475Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/03/19/90d7d4ed51932c022d53f1d02d564b62d10e272692a1f9b76425c1ad2a02/s3transfer-0.16.1-py3-none-any.whl", hash = "sha256:61bcd00ccb83b21a0fe7e91a553fff9729d46c83b4e0106e7c314a733891f7c2", size = 86825, upload-time = "2026-04-22T20:36:04.992Z" },
]
[[package]]
name = "secretstorage"
version = "3.5.0"

View file

@ -20,7 +20,6 @@ interface CustomConfigProps {
customLlmUrl: string;
customLlmApiKey: string;
customModel: string;
toolCalls: boolean;
disableThinking: boolean;
onInputChange: (value: string | boolean, field: string) => void;
}
@ -29,7 +28,6 @@ export default function CustomConfig({
customLlmUrl,
customLlmApiKey,
customModel,
toolCalls,
disableThinking,
onInputChange,
}: CustomConfigProps) {
@ -165,9 +163,8 @@ export default function CustomConfig({
<div className="mb-4">
<div className="mb-3 p-3 bg-amber-50 border border-amber-200 rounded-lg">
<p className="text-sm text-amber-800">
<strong>Important:</strong> Only models with function
calling capabilities (tool calls) or JSON schema support
will work.
<strong>Important:</strong> Only models with structured
JSON schema output support will work reliably.
</p>
</div>
<label className="block text-sm font-medium text-gray-700 mb-2">
@ -231,23 +228,6 @@ export default function CustomConfig({
</div>
</div>
)}
{/* Tool Calls Toggle */}
<div>
<div className="flex items-center justify-between mb-4 bg-green-50 p-2 rounded-sm">
<label className="text-sm font-medium text-gray-700">
Use Tool Calls
</label>
<Switch
checked={toolCalls}
onCheckedChange={(checked) => onInputChange(checked, "tool_calls")}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
If enabled, Tool Calls will be used instead of JSON Schema for Structured Output.
</p>
</div>
{/* Disable Thinking Toggle */}
<div>
<div className="flex items-center justify-between mb-4 bg-green-50 p-2 rounded-sm">
@ -266,4 +246,4 @@ export default function CustomConfig({
</div>
</div >
);
}
}

View file

@ -292,7 +292,6 @@ export default function LLMProviderSelection({
customLlmUrl={llmConfig.CUSTOM_LLM_URL || ""}
customLlmApiKey={llmConfig.CUSTOM_LLM_API_KEY || ""}
customModel={llmConfig.CUSTOM_MODEL || ""}
toolCalls={llmConfig.TOOL_CALLS || false}
disableThinking={llmConfig.DISABLE_THINKING || false}
onInputChange={input_field_changed}
/>

View file

@ -42,7 +42,6 @@ export interface LLMConfig {
GPT_IMAGE_1_5_QUALITY?: string;
// Other Configs
TOOL_CALLS?: boolean;
DISABLE_THINKING?: boolean;
EXTENDED_REASONING?: boolean;
WEB_GROUNDING?: boolean;

View file

@ -46,7 +46,6 @@ export const updateLLMConfig = (
image_provider: "IMAGE_PROVIDER",
disable_image_generation: "DISABLE_IMAGE_GENERATION",
use_custom_url: "USE_CUSTOM_URL",
tool_calls: "TOOL_CALLS",
disable_thinking: "DISABLE_THINKING",
extended_reasoning: "EXTENDED_REASONING",
web_grounding: "WEB_GROUNDING",
@ -244,4 +243,4 @@ export const pullOllamaModel = async (
void pollOnce();
}, 1000);
});
};
};

View file

@ -175,7 +175,6 @@ const setupUserConfigFromEnv = () => {
PIXABAY_API_KEY:
process.env.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
IMAGE_PROVIDER: process.env.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
TOOL_CALLS: process.env.TOOL_CALLS || existingConfig.TOOL_CALLS,
DISABLE_THINKING:
process.env.DISABLE_THINKING || existingConfig.DISABLE_THINKING,
EXTENDED_REASONING: