feat: integrates llmai instead of using old llm client and tool call handlers
This commit is contained in:
parent
ad3b31a359
commit
f76d17314a
35 changed files with 695 additions and 3369 deletions
|
|
@ -214,7 +214,6 @@ Other optional variables exist in code (for example advanced Mem0 paths, LitePar
|
|||
- **CUSTOM_LLM_URL**: OpenAI-compatible base URL if **LLM** is **custom**.
|
||||
- **CUSTOM_LLM_API_KEY**: API key if **LLM** is **custom**.
|
||||
- **CUSTOM_MODEL**: Model id if **LLM** is **custom**.
|
||||
- **TOOL_CALLS**=[true/false]: If **true**, the custom LLM uses tool calls instead of JSON schema for structured output.
|
||||
- **DISABLE_THINKING**=[true/false]: If **true**, disables “thinking” on the custom LLM.
|
||||
- **WEB_GROUNDING**=[true/false]: If **true**, enables web search for OpenAI, Google, and Anthropic models.
|
||||
- **EXTENDED_REASONING**=[true/false]: Enables extended reasoning where supported by the configured stack.
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ services:
|
|||
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
|
||||
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
|
||||
- EXTENDED_REASONING=${EXTENDED_REASONING}
|
||||
- TOOL_CALLS=${TOOL_CALLS}
|
||||
- DISABLE_THINKING=${DISABLE_THINKING}
|
||||
- WEB_GROUNDING=${WEB_GROUNDING}
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
|
|
@ -99,7 +98,6 @@ services:
|
|||
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
|
||||
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
|
||||
- EXTENDED_REASONING=${EXTENDED_REASONING}
|
||||
- TOOL_CALLS=${TOOL_CALLS}
|
||||
- DISABLE_THINKING=${DISABLE_THINKING}
|
||||
- WEB_GROUNDING=${WEB_GROUNDING}
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
|
|
@ -158,7 +156,6 @@ services:
|
|||
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
|
||||
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
|
||||
- EXTENDED_REASONING=${EXTENDED_REASONING}
|
||||
- TOOL_CALLS=${TOOL_CALLS}
|
||||
- DISABLE_THINKING=${DISABLE_THINKING}
|
||||
- WEB_GROUNDING=${WEB_GROUNDING}
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
|
|
@ -223,7 +220,6 @@ services:
|
|||
- DALL_E_3_QUALITY=${DALL_E_3_QUALITY}
|
||||
- GPT_IMAGE_1_5_QUALITY=${GPT_IMAGE_1_5_QUALITY}
|
||||
- EXTENDED_REASONING=${EXTENDED_REASONING}
|
||||
- TOOL_CALLS=${TOOL_CALLS}
|
||||
- DISABLE_THINKING=${DISABLE_THINKING}
|
||||
- WEB_GROUNDING=${WEB_GROUNDING}
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from services.documents_loader import DocumentsLoader
|
|||
from services.mem0_presentation_memory_service import (
|
||||
MEM0_PRESENTATION_MEMORY_SERVICE,
|
||||
)
|
||||
from utils.llm_utils import message_content_to_text
|
||||
from utils.outline_utils import (
|
||||
get_no_of_outlines_to_generate_for_n_slides,
|
||||
get_presentation_title_from_presentation_outline,
|
||||
|
|
@ -85,12 +86,12 @@ async def stream_outlines(
|
|||
await MEM0_PRESENTATION_MEMORY_SERVICE.store_generation_context(
|
||||
presentation_id=presentation.id,
|
||||
system_prompt=(
|
||||
outline_messages[0].content
|
||||
message_content_to_text(outline_messages[0].content)
|
||||
if len(outline_messages) > 0
|
||||
else None
|
||||
),
|
||||
user_prompt=(
|
||||
outline_messages[1].content
|
||||
message_content_to_text(outline_messages[1].content)
|
||||
if len(outline_messages) > 1
|
||||
else None
|
||||
),
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ from utils.process_slides import (
|
|||
process_slide_and_fetch_assets,
|
||||
)
|
||||
from utils.get_layout_by_name import get_layout_by_name
|
||||
from utils.llm_utils import message_content_to_text
|
||||
from models.presentation_layout import PresentationLayoutModel
|
||||
import uuid
|
||||
|
||||
|
|
@ -666,12 +667,12 @@ async def generate_presentation_handler(
|
|||
await MEM0_PRESENTATION_MEMORY_SERVICE.store_generation_context(
|
||||
presentation_id=presentation_id,
|
||||
system_prompt=(
|
||||
outline_messages[0].content
|
||||
message_content_to_text(outline_messages[0].content)
|
||||
if len(outline_messages) > 0
|
||||
else None
|
||||
),
|
||||
user_prompt=(
|
||||
outline_messages[1].content
|
||||
message_content_to_text(outline_messages[1].content)
|
||||
if len(outline_messages) > 1
|
||||
else None
|
||||
),
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
from typing import Any, List, Literal, Optional
|
||||
from pydantic import BaseModel
|
||||
from google.genai.types import Content as GoogleContent
|
||||
|
||||
from models.llm_tool_call import AnthropicToolCall
|
||||
|
||||
|
||||
# Common base type for all provider-specific chat message models below.
# It declares no fields of its own.
class LLMMessage(BaseModel):
    ...
|
||||
|
||||
|
||||
# Plain-text message carrying the fixed "user" role.
class LLMUserMessage(LLMMessage):
    # Role discriminator; the only accepted value is "user".
    role: Literal["user"] = "user"

    # Message text.
    content: str
|
||||
|
||||
|
||||
# Plain-text message carrying the fixed "system" role.
class LLMSystemMessage(LLMMessage):
    # Role discriminator; the only accepted value is "system".
    role: Literal["system"] = "system"

    # Message text.
    content: str
|
||||
|
||||
|
||||
# Assistant-role message with optional text and optional tool calls.
# Both payload fields default to None.
class OpenAIAssistantMessage(LLMMessage):
    # Role discriminator; the only accepted value is "assistant".
    role: Literal["assistant"] = "assistant"

    # Assistant text, if any.
    content: Optional[str] = None

    # Tool calls as plain dicts, if any.
    tool_calls: Optional[List[dict]] = None
|
||||
|
||||
|
||||
# Assistant-role message whose payload is a google.genai ``Content`` object.
class GoogleAssistantMessage(LLMMessage):
    # Role discriminator; the only accepted value is "assistant".
    role: Literal["assistant"] = "assistant"

    # Content object from google.genai.types (imported as GoogleContent).
    content: GoogleContent
|
||||
|
||||
|
||||
# Assistant-role message whose content is a list of AnthropicToolCall entries.
class AnthropicAssistantMessage(LLMMessage):
    # Role discriminator; the only accepted value is "assistant".
    role: Literal["assistant"] = "assistant"

    # Tool-call entries that make up the message content.
    content: List[AnthropicToolCall]
|
||||
|
||||
|
||||
# Single "tool_result" entry: a tool_use_id plus the textual result.
class AnthropicToolCallMessage(LLMMessage):
    # Entry type discriminator; the only accepted value is "tool_result".
    type: Literal["tool_result"] = "tool_result"

    # presumably the id of the tool_use entry this result answers; confirm
    tool_use_id: str

    # Result text.
    content: str
|
||||
|
||||
|
||||
# User-role message whose content is a list of AnthropicToolCallMessage entries.
class AnthropicUserMessage(LLMMessage):
    # Role discriminator; the only accepted value is "user".
    role: Literal["user"] = "user"

    # Tool-result entries that make up the message content.
    content: List[AnthropicToolCallMessage]
|
||||
|
||||
|
||||
# Tool-role message: result text plus the id of the tool call it belongs to.
class OpenAIToolCallMessage(LLMMessage):
    # Role discriminator; the only accepted value is "tool".
    role: Literal["tool"] = "tool"

    # Result text.
    content: str

    # Id of the originating tool call.
    tool_call_id: str
|
||||
|
||||
|
||||
# Tool-role message with a tool name, a response dict and an optional id.
class GoogleToolCallMessage(LLMMessage):
    # Role discriminator; the only accepted value is "tool".
    role: Literal["tool"] = "tool"

    # Optional call id; defaults to None.
    id: Optional[str] = None

    # Tool name.
    name: str

    # Tool response payload.
    response: dict
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
from typing import Literal, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Common base type for the provider-specific tool-call models below.
# It declares no fields of its own.
class LLMToolCall(BaseModel):
    ...
|
||||
|
||||
|
||||
# Function part of an OpenAI tool call: name plus arguments as a string.
class OpenAIToolCallFunction(BaseModel):
    # Function (tool) name.
    name: str

    # Arguments kept as a string; presumably JSON-encoded, confirm with callers.
    arguments: str
|
||||
|
||||
|
||||
# OpenAI-shaped tool call: id, fixed "function" type, and the function payload.
class OpenAIToolCall(LLMToolCall):
    # Tool call id.
    id: str

    # Call type discriminator; the only accepted value is "function".
    type: Literal["function"] = "function"

    # Name and arguments of the called function.
    function: OpenAIToolCallFunction
|
||||
|
||||
|
||||
# Google-shaped tool call: tool name with optional id and optional arguments dict.
class GoogleToolCall(LLMToolCall):
    # Optional call id; defaults to None.
    id: Optional[str] = None

    # Tool name.
    name: str

    # Optional arguments mapping; defaults to None.
    arguments: Optional[dict] = None
|
||||
|
||||
|
||||
# Anthropic-shaped "tool_use" entry: id, tool name and an untyped input value.
class AnthropicToolCall(LLMToolCall):
    # Entry type discriminator; the only accepted value is "tool_use".
    type: Literal["tool_use"] = "tool_use"

    # Tool call id.
    id: str

    # Tool name.
    name: str

    # Tool input; typed as ``object`` so any value is accepted.
    input: object
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
from typing import Any, Callable, Coroutine, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Common base type for tool definitions. It declares no fields of its own.
# No docstring is added on purpose, so ``LLMTool.__doc__`` stays unset.
class LLMTool(BaseModel):
    ...
|
||||
|
||||
|
||||
# Tool described at runtime by instance data: name, description, a parameters
# dict and the async handler that produces the tool's string result.
class LLMDynamicTool(LLMTool):
    # Tool name.
    name: str

    # Human-readable description of the tool.
    description: str

    # Parameters mapping; defaults to an empty dict.
    parameters: dict = {}

    # Callable returning a coroutine that resolves to the result string.
    handler: Callable[..., Coroutine[Any, Any, str]]
|
||||
|
||||
|
||||
# NOTE: the class docstring below is runtime data (parse_tool_openai reads
# ``tool.__doc__`` as the tool description), so it is kept verbatim.
class SearchWebTool(LLMTool):
    """
    Search the web for information.
    """

    # Required search query; the description is part of the generated schema.
    query: str = Field(description="The query to search the web for")
|
||||
|
||||
|
||||
# Parameterless tool. The class docstring below is runtime data
# (parse_tool_openai reads ``tool.__doc__`` as the tool description), so it
# is kept verbatim.
class GetCurrentDatetimeTool(LLMTool):
    """
    Get the current datetime.
    """
|
||||
|
|
@ -46,7 +46,6 @@ class UserConfig(BaseModel):
|
|||
GPT_IMAGE_1_5_QUALITY: Optional[str] = None
|
||||
|
||||
# Reasoning
|
||||
TOOL_CALLS: Optional[bool] = None
|
||||
DISABLE_THINKING: Optional[bool] = None
|
||||
EXTENDED_REASONING: Optional[bool] = None
|
||||
|
||||
|
|
|
|||
|
|
@ -7,19 +7,17 @@ Requires-Dist: alembic>=1.14.0
|
|||
Requires-Dist: aiohttp>=3.12.15
|
||||
Requires-Dist: aiomysql>=0.2.0
|
||||
Requires-Dist: aiosqlite>=0.21.0
|
||||
Requires-Dist: anthropic>=0.60.0
|
||||
Requires-Dist: asyncpg>=0.30.0
|
||||
Requires-Dist: chromadb>=1.0.15
|
||||
Requires-Dist: dirtyjson>=1.0.8
|
||||
Requires-Dist: fastapi[standard]>=0.116.1
|
||||
Requires-Dist: fastembed-vectorstore>=0.5.2
|
||||
Requires-Dist: fastmcp>=2.11.0
|
||||
Requires-Dist: google-genai>=1.28.0
|
||||
Requires-Dist: mem0ai[nlp]>=0.1.115
|
||||
Requires-Dist: nltk>=3.9.1
|
||||
Requires-Dist: openai>=1.98.0
|
||||
Requires-Dist: pathvalidate>=3.3.1
|
||||
Requires-Dist: pdfplumber>=0.11.7
|
||||
Requires-Dist: pytest>=8.4.1
|
||||
Requires-Dist: python-pptx>=1.0.2
|
||||
Requires-Dist: redis>=6.2.0
|
||||
Requires-Dist: sqlmodel>=0.0.24
|
||||
Requires-Dist: llmai==0.1.8
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ api/__init__.py
|
|||
api/lifespan.py
|
||||
api/main.py
|
||||
api/middlewares.py
|
||||
api/v1/auth/router.py
|
||||
api/v1/mock/router.py
|
||||
api/v1/ppt/background_tasks.py
|
||||
api/v1/ppt/router.py
|
||||
|
|
@ -46,9 +47,6 @@ models/document_chunk.py
|
|||
models/generate_presentation_request.py
|
||||
models/image_prompt.py
|
||||
models/json_path_guide.py
|
||||
models/llm_message.py
|
||||
models/llm_tool_call.py
|
||||
models/llm_tools.py
|
||||
models/ollama_model_metadata.py
|
||||
models/ollama_model_status.py
|
||||
models/pptx_models.py
|
||||
|
|
@ -78,7 +76,6 @@ presenton_backend.egg-info/dependency_links.txt
|
|||
presenton_backend.egg-info/requires.txt
|
||||
presenton_backend.egg-info/top_level.txt
|
||||
services/__init__.py
|
||||
services/codex_llm.py
|
||||
services/concurrent_service.py
|
||||
services/database.py
|
||||
services/document_conversion_service.py
|
||||
|
|
@ -88,8 +85,7 @@ services/html_to_text_runs_service.py
|
|||
services/icon_finder_service.py
|
||||
services/image_generation_service.py
|
||||
services/liteparse_service.py
|
||||
services/llm_client.py
|
||||
services/llm_tool_calls_handler.py
|
||||
services/mem0_presentation_memory_service.py
|
||||
services/pptx_presentation_creator.py
|
||||
services/score_based_chunker.py
|
||||
services/temp_file_service.py
|
||||
|
|
@ -106,7 +102,9 @@ templates/providers.py
|
|||
templates/router.py
|
||||
tests/test_gemini_schema_support.py
|
||||
tests/test_image_generation.py
|
||||
tests/test_liteparse_service.py
|
||||
tests/test_mcp_server.py
|
||||
tests/test_mem0_presentation_memory_service.py
|
||||
tests/test_openai_schema_support.py
|
||||
tests/test_pptx_creator.py
|
||||
tests/test_pptx_slides_processing.py
|
||||
|
|
@ -130,7 +128,9 @@ utils/get_layout_by_name.py
|
|||
utils/image_provider.py
|
||||
utils/image_utils.py
|
||||
utils/llm_client_error_handler.py
|
||||
utils/llm_config.py
|
||||
utils/llm_provider.py
|
||||
utils/llm_utils.py
|
||||
utils/model_availability.py
|
||||
utils/ocr_language.py
|
||||
utils/ollama.py
|
||||
|
|
@ -141,6 +141,7 @@ utils/ppt_utils.py
|
|||
utils/process_slides.py
|
||||
utils/schema_utils.py
|
||||
utils/set_env.py
|
||||
utils/simple_auth.py
|
||||
utils/theme_utils.py
|
||||
utils/user_config.py
|
||||
utils/validators.py
|
||||
|
|
|
|||
|
|
@ -2,19 +2,17 @@ alembic>=1.14.0
|
|||
aiohttp>=3.12.15
|
||||
aiomysql>=0.2.0
|
||||
aiosqlite>=0.21.0
|
||||
anthropic>=0.60.0
|
||||
asyncpg>=0.30.0
|
||||
chromadb>=1.0.15
|
||||
dirtyjson>=1.0.8
|
||||
fastapi[standard]>=0.116.1
|
||||
fastembed-vectorstore>=0.5.2
|
||||
fastmcp>=2.11.0
|
||||
google-genai>=1.28.0
|
||||
mem0ai[nlp]>=0.1.115
|
||||
nltk>=3.9.1
|
||||
openai>=1.98.0
|
||||
pathvalidate>=3.3.1
|
||||
pdfplumber>=0.11.7
|
||||
pytest>=8.4.1
|
||||
python-pptx>=1.0.2
|
||||
redis>=6.2.0
|
||||
sqlmodel>=0.0.24
|
||||
llmai==0.1.8
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ dependencies = [
|
|||
"aiohttp>=3.12.15",
|
||||
"aiomysql>=0.2.0",
|
||||
"aiosqlite>=0.21.0",
|
||||
"anthropic>=0.60.0",
|
||||
"asyncpg>=0.30.0",
|
||||
"dirtyjson>=1.0.8",
|
||||
"fastapi[standard]>=0.116.1",
|
||||
|
|
@ -26,11 +25,15 @@ dependencies = [
|
|||
"pdfplumber>=0.11.7",
|
||||
"python-pptx>=1.0.2",
|
||||
"sqlmodel>=0.0.24",
|
||||
"llmai==0.1.8",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
index-strategy = "unsafe-best-match"
|
||||
|
||||
[tool.uv.sources]
|
||||
llmai = { url = "https://files.pythonhosted.org/packages/49/9e/64fb2453d9eace7fd50b25635ae267422d014c64861ac511a5e953884f85/llmai-0.1.8-py3-none-any.whl" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["api*", "enums*", "models*", "services*", "constants*", "utils*", "templates*"]
|
||||
|
|
|
|||
|
|
@ -1,431 +0,0 @@
|
|||
"""Codex (Responses API) adapter for structured and unstructured LLM calls.
|
||||
|
||||
Stateless adapter: receives AsyncOpenAI client and tool_calls_handler at call time.
|
||||
Auth and client creation stay in LLMClient. Structure matches other providers:
|
||||
generate = call API, collect content + tool_calls, recurse on tool_calls; stream = same but yield deltas.
|
||||
|
||||
Uses LLMToolCallsHandler directly: tools are parsed via parse_tools() in llm_client (handler supports
|
||||
Codex and returns OpenAI-style dicts); this module flattens them for the Responses API. Tool execution
|
||||
uses tool_calls_handler.handle_tool_calls_openai().
|
||||
"""
|
||||
|
||||
import dirtyjson
|
||||
from typing import Any, AsyncGenerator, List, Optional, Union
|
||||
|
||||
from fastapi import HTTPException
|
||||
from openai import APIStatusError, AsyncOpenAI, OpenAIError
|
||||
|
||||
from models.llm_message import (
|
||||
LLMMessage,
|
||||
OpenAIAssistantMessage,
|
||||
LLMSystemMessage,
|
||||
LLMUserMessage,
|
||||
)
|
||||
from models.llm_tool_call import OpenAIToolCall, OpenAIToolCallFunction
|
||||
from utils.schema_utils import ensure_strict_json_schema
|
||||
|
||||
# Responses API requires flat tool format: {"type":"function","name":...,"description":...,"parameters":...}
|
||||
RESPONSE_SCHEMA_NAME = "ResponseSchema"
|
||||
# Required tool choice for structured: force ResponseSchema (no plain-text fallback).
|
||||
STRUCTURED_TOOL_CHOICE = {"type": "function", "name": RESPONSE_SCHEMA_NAME}
|
||||
MAX_RECURSION_DEPTH = 5
|
||||
|
||||
|
||||
def _to_responses_tools(chat_tools: List[dict]) -> List[dict]:
|
||||
"""Convert Chat Completions tool format to flat Responses API format."""
|
||||
result = []
|
||||
for tool in chat_tools:
|
||||
if tool.get("type") != "function":
|
||||
result.append(tool)
|
||||
continue
|
||||
fn = tool.get("function") or tool
|
||||
result.append({
|
||||
"type": "function",
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"parameters": fn.get("parameters", {}),
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
def _items_to_openai_calls(items_by_id: dict[str, dict]) -> List[OpenAIToolCall]:
    """Convert collected Responses API function-call items to OpenAIToolCall models.

    Only the values of ``items_by_id`` are used, in mapping order. The call id
    is taken from ``"call_id"``, then ``"id"``, then ``""``; a missing name
    becomes ``""`` and missing arguments become ``"{}"``.
    """
    calls: List[OpenAIToolCall] = []
    for item in items_by_id.values():
        function = OpenAIToolCallFunction(
            name=item.get("name", ""),
            arguments=item.get("arguments", "{}"),
        )
        calls.append(
            OpenAIToolCall(
                id=item.get("call_id", item.get("id", "")),
                type="function",
                function=function,
            )
        )
    return calls
|
||||
|
||||
|
||||
async def _messages_after_tool_turn(
    messages: List[LLMMessage],
    items_by_id: dict[str, dict],
    tool_calls_handler: Any,
) -> List[LLMMessage]:
    """Run the requested tools and return the conversation extended by that turn.

    The returned list is a new list: the original ``messages``, followed by one
    assistant message (no text, tool calls dumped to dicts), followed by the
    messages returned from ``tool_calls_handler.handle_tool_calls_openai``.
    The input list is not modified.
    """
    calls = _items_to_openai_calls(items_by_id)
    results = await tool_calls_handler.handle_tool_calls_openai(calls)

    assistant_turn = OpenAIAssistantMessage(
        role="assistant",
        content=None,
        tool_calls=[call.model_dump() for call in calls],
    )

    extended = list(messages)
    extended.append(assistant_turn)
    extended.extend(results)
    return extended
|
||||
|
||||
|
||||
def _build_body(
    model: str,
    messages: List[LLMMessage],
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[Union[str, dict]] = None,
) -> dict:
    """Assemble the request payload for a Codex Responses API call.

    Message mapping:
      * system messages set ``instructions`` (if several exist, the last wins);
      * user messages become ``input_text`` items with role ``user``;
      * assistant messages with non-empty text become ``output_text`` items;
      * any other message type is forwarded as a ``user`` ``input_text`` item
        when it exposes a truthy ``content`` attribute, and dropped otherwise.

    ``instructions``, ``input`` and ``tools`` are only added when non-empty.
    ``tool_choice`` falls back to ``"auto"`` when ``None`` is passed.
    """

    def _text_item(role: str, kind: str, text: Any) -> dict:
        return {"role": role, "content": [{"type": kind, "text": text}]}

    system_text = None
    conversation = []

    for message in messages:
        if isinstance(message, LLMSystemMessage):
            system_text = message.content
            continue
        if isinstance(message, LLMUserMessage):
            # User messages are appended unconditionally, even when empty.
            conversation.append(_text_item("user", "input_text", message.content))
            continue

        if isinstance(message, OpenAIAssistantMessage):
            role, kind = "assistant", "output_text"
            text = message.content or ""
        else:
            role, kind = "user", "input_text"
            text = getattr(message, "content", "") or ""

        if text:
            conversation.append(_text_item(role, kind, text))

    payload: dict = {
        "model": model,
        "store": False,
        "stream": True,
        "text": {"verbosity": "medium"},
        "include": ["reasoning.encrypted_content"],
        "tool_choice": "auto" if tool_choice is None else tool_choice,
        "parallel_tool_calls": True,
    }
    if system_text:
        payload["instructions"] = system_text
    if conversation:
        payload["input"] = conversation
    if tools:
        payload["tools"] = tools

    return payload
|
||||
|
||||
|
||||
def _event_to_dict(event: Any) -> dict:
|
||||
"""Convert SDK event to dict."""
|
||||
if hasattr(event, "model_dump"):
|
||||
return event.model_dump()
|
||||
return {
|
||||
"type": getattr(event, "type", None),
|
||||
"delta": getattr(event, "delta", None),
|
||||
"item": getattr(event, "item", None),
|
||||
"message": getattr(event, "message", None),
|
||||
"arguments": getattr(event, "arguments", None),
|
||||
"name": getattr(event, "name", None),
|
||||
}
|
||||
|
||||
|
||||
async def _stream_raw(
    client: AsyncOpenAI,
    model: str,
    messages: List[LLMMessage],
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[Union[str, dict]] = None,
) -> AsyncGenerator[dict, None]:
    """Open a streaming Responses API request and yield each event as a dict.

    Raises:
        HTTPException: when creating the stream fails with an OpenAI SDK
            error. The status is the error's ``status_code`` (502 if it has
            none) and the detail is truncated to 400 characters. Errors raised
            while iterating the stream are not translated here.
    """
    request = _build_body(model, messages, tools, tool_choice=tool_choice)
    # "stream" is passed explicitly below, so drop the copy from the payload.
    request.pop("stream", None)

    try:
        stream = await client.responses.create(stream=True, **request)
    except (APIStatusError, OpenAIError) as exc:
        status_code = getattr(exc, "status_code", 502)
        message = getattr(exc, "message", str(exc)) or str(exc)
        raise HTTPException(
            status_code=status_code,
            detail=f"Codex API error: {message}"[:400],
        ) from exc

    async for raw_event in stream:
        yield _event_to_dict(raw_event)
|
||||
|
||||
|
||||
class CodexLLMAdapter:
    """Stateless adapter for the Codex Responses API.

    All methods are static and receive the ``AsyncOpenAI`` client and the
    tool-calls handler per call. When the model requests tools, they are run
    through the handler and the request is repeated with the extended
    conversation, at most ``MAX_RECURSION_DEPTH`` levels deep.

    ``max_tokens`` is accepted everywhere and threaded through the recursion,
    but it is not currently sent to the API.
    """

    @staticmethod
    async def generate_codex(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        tool_calls_handler: Any,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> Optional[str]:
        """Return the model's complete text answer, resolving tool calls first.

        Args:
            client: OpenAI async client used for the request.
            model: Model id.
            messages: Conversation so far.
            tool_calls_handler: Object providing ``handle_tool_calls_openai``.
            max_tokens: Accepted for interface parity; not sent to the API.
            tools: Chat Completions style tool dicts, or ``None``.
            depth: Current tool-recursion depth (callers pass 0).

        Returns:
            The concatenated text deltas, or ``None`` when no text arrived.
            If this turn requested tools (and the depth limit allows), text
            from this turn is discarded and the follow-up turn's result is
            returned instead.

        Raises:
            HTTPException: 502 on a ``response.failed`` or ``error`` event.
        """
        print(
            f"Codex generate: model={model} depth={depth} tools_count={len(tools) if tools else 0}"
        )
        responses_tools = _to_responses_tools(tools) if tools else None
        text_parts: List[str] = []
        tool_calls_by_id: dict[str, dict] = {}

        async for event in _stream_raw(client, model, messages, responses_tools, tool_choice=None):
            event_type = event.get("type", "")

            if event_type == "response.output_text.delta":
                delta = event.get("delta", "")
                if delta:
                    text_parts.append(delta)
            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(status_code=502, detail=f"Codex error: {msg_text}")

        if tool_calls_by_id and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex generate: tool calls detected depth={depth} count={len(tool_calls_by_id)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, tool_calls_by_id, tool_calls_handler
            )
            return await CodexLLMAdapter.generate_codex(
                client, model, new_messages, tool_calls_handler,
                max_tokens=max_tokens, tools=tools, depth=depth + 1,
            )

        return "".join(text_parts) or None

    @staticmethod
    async def stream_codex(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        tool_calls_handler: Any,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> AsyncGenerator[str, None]:
        """Yield text deltas as they arrive, then continue after tool calls.

        Text from the current turn is yielded immediately. If the turn also
        requested tools (and the depth limit allows), they are executed once
        the stream ends and the follow-up turn's deltas are yielded after.
        Parameters match ``generate_codex``.

        Raises:
            HTTPException: 502 on a ``response.failed`` or ``error`` event.
        """
        print(
            f"Codex stream: model={model} depth={depth} tools_count={len(tools) if tools else 0}"
        )
        responses_tools = _to_responses_tools(tools) if tools else None
        tool_calls_by_id: dict[str, dict] = {}

        async for event in _stream_raw(client, model, messages, responses_tools, tool_choice=None):
            event_type = event.get("type", "")

            if event_type == "response.output_text.delta":
                delta = event.get("delta", "")
                if delta:
                    yield delta
            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(status_code=502, detail=f"Codex stream error: {msg_text}")

        if tool_calls_by_id and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex stream: tool calls detected depth={depth} count={len(tool_calls_by_id)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, tool_calls_by_id, tool_calls_handler
            )
            async for chunk in CodexLLMAdapter.stream_codex(
                client, model, new_messages, tool_calls_handler,
                max_tokens=max_tokens, tools=tools, depth=depth + 1,
            ):
                yield chunk

    @staticmethod
    async def stream_codex_structured(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        response_format: dict,
        tool_calls_handler: Any,
        strict: bool = False,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> AsyncGenerator[str, None]:
        """Stream the structured (JSON) answer as text chunks.

        Structured output is obtained by always adding an internal
        ``ResponseSchema`` function tool whose parameters are
        ``response_format`` and forcing it via ``tool_choice``. The JSON the
        model writes as that call's arguments is what gets yielded.

        The JSON is emitted exactly once per call: as argument deltas when the
        API streams them, otherwise as the complete ``arguments`` string taken
        from the first ``*.done`` event that carries it. (Previously the full
        string was yielded again after the deltas, duplicating the payload.)

        Args:
            response_format: JSON schema of the expected answer.
            strict: When true, the schema is passed through
                ``ensure_strict_json_schema`` on the first (depth 0) call.
            tools: Additional user tools; calls to them trigger a follow-up
                turn like in ``stream_codex``.
            Other parameters match ``generate_codex``.

        Raises:
            HTTPException: 502 on a ``response.failed`` or ``error`` event.
        """
        # Per-chunk logging is capped so long answers do not flood the log.
        max_logged_delta_chunks = 3

        user_tools_count = len(tools) if tools else 0
        print(
            f"Codex stream_structured: model={model} depth={depth} strict={strict} "
            f"user_tools={user_tools_count} (always adding ResponseSchema tool for structured JSON)"
        )
        schema = (
            ensure_strict_json_schema(response_format, path=(), root=response_format)
            if strict and depth == 0
            else response_format
        )
        response_schema_tool = {
            "type": "function",
            "name": RESPONSE_SCHEMA_NAME,
            "description": "Provide response to the user",
            "parameters": schema,
        }
        all_tools: List[dict] = [response_schema_tool]
        if tools:
            all_tools.extend(_to_responses_tools(tools))

        tool_calls_by_id: dict[str, dict] = {}
        schema_call_id: Optional[str] = None  # call id of the active ResponseSchema call
        schema_item_id: Optional[str] = None  # output-item id of that call, when reported
        schema_emitted = False  # True once any JSON for that call was yielded
        logged_delta_chunks = 0

        async for event in _stream_raw(
            client, model, messages, all_tools, tool_choice=STRUCTURED_TOOL_CHOICE
        ):
            event_type = event.get("type", "")

            if event_type == "response.output_item.added":
                item = event.get("item") or {}
                if item.get("type") == "function_call" and item.get("name") == RESPONSE_SCHEMA_NAME:
                    schema_call_id = item.get("call_id", item.get("id"))
                    schema_item_id = item.get("id")
                    schema_emitted = False
                    print(
                        f"Codex stream_structured: ResponseSchema call started call_id={schema_call_id}"
                    )

            elif event_type == "response.function_call_arguments.delta":
                if schema_call_id is None:
                    continue
                # Skip deltas that are known to belong to a different output
                # item; when either id is missing, keep the permissive behavior.
                delta_item_id = event.get("item_id")
                if delta_item_id and schema_item_id and delta_item_id != schema_item_id:
                    continue
                delta = event.get("delta", "")
                if delta:
                    if logged_delta_chunks < max_logged_delta_chunks:
                        print(
                            f"Codex stream_structured: ResponseSchema delta chunk len={len(delta)}"
                        )
                        logged_delta_chunks += 1
                    schema_emitted = True
                    yield delta

            elif event_type == "response.function_call_arguments.done":
                if event.get("name") == RESPONSE_SCHEMA_NAME:
                    arguments = event.get("arguments", "")
                    # Fallback only: used when no deltas were streamed.
                    if arguments and not schema_emitted:
                        print(
                            f"Codex stream_structured: ResponseSchema arguments.done len={len(arguments)}"
                        )
                        schema_emitted = True
                        yield arguments

            elif event_type == "response.output_item.done":
                item = event.get("item") or {}
                if item.get("type") == "function_call":
                    tool_calls_by_id[item.get("call_id", item.get("id", ""))] = item
                    if item.get("name") == RESPONSE_SCHEMA_NAME:
                        arguments = item.get("arguments", "")
                        # Fallback only: used when nothing was yielded yet.
                        if arguments and not schema_emitted:
                            print(
                                f"Codex stream_structured: ResponseSchema output_item.done len={len(arguments)}"
                            )
                            schema_emitted = True
                            yield arguments

            elif event_type in ("response.failed", "error"):
                msg_text = event.get("message") or str(event)
                raise HTTPException(status_code=502, detail=f"Codex structured error: {msg_text}")

        other_tool_calls = {
            call_id: item
            for call_id, item in tool_calls_by_id.items()
            if item.get("name") != RESPONSE_SCHEMA_NAME
        }
        if other_tool_calls and tools and depth < MAX_RECURSION_DEPTH:
            print(
                f"Codex stream_structured: recursing for non-ResponseSchema tool calls "
                f"depth={depth} count={len(other_tool_calls)}"
            )
            new_messages = await _messages_after_tool_turn(
                messages, other_tool_calls, tool_calls_handler
            )
            async for chunk in CodexLLMAdapter.stream_codex_structured(
                client, model, new_messages, response_format, tool_calls_handler,
                strict=strict, max_tokens=max_tokens, tools=tools, depth=depth + 1,
            ):
                yield chunk

    @staticmethod
    async def generate_codex_structured(
        client: AsyncOpenAI,
        model: str,
        messages: List[LLMMessage],
        response_format: dict,
        tool_calls_handler: Any,
        strict: bool = False,
        max_tokens: Optional[int] = None,
        tools: Optional[List[dict]] = None,
        depth: int = 0,
    ) -> Optional[dict]:
        """Collect the structured stream and parse it into a dict.

        Returns:
            The parsed JSON object, or ``None`` when the model produced no
            output. Parsing only happens for ``depth == 0``; for any other
            depth ``None`` is returned.

        Raises:
            HTTPException: 502 when the collected text cannot be parsed as a
                JSON object, even after skipping to the first ``{``.
        """
        user_tools_count = len(tools) if tools else 0
        print(
            f"Codex generate_structured: model={model} depth={depth} strict={strict} "
            f"user_tools={user_tools_count} (using ResponseSchema tool for structured JSON)"
        )
        accumulated: List[str] = []
        async for chunk in CodexLLMAdapter.stream_codex_structured(
            client, model, messages, response_format, tool_calls_handler,
            strict=strict, max_tokens=max_tokens, tools=tools, depth=depth,
        ):
            accumulated.append(chunk)

        raw = "".join(accumulated)
        if not raw:
            return None

        if depth != 0:
            return None

        try:
            parsed = dict(dirtyjson.loads(raw))
            print(
                f"Codex generate_structured: parsed JSON keys={list(parsed.keys())[:8]}"
            )
            return parsed
        except Exception:
            # Best effort: retry from the first "{" in case of leading noise.
            start = raw.find("{")
            if start >= 0:
                try:
                    parsed = dict(dirtyjson.loads(raw[start:]))
                    print(
                        "Codex generate_structured: parsed JSON from offset "
                        f"{start} keys={list(parsed.keys())[:8]}"
                    )
                    return parsed
                except Exception:
                    pass
            raise HTTPException(
                status_code=502,
                detail=(
                    "Model did not return valid structured output (expected JSON from ResponseSchema). "
                    "Please retry."
                ),
            )
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,211 +0,0 @@
|
|||
import asyncio
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Any, Callable, Coroutine, List, Optional
|
||||
from fastapi import HTTPException
|
||||
from enums.llm_provider import LLMProvider
|
||||
from models.llm_message import (
|
||||
AnthropicToolCallMessage,
|
||||
GoogleToolCallMessage,
|
||||
OpenAIToolCallMessage,
|
||||
)
|
||||
from models.llm_tool_call import AnthropicToolCall, GoogleToolCall, OpenAIToolCall
|
||||
from models.llm_tools import LLMDynamicTool, LLMTool, SearchWebTool
|
||||
from utils.schema_utils import (
|
||||
ensure_strict_json_schema,
|
||||
flatten_json_schema,
|
||||
remove_titles_from_schema,
|
||||
)
|
||||
|
||||
|
||||
class LLMToolCallsHandler:
|
||||
def __init__(self, client):
    """Bind the handler to its LLM client and set up the tool registries.

    ``tools_map`` maps the two built-in tool names to their handler methods;
    ``dynamic_tools`` starts empty and is filled by ``parse_tool``.
    """
    # NOTE(review): imported locally rather than at module level, presumably
    # to avoid a circular import with services.llm_client; confirm.
    from services.llm_client import LLMClient

    self.client: LLMClient = client

    builtin_handlers: dict[str, Callable[..., Coroutine[Any, Any, str]]] = {}
    builtin_handlers["SearchWebTool"] = self.search_web_tool_call_handler
    builtin_handlers["GetCurrentDatetimeTool"] = self.get_current_datetime_tool_call_handler
    self.tools_map: dict[str, Callable[..., Coroutine[Any, Any, str]]] = builtin_handlers

    self.dynamic_tools: List[LLMDynamicTool] = []
|
||||
|
||||
def get_tool_handler(
    self, tool_name: str
) -> Callable[..., Coroutine[Any, Any, str]]:
    """Look up the async handler registered for ``tool_name``.

    Built-in tools in ``tools_map`` take precedence. Otherwise the first
    entry of ``dynamic_tools`` with a matching name is used.

    Raises:
        HTTPException: 500 when no handler is registered under that name.
    """
    builtin = self.tools_map.get(tool_name)
    if builtin:
        return builtin

    for candidate in self.dynamic_tools:
        if candidate.name == tool_name:
            return candidate.handler

    raise HTTPException(status_code=500, detail=f"Tool {tool_name} not found")
|
||||
|
||||
def parse_tools(self, tools: Optional[List[type[LLMTool] | LLMDynamicTool]] = None):
    """Convert each tool to the active provider's format via ``parse_tool``.

    Returns ``None`` when ``tools`` is ``None``; otherwise a list with one
    parsed entry per tool, in order (an empty list stays an empty list).
    """
    if tools is None:
        return None
    return [self.parse_tool(tool) for tool in tools]
|
||||
|
||||
def parse_tool(self, tool: type[LLMTool] | LLMDynamicTool, strict: bool = False):
|
||||
if isinstance(tool, LLMDynamicTool):
|
||||
self.dynamic_tools.append(tool)
|
||||
|
||||
match self.client.llm_provider:
|
||||
case LLMProvider.OPENAI | LLMProvider.OLLAMA | LLMProvider.CUSTOM:
|
||||
return self.parse_tool_openai(tool, strict)
|
||||
case LLMProvider.ANTHROPIC:
|
||||
return self.parse_tool_anthropic(tool)
|
||||
case LLMProvider.GOOGLE:
|
||||
return self.parse_tool_google(tool)
|
||||
case _:
|
||||
raise ValueError(
|
||||
f"LLM provider must be either openai, anthropic, or google"
|
||||
)
|
||||
|
||||
def parse_tool_openai(
    self, tool: type[LLMTool] | LLMDynamicTool, strict: bool = False
):
    """Build the OpenAI Chat Completions function-tool dict for ``tool``.

    Dynamic tools supply name, description and parameters from their fields.
    Tool classes supply the class name, the raw class docstring (``""`` when
    absent) and ``model_json_schema()``. With ``strict`` the parameters are
    passed through ``ensure_strict_json_schema``.
    """
    if isinstance(tool, LLMDynamicTool):
        name, description, parameters = tool.name, tool.description, tool.parameters
    else:
        name = tool.__name__
        description = tool.__doc__ or ""
        parameters = tool.model_json_schema()

    if strict:
        parameters = ensure_strict_json_schema(parameters, path=(), root=parameters)

    function_spec = {
        "name": name,
        "description": description,
        "strict": strict,
        "parameters": parameters,
    }
    return {"type": "function", "function": function_spec}
|
||||
|
||||
def parse_tool_google(self, tool: type[LLMTool] | LLMDynamicTool):
    """Build a Google-style tool definition (``name``/``description``/``parameters``).

    Starts from the OpenAI-style definition. A non-empty parameter schema is
    flattened and stripped of titles; an empty one becomes ``{}``.
    """
    function_spec = self.parse_tool_openai(tool)["function"]
    raw_parameters = function_spec["parameters"]

    if raw_parameters:
        parameters = remove_titles_from_schema(flatten_json_schema(raw_parameters))
    else:
        parameters = {}

    return {
        "name": function_spec["name"],
        "description": function_spec["description"],
        "parameters": parameters,
    }
|
||||
|
||||
def parse_tool_anthropic(self, tool: type[LLMTool] | LLMDynamicTool):
    """Build an Anthropic-style tool definition (``name``/``description``/``input_schema``).

    Starts from the OpenAI-style definition. An empty parameter schema is
    replaced by ``{"type": "object"}``.
    """
    function_spec = self.parse_tool_openai(tool)["function"]
    schema = function_spec["parameters"]
    if schema == {}:
        schema = {"type": "object"}

    return {
        "name": function_spec["name"],
        "description": function_spec["description"],
        "input_schema": schema,
    }
|
||||
|
||||
async def handle_tool_calls_openai(
    self,
    tool_calls: List[OpenAIToolCall],
) -> List[OpenAIToolCallMessage]:
    """Run all OpenAI tool calls concurrently and wrap their results.

    Each call's handler is looked up by function name and invoked with the
    call's raw ``arguments``. The returned messages follow the order of
    ``tool_calls`` and carry the matching ``tool_call_id``.
    """
    pending = [
        self.get_tool_handler(call.function.name)(call.function.arguments)
        for call in tool_calls
    ]
    results: List[str] = await asyncio.gather(*pending)

    return [
        OpenAIToolCallMessage(content=output, tool_call_id=call.id)
        for call, output in zip(tool_calls, results)
    ]
|
||||
|
||||
async def handle_tool_calls_google(
    self,
    tool_calls: List[GoogleToolCall],
) -> List[GoogleToolCallMessage]:
    """Run all Google tool calls concurrently and wrap their results.

    Each call's ``arguments`` are JSON-encoded before being handed to the
    handler. Every result is returned as ``{"result": <handler output>}``
    alongside the originating call's ``id`` and ``name``.
    """
    pending = [
        self.get_tool_handler(call.name)(json.dumps(call.arguments))
        for call in tool_calls
    ]
    results: List[str] = await asyncio.gather(*pending)

    return [
        GoogleToolCallMessage(
            id=call.id,
            name=call.name,
            response={"result": output},
        )
        for call, output in zip(tool_calls, results)
    ]
|
||||
|
||||
async def handle_tool_calls_anthropic(
    self,
    tool_calls: List[AnthropicToolCall],
) -> List[AnthropicToolCallMessage]:
    """Run all Anthropic tool calls concurrently and wrap their results.

    Each call's ``input`` is JSON-encoded before being handed to the handler.
    The returned messages follow the order of ``tool_calls`` and carry the
    matching ``tool_use_id``.
    """
    pending = [
        self.get_tool_handler(call.name)(json.dumps(call.input))
        for call in tool_calls
    ]
    results: List[str] = await asyncio.gather(*pending)

    return [
        AnthropicToolCallMessage(content=output, tool_use_id=call.id)
        for call, output in zip(tool_calls, results)
    ]
|
||||
|
||||
# ? Tool call handlers
|
||||
# Search web tool call handler
|
||||
async def search_web_tool_call_handler(self, arguments: str) -> str:
    """Dispatch a web-search tool call to the handler for the active provider.

    For providers other than OpenAI, Anthropic and Google no search is run;
    a "not implemented" message naming the provider is returned instead.
    """
    provider = self.client.llm_provider
    if provider == LLMProvider.OPENAI:
        return await self.search_web_tool_call_handler_openai(arguments)
    if provider == LLMProvider.ANTHROPIC:
        return await self.search_web_tool_call_handler_anthropic(arguments)
    if provider == LLMProvider.GOOGLE:
        return await self.search_web_tool_call_handler_google(arguments)

    return (
        "Web search tool call handler not implemented for this LLM provider: "
        + self.client.llm_provider.value
    )
|
||||
|
||||
async def search_web_tool_call_handler_openai(self, arguments: str) -> str:
    """Validate ``arguments`` as a ``SearchWebTool`` JSON payload and search via OpenAI."""
    query = SearchWebTool.model_validate_json(arguments).query
    return await self.client._search_openai(query)
|
||||
|
||||
async def search_web_tool_call_handler_google(self, arguments: str) -> str:
    """Validate ``arguments`` as a ``SearchWebTool`` JSON payload and search via Google."""
    query = SearchWebTool.model_validate_json(arguments).query
    return await self.client._search_google(query)
|
||||
|
||||
async def search_web_tool_call_handler_anthropic(self, arguments: str) -> str:
    """Validate ``arguments`` as a ``SearchWebTool`` JSON payload and search via Anthropic."""
    query = SearchWebTool.model_validate_json(arguments).query
    return await self.client._search_anthropic(query)
|
||||
|
||||
# Get current datetime tool call handler
|
||||
async def get_current_datetime_tool_call_handler(self, _) -> str:
    """Return ``datetime.now()`` formatted as ``"<weekday>, <month> <day>, <year> at <12-hour time> <AM/PM>"``.

    The tool-call arguments (second parameter) are ignored.
    """
    now = datetime.now()
    return now.strftime("%A, %B %d, %Y at %I:%M:%S %p")
|
||||
|
|
@ -4,10 +4,17 @@ from dataclasses import dataclass
|
|||
import time
|
||||
from typing import Any, Awaitable, Callable, Optional
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
from fastapi import HTTPException
|
||||
from google import genai
|
||||
from google.genai import types as google_types
|
||||
from llmai import AnthropicClient
|
||||
from llmai.shared import (
|
||||
AnthropicClientConfig,
|
||||
ImageContentPart,
|
||||
SystemMessage,
|
||||
TextResponse,
|
||||
UserMessage,
|
||||
)
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from enums.llm_provider import LLMProvider
|
||||
|
|
@ -160,11 +167,28 @@ def _get_google_client() -> genai.Client:
|
|||
return genai.Client(api_key=api_key)
|
||||
|
||||
|
||||
def _get_anthropic_client() -> AsyncAnthropic:
|
||||
def _get_anthropic_client() -> AnthropicClient:
|
||||
api_key = get_anthropic_api_key_env()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="ANTHROPIC_API_KEY is not set")
|
||||
return AsyncAnthropic(api_key=api_key)
|
||||
return AnthropicClient(config=AnthropicClientConfig(api_key=api_key))
|
||||
|
||||
|
||||
def _read_llmai_response_text(response: Any) -> str:
    """Extract the plain text carried by ``response.content``.

    * A string ``content`` is returned as-is.
    * A list ``content`` is joined from its string items and from the string
      ``text`` attribute of its other items; anything else is skipped.
    * Any other ``content`` contributes its ``text`` attribute when that is a
      string.

    Returns an empty string when no text can be found, so the result is always
    a ``str``.
    """
    content = getattr(response, "content", None)
    if isinstance(content, str):
        return content

    if isinstance(content, list):
        fragments: list[str] = []
        for part in content:
            text = part if isinstance(part, str) else getattr(part, "text", None)
            if isinstance(text, str):
                fragments.append(text)
        return "".join(fragments)

    # Apply the same "strings only" rule as the list branch so a non-string
    # ``text`` attribute can never leak out of a function annotated ``-> str``.
    text = getattr(content, "text", None)
    return text if isinstance(text, str) else ""
|
||||
|
||||
|
||||
async def _call_openai_like(
|
||||
|
|
@ -308,28 +332,24 @@ async def _call_anthropic(
|
|||
media_type: str = "image/png",
|
||||
) -> str:
|
||||
client = _get_anthropic_client()
|
||||
content = [{"type": "text", "text": user_text}]
|
||||
content: str | list[object] = user_text
|
||||
if image_bytes:
|
||||
content.append(
|
||||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": base64.b64encode(image_bytes).decode("utf-8"),
|
||||
},
|
||||
}
|
||||
)
|
||||
content = [
|
||||
user_text,
|
||||
ImageContentPart(data=image_bytes, mime_type=media_type),
|
||||
]
|
||||
|
||||
response = await client.messages.create(
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
model=model,
|
||||
messages=[
|
||||
SystemMessage(content=system_prompt),
|
||||
UserMessage(content=content),
|
||||
],
|
||||
response_format=TextResponse(),
|
||||
max_tokens=8192,
|
||||
system=system_prompt,
|
||||
messages=[{"role": "user", "content": content}],
|
||||
)
|
||||
output_text = "".join(
|
||||
block.text for block in response.content if getattr(block, "type", None) == "text"
|
||||
)
|
||||
output_text = _read_llmai_response_text(response)
|
||||
if not output_text:
|
||||
raise HTTPException(status_code=500, detail="No output from template provider")
|
||||
return output_text
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from anthropic import AsyncAnthropic
|
||||
import aiohttp
|
||||
from openai import AsyncOpenAI
|
||||
from google import genai
|
||||
|
||||
|
|
@ -12,8 +12,21 @@ async def list_available_openai_compatible_models(url: str, api_key: str) -> lis
|
|||
|
||||
|
||||
async def list_available_anthropic_models(api_key: str) -> list[str]:
|
||||
client = AsyncAnthropic(api_key=api_key)
|
||||
return list(map(lambda x: x.id, (await client.models.list(limit=50)).data))
|
||||
async with aiohttp.ClientSession(
|
||||
headers={
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
) as session:
|
||||
async with session.get(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
params={"limit": 50},
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
data = await response.json()
|
||||
|
||||
models = data.get("data", [])
|
||||
return [model.get("id") for model in models if model.get("id")]
|
||||
|
||||
|
||||
async def list_available_google_models(api_key: str) -> list[str]:
|
||||
|
|
|
|||
|
|
@ -85,10 +85,6 @@ def get_pixabay_api_key_env():
|
|||
return os.getenv("PIXABAY_API_KEY")
|
||||
|
||||
|
||||
def get_tool_calls_env():
|
||||
return os.getenv("TOOL_CALLS")
|
||||
|
||||
|
||||
def get_disable_thinking_env():
|
||||
return os.getenv("DISABLE_THINKING")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
from fastapi import HTTPException
|
||||
from llmai import get_client
|
||||
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
|
||||
from models.presentation_layout import SlideLayoutModel
|
||||
from models.sql.slide import SlideModel
|
||||
from services.llm_client import LLMClient
|
||||
from utils.llm_config import get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_utils import extract_structured_content, get_generate_kwargs
|
||||
from utils.llm_provider import get_model
|
||||
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
|
||||
|
||||
|
|
@ -89,12 +93,12 @@ def get_messages(
|
|||
verbosity: Optional[str] = None,
|
||||
instructions: Optional[str] = None,
|
||||
memory_context: Optional[str] = None,
|
||||
):
|
||||
) -> list[Message]:
|
||||
return [
|
||||
LLMSystemMessage(
|
||||
SystemMessage(
|
||||
content=get_system_prompt(tone, verbosity, instructions, memory_context),
|
||||
),
|
||||
LLMUserMessage(
|
||||
UserMessage(
|
||||
content=get_user_prompt(prompt, slide_data, language),
|
||||
),
|
||||
]
|
||||
|
|
@ -128,23 +132,40 @@ async def get_edited_slide_content(
|
|||
True,
|
||||
)
|
||||
|
||||
client = LLMClient()
|
||||
client = get_client(config=get_llm_config())
|
||||
try:
|
||||
response = await client.generate_structured(
|
||||
model=model,
|
||||
messages=get_messages(
|
||||
prompt,
|
||||
slide.content,
|
||||
language,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
memory_context,
|
||||
),
|
||||
response_format=response_schema,
|
||||
response_format = JSONSchemaResponse(
|
||||
name="response",
|
||||
json_schema=response_schema,
|
||||
strict=False,
|
||||
)
|
||||
return response
|
||||
messages = get_messages(
|
||||
prompt,
|
||||
slide.content,
|
||||
language,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
memory_context,
|
||||
)
|
||||
|
||||
for attempt in range(3):
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=messages,
|
||||
response_format=response_format,
|
||||
),
|
||||
)
|
||||
content = extract_structured_content(response.content)
|
||||
if content is not None:
|
||||
return content
|
||||
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
|
||||
raise HTTPException(status_code=400, detail="LLM did not return any content")
|
||||
|
||||
except Exception as e:
|
||||
raise handle_llm_client_exceptions(e)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import asyncio
|
||||
from typing import Optional
|
||||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
from services.llm_client import LLMClient
|
||||
from fastapi import HTTPException
|
||||
from llmai import get_client
|
||||
from llmai.shared import SystemMessage, UserMessage
|
||||
from utils.llm_config import get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_utils import extract_text, get_generate_kwargs
|
||||
from utils.llm_provider import get_model
|
||||
|
||||
system_prompt = """
|
||||
|
|
@ -59,18 +63,24 @@ async def get_edited_slide_html(
|
|||
):
|
||||
model = get_model()
|
||||
|
||||
client = LLMClient()
|
||||
client = get_client(config=get_llm_config())
|
||||
try:
|
||||
response = await client.generate(
|
||||
model=model,
|
||||
messages=[
|
||||
LLMSystemMessage(content=system_prompt),
|
||||
LLMUserMessage(
|
||||
content=get_user_prompt(prompt, html, memory_context)
|
||||
),
|
||||
],
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=[
|
||||
SystemMessage(content=system_prompt),
|
||||
UserMessage(
|
||||
content=get_user_prompt(prompt, html, memory_context)
|
||||
),
|
||||
],
|
||||
),
|
||||
)
|
||||
return extract_html_from_response(response) or html
|
||||
response_text = extract_text(response.content)
|
||||
if response_text is None:
|
||||
raise HTTPException(status_code=400, detail="LLM did not return any content")
|
||||
return extract_html_from_response(response_text) or html
|
||||
except Exception as e:
|
||||
raise handle_llm_client_exceptions(e)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,26 @@
|
|||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from enums.llm_provider import LLMProvider
|
||||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
from llmai import get_client
|
||||
from llmai.shared import (
|
||||
JSONSchemaResponse,
|
||||
Message,
|
||||
ResponseStreamCompletionChunk,
|
||||
SystemMessage,
|
||||
UserMessage,
|
||||
WebSearchTool,
|
||||
)
|
||||
|
||||
from models.presentation_outline_model import PresentationOutlineModel
|
||||
from models.llm_tools import SearchWebTool
|
||||
from services.llm_client import LLMClient
|
||||
from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides
|
||||
from utils.llm_config import enable_web_grounding, get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_provider import get_model
|
||||
from utils.llm_utils import (
|
||||
get_generate_kwargs,
|
||||
serialize_structured_content,
|
||||
stream_generate_events,
|
||||
)
|
||||
|
||||
|
||||
def get_system_prompt(
|
||||
|
|
@ -125,9 +137,9 @@ def get_messages(
|
|||
instructions: Optional[str] = None,
|
||||
include_title_slide: bool = True,
|
||||
include_table_of_contents: bool = False,
|
||||
):
|
||||
) -> list[Message]:
|
||||
return [
|
||||
LLMSystemMessage(
|
||||
SystemMessage(
|
||||
content=get_system_prompt(
|
||||
tone,
|
||||
verbosity,
|
||||
|
|
@ -136,7 +148,7 @@ def get_messages(
|
|||
include_table_of_contents,
|
||||
),
|
||||
),
|
||||
LLMUserMessage(
|
||||
UserMessage(
|
||||
content=get_user_prompt(
|
||||
content,
|
||||
n_slides,
|
||||
|
|
@ -170,36 +182,47 @@ async def generate_ppt_outline(
|
|||
else PresentationOutlineModel
|
||||
)
|
||||
|
||||
client = LLMClient()
|
||||
providers_with_search_tool = {
|
||||
LLMProvider.OPENAI,
|
||||
LLMProvider.ANTHROPIC,
|
||||
LLMProvider.GOOGLE,
|
||||
}
|
||||
use_search_tool = (
|
||||
web_search
|
||||
and client.enable_web_grounding()
|
||||
and client.llm_provider in providers_with_search_tool
|
||||
)
|
||||
client = get_client(config=get_llm_config())
|
||||
use_search_tool = web_search and enable_web_grounding()
|
||||
|
||||
try:
|
||||
async for chunk in client.stream_structured(
|
||||
model,
|
||||
get_messages(
|
||||
content,
|
||||
n_slides,
|
||||
language,
|
||||
additional_context,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
include_title_slide,
|
||||
include_table_of_contents,
|
||||
),
|
||||
response_model.model_json_schema(),
|
||||
response_format = JSONSchemaResponse(
|
||||
name="response",
|
||||
json_schema=response_model.model_json_schema(),
|
||||
strict=True,
|
||||
tools=([SearchWebTool] if use_search_tool else None),
|
||||
)
|
||||
emitted_content = False
|
||||
async for event in stream_generate_events(
|
||||
client,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=get_messages(
|
||||
content,
|
||||
n_slides,
|
||||
language,
|
||||
additional_context,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
include_title_slide,
|
||||
include_table_of_contents,
|
||||
),
|
||||
response_format=response_format,
|
||||
tools=([WebSearchTool()] if use_search_tool else None),
|
||||
stream=True,
|
||||
),
|
||||
):
|
||||
yield chunk
|
||||
if getattr(event, "type", None) == "content":
|
||||
chunk = getattr(event, "chunk", None)
|
||||
if chunk:
|
||||
emitted_content = True
|
||||
yield chunk
|
||||
elif (
|
||||
isinstance(event, ResponseStreamCompletionChunk)
|
||||
and not emitted_content
|
||||
):
|
||||
final_content = serialize_structured_content(event.content)
|
||||
if final_content:
|
||||
yield final_content
|
||||
except Exception as e:
|
||||
yield handle_llm_client_exceptions(e)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
from typing import Optional, Dict
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
from fastapi import HTTPException
|
||||
from llmai import get_client
|
||||
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
|
||||
from models.presentation_layout import PresentationLayoutModel
|
||||
from models.presentation_outline_model import PresentationOutlineModel
|
||||
from services.llm_client import LLMClient
|
||||
from utils.llm_config import get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_utils import extract_structured_content, get_generate_kwargs
|
||||
from utils.llm_provider import get_model
|
||||
from utils.get_dynamic_models import get_presentation_structure_model_with_n_slides
|
||||
from models.presentation_structure_model import PresentationStructureModel
|
||||
|
|
@ -97,19 +101,21 @@ def get_messages(
|
|||
n_slides: int,
|
||||
data: str,
|
||||
instructions: Optional[str] = None,
|
||||
):
|
||||
) -> list[Message]:
|
||||
system_prompt = GET_MESSAGES_SYSTEM_PROMPT.format(
|
||||
user_instruction_header="# User Instruction:" if instructions else "",
|
||||
n_slides=n_slides,
|
||||
)
|
||||
|
||||
return [
|
||||
LLMSystemMessage(content=system_prompt),
|
||||
LLMUserMessage(content=(
|
||||
f"{presentation_layout.to_string()}\n\n"
|
||||
"--------------------------------------\n\n"
|
||||
f"{data}"
|
||||
)),
|
||||
SystemMessage(content=system_prompt),
|
||||
UserMessage(
|
||||
content=(
|
||||
f"{presentation_layout.to_string()}\n\n"
|
||||
"--------------------------------------\n\n"
|
||||
f"{data}"
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -118,20 +124,13 @@ def get_messages_for_slides_markdown(
|
|||
n_slides: int,
|
||||
data: str,
|
||||
instructions: Optional[str] = None,
|
||||
):
|
||||
) -> list[Message]:
|
||||
system_prompt = STRUCTURE_FROM_SLIDES_MARKDOWN_SYSTEM_PROMPT.format(
|
||||
user_instructions=instructions or "",
|
||||
presentation_layout=presentation_layout.to_string(with_schema=True),
|
||||
)
|
||||
|
||||
return [
|
||||
LLMSystemMessage(
|
||||
content=system_prompt
|
||||
),
|
||||
LLMUserMessage(
|
||||
content=data
|
||||
)
|
||||
]
|
||||
return [SystemMessage(content=system_prompt), UserMessage(content=data)]
|
||||
|
||||
|
||||
async def generate_presentation_structure(
|
||||
|
|
@ -140,34 +139,50 @@ async def generate_presentation_structure(
|
|||
instructions: Optional[str] = None,
|
||||
using_slides_markdown: bool = False,
|
||||
) -> PresentationStructureModel:
|
||||
|
||||
client = LLMClient()
|
||||
client = get_client(config=get_llm_config())
|
||||
model = get_model()
|
||||
response_model = get_presentation_structure_model_with_n_slides(
|
||||
len(presentation_outline.slides)
|
||||
)
|
||||
|
||||
try:
|
||||
response = await client.generate_structured(
|
||||
model=model,
|
||||
messages=(
|
||||
get_messages_for_slides_markdown(
|
||||
presentation_layout,
|
||||
len(presentation_outline.slides),
|
||||
presentation_outline.to_string(),
|
||||
instructions,
|
||||
)
|
||||
if using_slides_markdown
|
||||
else get_messages(
|
||||
presentation_layout,
|
||||
len(presentation_outline.slides),
|
||||
presentation_outline.to_string(),
|
||||
instructions,
|
||||
)
|
||||
),
|
||||
response_format=response_model.model_json_schema(),
|
||||
messages = (
|
||||
get_messages_for_slides_markdown(
|
||||
presentation_layout,
|
||||
len(presentation_outline.slides),
|
||||
presentation_outline.to_string(),
|
||||
instructions,
|
||||
)
|
||||
if using_slides_markdown
|
||||
else get_messages(
|
||||
presentation_layout,
|
||||
len(presentation_outline.slides),
|
||||
presentation_outline.to_string(),
|
||||
instructions,
|
||||
)
|
||||
)
|
||||
response_format = JSONSchemaResponse(
|
||||
name="response",
|
||||
json_schema=response_model.model_json_schema(),
|
||||
strict=True,
|
||||
)
|
||||
return PresentationStructureModel(**response)
|
||||
|
||||
for attempt in range(3):
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=messages,
|
||||
response_format=response_format,
|
||||
),
|
||||
)
|
||||
content = extract_structured_content(response.content)
|
||||
if content is not None:
|
||||
return PresentationStructureModel(**content)
|
||||
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
|
||||
raise HTTPException(status_code=400, detail="LLM did not return any content")
|
||||
except Exception as e:
|
||||
raise handle_llm_client_exceptions(e)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,15 @@
|
|||
import asyncio
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Optional
|
||||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
from fastapi import HTTPException
|
||||
from llmai import get_client
|
||||
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
|
||||
from models.presentation_layout import SlideLayoutModel
|
||||
from models.presentation_outline_model import SlideOutlineModel
|
||||
from services.llm_client import LLMClient
|
||||
from utils.llm_config import get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_utils import extract_structured_content, get_generate_kwargs
|
||||
from utils.llm_provider import get_model
|
||||
from utils.schema_utils import add_field_in_schema, remove_fields_from_schema
|
||||
|
||||
|
|
@ -130,10 +134,10 @@ def get_messages(
|
|||
verbosity: Optional[str] = None,
|
||||
instructions: Optional[str] = None,
|
||||
response_schema: Optional[dict] = None,
|
||||
):
|
||||
) -> list[Message]:
|
||||
|
||||
return [
|
||||
LLMSystemMessage(
|
||||
SystemMessage(
|
||||
content=get_system_prompt(
|
||||
tone,
|
||||
verbosity,
|
||||
|
|
@ -141,7 +145,7 @@ def get_messages(
|
|||
response_schema,
|
||||
),
|
||||
),
|
||||
LLMUserMessage(
|
||||
UserMessage(
|
||||
content=get_user_prompt(outline, language),
|
||||
),
|
||||
]
|
||||
|
|
@ -155,7 +159,7 @@ async def get_slide_content_from_type_and_outline(
|
|||
verbosity: Optional[str] = None,
|
||||
instructions: Optional[str] = None,
|
||||
):
|
||||
client = LLMClient()
|
||||
client = get_client(config=get_llm_config())
|
||||
model = get_model()
|
||||
|
||||
response_schema = remove_fields_from_schema(
|
||||
|
|
@ -175,20 +179,37 @@ async def get_slide_content_from_type_and_outline(
|
|||
)
|
||||
|
||||
try:
|
||||
response = await client.generate_structured(
|
||||
model=model,
|
||||
messages=get_messages(
|
||||
outline.content,
|
||||
language,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
response_schema,
|
||||
),
|
||||
response_format=response_schema,
|
||||
response_format = JSONSchemaResponse(
|
||||
name="response",
|
||||
json_schema=response_schema,
|
||||
strict=False,
|
||||
)
|
||||
return response
|
||||
messages = get_messages(
|
||||
outline.content,
|
||||
language,
|
||||
tone,
|
||||
verbosity,
|
||||
instructions,
|
||||
response_schema,
|
||||
)
|
||||
|
||||
for attempt in range(3):
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=messages,
|
||||
response_format=response_format,
|
||||
),
|
||||
)
|
||||
content = extract_structured_content(response.content)
|
||||
if content is not None:
|
||||
return content
|
||||
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
|
||||
raise HTTPException(status_code=400, detail="LLM did not return any content")
|
||||
|
||||
except Exception as e:
|
||||
raise handle_llm_client_exceptions(e)
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
from models.llm_message import LLMSystemMessage, LLMUserMessage
|
||||
import asyncio
|
||||
from fastapi import HTTPException
|
||||
from llmai import get_client
|
||||
from llmai.shared import JSONSchemaResponse, Message, SystemMessage, UserMessage
|
||||
from models.presentation_layout import PresentationLayoutModel, SlideLayoutModel
|
||||
from models.slide_layout_index import SlideLayoutIndex
|
||||
from models.sql.slide import SlideModel
|
||||
from services.llm_client import LLMClient
|
||||
from utils.llm_config import get_llm_config
|
||||
from utils.llm_client_error_handler import handle_llm_client_exceptions
|
||||
from utils.llm_utils import extract_structured_content, get_generate_kwargs
|
||||
from utils.llm_provider import get_model
|
||||
|
||||
|
||||
|
|
@ -13,7 +17,7 @@ def get_messages(
|
|||
layout: PresentationLayoutModel,
|
||||
current_slide_layout: int,
|
||||
memory_context: str = "",
|
||||
):
|
||||
) -> list[Message]:
|
||||
memory_block = (
|
||||
f"\n # Retrieved Presentation Memory Context\n {memory_context}\n"
|
||||
if memory_context
|
||||
|
|
@ -21,7 +25,7 @@ def get_messages(
|
|||
)
|
||||
|
||||
return [
|
||||
LLMSystemMessage(
|
||||
SystemMessage(
|
||||
content=f"""
|
||||
Select a Slide Layout index based on provided user prompt and current slide data.
|
||||
{layout.to_string()}
|
||||
|
|
@ -34,7 +38,7 @@ def get_messages(
|
|||
**Go through all notes and steps and make sure they are followed, including mentioned constraints**
|
||||
""",
|
||||
),
|
||||
LLMUserMessage(
|
||||
UserMessage(
|
||||
content=f"""
|
||||
- User Prompt: {prompt}
|
||||
- Current Slide Data: {slide_data}
|
||||
|
|
@ -50,27 +54,43 @@ async def get_slide_layout_from_prompt(
|
|||
slide: SlideModel,
|
||||
memory_context: str = "",
|
||||
) -> SlideLayoutModel:
|
||||
|
||||
client = LLMClient()
|
||||
client = get_client(config=get_llm_config())
|
||||
model = get_model()
|
||||
|
||||
slide_layout_index = layout.get_slide_layout_index(slide.layout)
|
||||
|
||||
try:
|
||||
response = await client.generate_structured(
|
||||
model=model,
|
||||
messages=get_messages(
|
||||
prompt,
|
||||
slide.content,
|
||||
layout,
|
||||
slide_layout_index,
|
||||
memory_context,
|
||||
),
|
||||
response_format=SlideLayoutIndex.model_json_schema(),
|
||||
response_format = JSONSchemaResponse(
|
||||
name="response",
|
||||
json_schema=SlideLayoutIndex.model_json_schema(),
|
||||
strict=True,
|
||||
)
|
||||
index = SlideLayoutIndex(**response).index
|
||||
return layout.slides[index]
|
||||
messages = get_messages(
|
||||
prompt,
|
||||
slide.content,
|
||||
layout,
|
||||
slide_layout_index,
|
||||
memory_context,
|
||||
)
|
||||
|
||||
for attempt in range(3):
|
||||
response = await asyncio.to_thread(
|
||||
client.generate,
|
||||
**get_generate_kwargs(
|
||||
model=model,
|
||||
messages=messages,
|
||||
response_format=response_format,
|
||||
),
|
||||
)
|
||||
content = extract_structured_content(response.content)
|
||||
if content is not None:
|
||||
index = SlideLayoutIndex(**content).index
|
||||
return layout.slides[index]
|
||||
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(0.5 * (attempt + 1))
|
||||
|
||||
raise HTTPException(status_code=400, detail="LLM did not return any content")
|
||||
|
||||
except Exception as e:
|
||||
raise handle_llm_client_exceptions(e)
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
from fastapi import HTTPException
|
||||
from anthropic import APIError as AnthropicAPIError
|
||||
from openai import APIError as OpenAIAPIError
|
||||
from google.genai.errors import APIError as GoogleAPIError
|
||||
import traceback
|
||||
|
||||
from llmai.shared.errors import BaseError as LLMAIBaseError
|
||||
|
||||
|
||||
def handle_llm_client_exceptions(e: Exception) -> HTTPException:
    """Translate an exception raised during an LLM call into an ``HTTPException``.

    The current traceback is printed first. An ``HTTPException`` is returned
    unchanged; an llmai ``BaseError`` keeps its own status code and message;
    OpenAI, Google and Anthropic API errors, and anything else, become
    status-500 responses with a descriptive ``detail``.

    The exception is returned, not raised; callers decide what to do with it.
    """
    traceback.print_exc()

    if isinstance(e, HTTPException):
        return e
    if isinstance(e, LLMAIBaseError):
        return HTTPException(status_code=e.status_code, detail=e.message)

    # Checked in this order; the first matching SDK error type wins.
    provider_errors = (
        (OpenAIAPIError, "OpenAI"),
        (GoogleAPIError, "Google"),
        (AnthropicAPIError, "Anthropic"),
    )
    for error_type, label in provider_errors:
        if isinstance(e, error_type):
            return HTTPException(
                status_code=500, detail=f"{label} API error: {e.message}"
            )

    return HTTPException(status_code=500, detail=f"LLM API error: {e}")
|
||||
|
|
|
|||
146
servers/fastapi/utils/llm_config.py
Normal file
146
servers/fastapi/utils/llm_config.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
import time
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import HTTPException
|
||||
from llmai.shared import (
|
||||
AnthropicClientConfig,
|
||||
ChatGPTClientConfig,
|
||||
ClientConfig,
|
||||
GoogleClientConfig,
|
||||
OpenAIApiType,
|
||||
OpenAIClientConfig,
|
||||
)
|
||||
|
||||
from enums.llm_provider import LLMProvider
|
||||
from utils.get_env import (
|
||||
get_anthropic_api_key_env,
|
||||
get_codex_access_token_env,
|
||||
get_codex_account_id_env,
|
||||
get_codex_refresh_token_env,
|
||||
get_codex_token_expires_env,
|
||||
get_custom_llm_api_key_env,
|
||||
get_custom_llm_url_env,
|
||||
get_disable_thinking_env,
|
||||
get_google_api_key_env,
|
||||
get_ollama_url_env,
|
||||
get_openai_api_key_env,
|
||||
get_web_grounding_env,
|
||||
)
|
||||
from utils.llm_provider import get_llm_provider
|
||||
from utils.parsers import parse_bool_or_none
|
||||
from utils.set_env import (
|
||||
set_codex_access_token_env,
|
||||
set_codex_account_id_env,
|
||||
set_codex_refresh_token_env,
|
||||
set_codex_token_expires_env,
|
||||
)
|
||||
|
||||
|
||||
def enable_web_grounding() -> bool:
    """Return the parsed web-grounding environment flag, or ``False`` when the parse result is falsy."""
    flag = parse_bool_or_none(get_web_grounding_env())
    if flag:
        return flag
    return False
|
||||
|
||||
|
||||
def disable_thinking() -> bool:
    """Return the parsed disable-thinking environment flag, or ``False`` when the parse result is falsy."""
    flag = parse_bool_or_none(get_disable_thinking_env())
    if flag:
        return flag
    return False
|
||||
|
||||
|
||||
def _get_codex_access_token() -> str:
    """Return the Codex OAuth access token, refreshing it when close to expiry.

    When an expiry timestamp (milliseconds) is stored and the current time is
    within 60 seconds of it (or past it), a refresh is attempted with the
    stored refresh token. On success the new access token, refresh token,
    expiry and (when available) account id are written back through the
    ``set_codex_*_env`` helpers and the new access token is returned.

    Raises:
        HTTPException: status 400 when no access token is stored.
    """
    token = get_codex_access_token_env()
    if not token:
        raise HTTPException(
            status_code=400,
            detail=(
                "Codex OAuth access token is not set. Please authenticate via "
                "/api/v1/ppt/codex/auth/initiate"
            ),
        )

    expiry_raw = get_codex_token_expires_env()
    if not expiry_raw:
        return token

    try:
        expiry_ms = int(expiry_raw)
        current_ms = int(time.time() * 1000)
        if current_ms < expiry_ms - 60_000:
            return token

        refresh_token = get_codex_refresh_token_env()
        if not refresh_token:
            return token

        # Imported here, as in the original, rather than at module level.
        from utils.oauth.openai_codex import (
            TokenSuccess,
            get_account_id,
            refresh_access_token,
        )

        refreshed = refresh_access_token(refresh_token)
        if not isinstance(refreshed, TokenSuccess):
            return token

        set_codex_access_token_env(refreshed.access)
        set_codex_refresh_token_env(refreshed.refresh)
        set_codex_token_expires_env(str(refreshed.expires))
        account_id = get_account_id(refreshed.access)
        if account_id:
            set_codex_account_id_env(account_id)
        token = refreshed.access
    except (TypeError, ValueError):
        # Best effort: on these errors fall back to the token held so far.
        pass

    return token
|
||||
|
||||
|
||||
def get_llm_config() -> ClientConfig:
|
||||
llm_provider = get_llm_provider()
|
||||
|
||||
match llm_provider:
|
||||
case LLMProvider.OPENAI:
|
||||
api_key = get_openai_api_key_env()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="OpenAI API Key is not set")
|
||||
return OpenAIClientConfig(
|
||||
api_key=api_key,
|
||||
api_type=OpenAIApiType.RESPONSES,
|
||||
)
|
||||
case LLMProvider.GOOGLE:
|
||||
api_key = get_google_api_key_env()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="Google API Key is not set")
|
||||
return GoogleClientConfig(api_key=api_key)
|
||||
case LLMProvider.ANTHROPIC:
|
||||
api_key = get_anthropic_api_key_env()
|
||||
if not api_key:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Anthropic API Key is not set",
|
||||
)
|
||||
return AnthropicClientConfig(api_key=api_key)
|
||||
case LLMProvider.OLLAMA:
|
||||
return OpenAIClientConfig(
|
||||
base_url=(get_ollama_url_env() or "http://localhost:11434") + "/v1",
|
||||
api_key="ollama",
|
||||
)
|
||||
case LLMProvider.CUSTOM:
|
||||
base_url = get_custom_llm_url_env()
|
||||
if not base_url:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Custom LLM URL is not set",
|
||||
)
|
||||
return OpenAIClientConfig(
|
||||
base_url=base_url,
|
||||
api_key=get_custom_llm_api_key_env() or "null",
|
||||
)
|
||||
case LLMProvider.CODEX:
|
||||
return ChatGPTClientConfig(
|
||||
access_token=_get_codex_access_token(),
|
||||
account_id=get_codex_account_id_env() or None,
|
||||
)
|
||||
case _:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=(
|
||||
"LLM Provider must be either openai, google, anthropic, "
|
||||
"ollama, custom, or codex"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def get_extra_body() -> Optional[dict]:
    """Return the extra request body needed to turn thinking off, if any.

    Only the CUSTOM provider with the "disable thinking" setting enabled
    produces a payload (``{"enable_thinking": False}``); every other
    combination yields ``None``.
    """
    thinking_off = get_llm_provider() == LLMProvider.CUSTOM and disable_thinking()
    return {"enable_thinking": False} if thinking_off else None
|
||||
134
servers/fastapi/utils/llm_utils.py
Normal file
134
servers/fastapi/utils/llm_utils.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import asyncio
|
||||
import json
|
||||
from collections.abc import AsyncGenerator, Sequence
|
||||
from typing import Any, Optional
|
||||
|
||||
import dirtyjson
|
||||
from llmai.shared import (
|
||||
LLMTool,
|
||||
Message,
|
||||
ResponseFormat,
|
||||
normalize_content_parts,
|
||||
)
|
||||
|
||||
from utils.llm_config import get_extra_body
|
||||
|
||||
|
||||
def get_generate_kwargs(
    model: str,
    messages: Sequence[Message],
    max_tokens: Optional[int] = None,
    tools: Optional[list[LLMTool]] = None,
    response_format: Optional[ResponseFormat] = None,
    stream: bool = False,
) -> dict[str, Any]:
    """Assemble the keyword arguments for a ``generate`` call.

    ``model``, ``messages`` (copied into a new list) and ``stream`` are
    always present. ``max_tokens`` and ``response_format`` are included only
    when they are not ``None``; ``tools`` only when the list is non-empty;
    ``extra_body`` only when ``get_extra_body`` returns a non-empty value.

    Returns:
        A fresh dict suitable for ``**``-expansion into the generate call.
    """
    request: dict[str, Any] = dict(model=model, messages=list(messages), stream=stream)

    # Each optional field is paired with the condition under which it is sent.
    optional_fields = {
        "max_tokens": (max_tokens, max_tokens is not None),
        "tools": (tools, bool(tools)),
        "response_format": (response_format, response_format is not None),
    }
    request.update(
        {name: value for name, (value, keep) in optional_fields.items() if keep}
    )

    provider_extras = get_extra_body()
    if provider_extras:
        request["extra_body"] = provider_extras

    return request
|
||||
|
||||
|
||||
def extract_text(content: Any) -> Optional[str]:
    """Pull plain text out of a message content value.

    * ``None`` -> ``None``
    * ``str`` -> returned as is (including the empty string)
    * a non-bytes sequence -> the concatenation of every element that is a
      string or exposes a string ``text`` attribute; ``None`` when that
      concatenation is empty
    * anything else -> its ``text`` attribute when that is a string,
      otherwise ``None``
    """
    if content is None or isinstance(content, str):
        return content

    is_part_sequence = isinstance(content, Sequence) and not isinstance(
        content, (bytes, bytearray)
    )
    if not is_part_sequence:
        own_text = getattr(content, "text", None)
        return own_text if isinstance(own_text, str) else None

    # Strings contribute themselves; other parts contribute their ``text``.
    candidates = [
        piece if isinstance(piece, str) else getattr(piece, "text", None)
        for piece in content
    ]
    combined = "".join(item for item in candidates if isinstance(item, str))
    return combined or None
|
||||
|
||||
|
||||
def extract_structured_content(content: Any) -> Optional[dict]:
    """Coerce a response content value into a plain dict when possible.

    Resolution order:

    1. ``None`` -> ``None``; a ``dict`` is returned unchanged.
    2. An object exposing ``model_dump`` is dumped with ``mode="json"`` and
       that result is returned when it is a dict.
    3. Otherwise the text obtained via ``extract_text`` is parsed with
       ``dirtyjson``; a dict result is returned as a plain ``dict``.

    Returns ``None`` when no text is available, parsing fails, or the parsed
    value is not a dict.
    """
    if content is None or isinstance(content, dict):
        return content

    if hasattr(content, "model_dump"):
        as_json = content.model_dump(mode="json")
        if isinstance(as_json, dict):
            return as_json

    candidate = extract_text(content)
    if candidate:
        try:
            decoded = dirtyjson.loads(candidate)
        except Exception:
            # Best effort: unparsable text simply yields no structured content.
            decoded = None
        if isinstance(decoded, dict):
            return dict(decoded)

    return None
|
||||
|
||||
|
||||
def serialize_structured_content(content: Any) -> Optional[str]:
    """Render a response content value as a string.

    When ``extract_structured_content`` yields a dict, it is serialized with
    ``json.dumps(..., ensure_ascii=False)``. Otherwise the plain text from
    ``extract_text`` is returned, or ``None`` when there is no non-empty text.
    """
    structured = extract_structured_content(content)
    if structured is None:
        return extract_text(content) or None
    return json.dumps(structured, ensure_ascii=False)
|
||||
|
||||
|
||||
def message_content_to_text(content: Sequence[Any] | str | None) -> Optional[str]:
    """Concatenate the text of every content part that carries a string ``text``.

    The content is first passed through ``normalize_content_parts``; parts
    without a string ``text`` attribute are ignored. Returns ``None`` when
    the concatenation is empty.
    """
    pieces: list[str] = []
    for part in normalize_content_parts(content):
        if isinstance(getattr(part, "text", None), str):
            pieces.append(part.text)
    return "".join(pieces) or None
|
||||
|
||||
|
||||
async def stream_generate_events(client: Any, **kwargs) -> AsyncGenerator[Any, None]:
    """Expose the blocking ``client.generate(**kwargs)`` iterator as an async generator.

    The blocking iteration runs in a worker thread (``asyncio.to_thread``);
    each event is handed to the event loop through an ``asyncio.Queue`` and
    yielded from here in order.

    Args:
        client: Object whose ``generate(**kwargs)`` returns an iterable of events.
        **kwargs: Forwarded unchanged to ``client.generate``.

    Yields:
        Every event produced by ``client.generate``, in order.

    Raises:
        Exception: Any ``Exception`` raised while iterating in the worker is
            re-raised here after the events that preceded it.

    Notes:
        When the consumer stops early (``aclose()``, ``break`` followed by
        close, or cancellation) the worker is told to stop and exits at its
        next event instead of draining the whole upstream stream.
    """
    # Local import: threading is not among this module's top-level imports.
    import threading

    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[Any] = asyncio.Queue()
    finished = object()
    stop_requested = threading.Event()

    def worker() -> None:
        try:
            for event in client.generate(**kwargs):
                if stop_requested.is_set():
                    # Consumer is gone; stop pulling from upstream.
                    break
                # Tag items so an event is never mistaken for a worker failure.
                loop.call_soon_threadsafe(queue.put_nowait, (False, event))
        except Exception as exc:
            loop.call_soon_threadsafe(queue.put_nowait, (True, exc))
        finally:
            # Always wake the consumer, whether we finished, failed or stopped.
            loop.call_soon_threadsafe(queue.put_nowait, finished)

    worker_task = asyncio.create_task(asyncio.to_thread(worker))
    try:
        while True:
            item = await queue.get()
            if item is finished:
                break
            is_error, payload = item
            if is_error:
                raise payload
            yield payload
    finally:
        # Signal the worker before waiting, so an early exit does not block
        # until the upstream stream is exhausted.
        stop_requested.set()
        await worker_task
|
||||
|
|
@ -73,10 +73,6 @@ def set_disable_image_generation_env(value):
|
|||
os.environ["DISABLE_IMAGE_GENERATION"] = value
|
||||
|
||||
|
||||
def set_tool_calls_env(value):
|
||||
os.environ["TOOL_CALLS"] = value
|
||||
|
||||
|
||||
def set_disable_thinking_env(value):
|
||||
os.environ["DISABLE_THINKING"] = value
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ from utils.get_env import (
|
|||
get_openai_api_key_env,
|
||||
get_openai_model_env,
|
||||
get_pexels_api_key_env,
|
||||
get_tool_calls_env,
|
||||
get_user_config_path_env,
|
||||
get_image_provider_env,
|
||||
get_pixabay_api_key_env,
|
||||
|
|
@ -63,7 +62,6 @@ from utils.set_env import (
|
|||
set_pexels_api_key_env,
|
||||
set_image_provider_env,
|
||||
set_pixabay_api_key_env,
|
||||
set_tool_calls_env,
|
||||
set_web_grounding_env,
|
||||
set_codex_access_token_env,
|
||||
set_codex_refresh_token_env,
|
||||
|
|
@ -118,11 +116,6 @@ def get_user_config():
|
|||
DALL_E_3_QUALITY=existing_config.DALL_E_3_QUALITY or get_dall_e_3_quality_env(),
|
||||
GPT_IMAGE_1_5_QUALITY=existing_config.GPT_IMAGE_1_5_QUALITY
|
||||
or get_gpt_image_1_5_quality_env(),
|
||||
TOOL_CALLS=(
|
||||
existing_config.TOOL_CALLS
|
||||
if existing_config.TOOL_CALLS is not None
|
||||
else (parse_bool_or_none(get_tool_calls_env()) or False)
|
||||
),
|
||||
DISABLE_THINKING=(
|
||||
existing_config.DISABLE_THINKING
|
||||
if existing_config.DISABLE_THINKING is not None
|
||||
|
|
@ -197,8 +190,6 @@ def update_env_with_user_config():
|
|||
set_dall_e_3_quality_env(user_config.DALL_E_3_QUALITY)
|
||||
if user_config.GPT_IMAGE_1_5_QUALITY:
|
||||
set_gpt_image_1_5_quality_env(user_config.GPT_IMAGE_1_5_QUALITY)
|
||||
if user_config.TOOL_CALLS is not None:
|
||||
set_tool_calls_env(str(user_config.TOOL_CALLS))
|
||||
if user_config.DISABLE_THINKING is not None:
|
||||
set_disable_thinking_env(str(user_config.DISABLE_THINKING))
|
||||
if user_config.EXTENDED_REASONING is not None:
|
||||
|
|
|
|||
79
servers/fastapi/uv.lock
generated
79
servers/fastapi/uv.lock
generated
|
|
@ -1,5 +1,5 @@
|
|||
version = 1
|
||||
revision = 3
|
||||
revision = 2
|
||||
requires-python = "==3.11.*"
|
||||
|
||||
[[package]]
|
||||
|
|
@ -238,6 +238,34 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/ea/44/b749f8777b020b420bceaaf60f66432fc30cc904ca5b69640ec9cbef11ed/blis-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:27f82b8633030f8d095d2b412dffa7eb6dbc8ee43813139909a20012e54422ea", size = 6171233, upload-time = "2025-11-17T12:27:41.921Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.42.94"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "botocore" },
|
||||
{ name = "jmespath" },
|
||||
{ name = "s3transfer" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6a/6a/95302333208830de932ad1d0b69599ee13e936349a44981fb72632507861/boto3-1.42.94.tar.gz", hash = "sha256:5b6056a661c19e974aaea3cb97690ddbe30d10c31e4f887df3bff06574f34510", size = 113211, upload-time = "2026-04-22T20:36:19.167Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/6f/4e175604f3168befcb413c95bf45eada67d12042f92f76a9305d6a817ea9/boto3-1.42.94-py3-none-any.whl", hash = "sha256:56d53bce75629cc7c78a32da8b62de74cee3e2a3d54a2b60ba1a65f9f1b129da", size = 140555, upload-time = "2026-04-22T20:36:16.182Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.42.94"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "jmespath" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b7/90/1a4d0e81b325d38e37f81d907ceacac3b8f509ad38b495bb95086ecb609d/botocore-1.42.94.tar.gz", hash = "sha256:41c6b3b11b073221a41f52b222ba387be34459fb77cdc506e8b74cdaf24bdcce", size = 15260901, upload-time = "2026-04-22T20:36:00.853Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/61/73/313af9ee02ac0155247bcf3f04fcf54fcae2e33250bb437528c18aeefd81/botocore-1.42.94-py3-none-any.whl", hash = "sha256:a2143742132ed0f6cdb90204d667b89d0301068b1045e8bc099efa267bf1b348", size = 14942938, upload-time = "2026-04-22T20:35:55.663Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cachetools"
|
||||
version = "7.0.6"
|
||||
|
|
@ -783,7 +811,9 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/fb/c6/dba32cab7e3a625b011aa5647486e2d28423a48845a2998c126dd69c85e1/greenlet-3.4.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:805bebb4945094acbab757d34d6e1098be6de8966009ab9ca54f06ff492def58", size = 285504, upload-time = "2026-04-08T15:52:14.071Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/f4/7cb5c2b1feb9a1f50e038be79980dfa969aa91979e5e3a18fdbcfad2c517/greenlet-3.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:439fc2f12b9b512d9dfa681c5afe5f6b3232c708d13e6f02c845e0d9f4c2d8c6", size = 605476, upload-time = "2026-04-08T16:24:37.064Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/af/b66ab0b2f9a4c5a867c136bf66d9599f34f21a1bcca26a2884a29c450bd9/greenlet-3.4.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a70ed1cb0295bee1df57b63bf7f46b4e56a5c93709eea769c1fec1bb23a95875", size = 618336, upload-time = "2026-04-08T16:30:56.59Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/31/56c43d2b5de476f77d36ceeec436328533bff960a4cba9a07616e93063ab/greenlet-3.4.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c5696c42e6bb5cfb7c6ff4453789081c66b9b91f061e5e9367fa15792644e76", size = 625045, upload-time = "2026-04-08T16:40:37.111Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/5c/8c5633ece6ba611d64bf2770219a98dd439921d6424e4e8cf16b0ac74ea5/greenlet-3.4.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c660bce1940a1acae5f51f0a064f1bc785d07ea16efcb4bc708090afc4d69e83", size = 613515, upload-time = "2026-04-08T15:56:32.478Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/ca/704d4e2c90acb8bdf7ae593f5cbc95f58e82de95cc540fb75631c1054533/greenlet-3.4.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:89995ce5ddcd2896d89615116dd39b9703bfa0c07b583b85b89bf1b5d6eddf81", size = 419745, upload-time = "2026-04-08T16:43:04.022Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/df/950d15bca0d90a0e7395eb777903060504cdb509b7b705631e8fb69ff415/greenlet-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ee407d4d1ca9dc632265aee1c8732c4a2d60adff848057cdebfe5fe94eb2c8a2", size = 1574623, upload-time = "2026-04-08T16:26:18.596Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/e7/0839afab829fcb7333c9ff6d80c040949510055d2d4d63251f0d1c7c804e/greenlet-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:956215d5e355fffa7c021d168728321fd4d31fd730ac609b1653b450f6a4bc71", size = 1639579, upload-time = "2026-04-08T15:57:29.231Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/2b/b4482401e9bcaf9f5c97f67ead38db89c19520ff6d0d6699979c6efcc200/greenlet-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:5cb614ace7c27571270354e9c9f696554d073f8aa9319079dcba466bbdead711", size = 238233, upload-time = "2026-04-08T17:02:54.286Z" },
|
||||
|
|
@ -1057,6 +1087,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/99/8f/15e7741ff19e9bcd4d753f7ff22f988fd54592f134ca13701c13ea8c20e0/jiter-0.14.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52c076f187405fc21523c746c04399c9af8ece566077ed147b2126f2bcba577", size = 351445, upload-time = "2026-04-10T14:28:33.093Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jmespath"
|
||||
version = "1.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "joblib"
|
||||
version = "1.5.3"
|
||||
|
|
@ -1146,6 +1185,28 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "llmai"
|
||||
version = "0.1.8"
|
||||
source = { url = "https://files.pythonhosted.org/packages/49/9e/64fb2453d9eace7fd50b25635ae267422d014c64861ac511a5e953884f85/llmai-0.1.8-py3-none-any.whl" }
|
||||
dependencies = [
|
||||
{ name = "anthropic" },
|
||||
{ name = "boto3" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "openai" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/49/9e/64fb2453d9eace7fd50b25635ae267422d014c64861ac511a5e953884f85/llmai-0.1.8-py3-none-any.whl", hash = "sha256:c4bae504dae928e88e8437bd3e2e5eb573f459d6df9ed8fc182671ee99b3cf1b" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "anthropic", specifier = ">=0.79.0" },
|
||||
{ name = "boto3", specifier = ">=1.42.89" },
|
||||
{ name = "google-genai", specifier = ">=1.62.0" },
|
||||
{ name = "openai", specifier = ">=2.18.0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "loguru"
|
||||
version = "0.7.3"
|
||||
|
|
@ -1604,13 +1665,13 @@ dependencies = [
|
|||
{ name = "aiomysql" },
|
||||
{ name = "aiosqlite" },
|
||||
{ name = "alembic" },
|
||||
{ name = "anthropic" },
|
||||
{ name = "asyncpg" },
|
||||
{ name = "dirtyjson" },
|
||||
{ name = "fastapi", extra = ["standard"] },
|
||||
{ name = "fastembed-vectorstore" },
|
||||
{ name = "fastmcp" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "llmai" },
|
||||
{ name = "mem0ai", extra = ["nlp"] },
|
||||
{ name = "nltk" },
|
||||
{ name = "openai" },
|
||||
|
|
@ -1626,13 +1687,13 @@ requires-dist = [
|
|||
{ name = "aiomysql", specifier = ">=0.2.0" },
|
||||
{ name = "aiosqlite", specifier = ">=0.21.0" },
|
||||
{ name = "alembic", specifier = ">=1.14.0" },
|
||||
{ name = "anthropic", specifier = ">=0.60.0" },
|
||||
{ name = "asyncpg", specifier = ">=0.30.0" },
|
||||
{ name = "dirtyjson", specifier = ">=1.0.8" },
|
||||
{ name = "fastapi", extras = ["standard"], specifier = ">=0.116.1" },
|
||||
{ name = "fastembed-vectorstore", specifier = ">=0.5.2" },
|
||||
{ name = "fastmcp", specifier = ">=2.11.0" },
|
||||
{ name = "google-genai", specifier = ">=1.28.0" },
|
||||
{ name = "llmai", url = "https://files.pythonhosted.org/packages/49/9e/64fb2453d9eace7fd50b25635ae267422d014c64861ac511a5e953884f85/llmai-0.1.8-py3-none-any.whl" },
|
||||
{ name = "mem0ai", extras = ["nlp"], specifier = ">=0.1.115" },
|
||||
{ name = "nltk", specifier = ">=3.9.1" },
|
||||
{ name = "openai", specifier = ">=1.98.0" },
|
||||
|
|
@ -2200,6 +2261,18 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "s3transfer"
|
||||
version = "0.16.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "botocore" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/46/29/af14f4ef3c11a50435308660e2cc68761c9a7742475e0585cd4396b91777/s3transfer-0.16.1.tar.gz", hash = "sha256:8e424355754b9ccb32467bdc568edf55be82692ef2002d934b1311dbb3b9e524", size = 154801, upload-time = "2026-04-22T20:36:06.475Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/03/19/90d7d4ed51932c022d53f1d02d564b62d10e272692a1f9b76425c1ad2a02/s3transfer-0.16.1-py3-none-any.whl", hash = "sha256:61bcd00ccb83b21a0fe7e91a553fff9729d46c83b4e0106e7c314a733891f7c2", size = 86825, upload-time = "2026-04-22T20:36:04.992Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "secretstorage"
|
||||
version = "3.5.0"
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ interface CustomConfigProps {
|
|||
customLlmUrl: string;
|
||||
customLlmApiKey: string;
|
||||
customModel: string;
|
||||
toolCalls: boolean;
|
||||
disableThinking: boolean;
|
||||
onInputChange: (value: string | boolean, field: string) => void;
|
||||
}
|
||||
|
|
@ -29,7 +28,6 @@ export default function CustomConfig({
|
|||
customLlmUrl,
|
||||
customLlmApiKey,
|
||||
customModel,
|
||||
toolCalls,
|
||||
disableThinking,
|
||||
onInputChange,
|
||||
}: CustomConfigProps) {
|
||||
|
|
@ -165,9 +163,8 @@ export default function CustomConfig({
|
|||
<div className="mb-4">
|
||||
<div className="mb-3 p-3 bg-amber-50 border border-amber-200 rounded-lg">
|
||||
<p className="text-sm text-amber-800">
|
||||
<strong>Important:</strong> Only models with function
|
||||
calling capabilities (tool calls) or JSON schema support
|
||||
will work.
|
||||
<strong>Important:</strong> Only models with structured
|
||||
JSON schema output support will work reliably.
|
||||
</p>
|
||||
</div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
|
|
@ -231,23 +228,6 @@ export default function CustomConfig({
|
|||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Tool Calls Toggle */}
|
||||
<div>
|
||||
<div className="flex items-center justify-between mb-4 bg-green-50 p-2 rounded-sm">
|
||||
<label className="text-sm font-medium text-gray-700">
|
||||
Use Tool Calls
|
||||
</label>
|
||||
<Switch
|
||||
checked={toolCalls}
|
||||
onCheckedChange={(checked) => onInputChange(checked, "tool_calls")}
|
||||
/>
|
||||
</div>
|
||||
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
|
||||
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
|
||||
If enabled, Tool Calls will be used instead of JSON Schema for Structured Output.
|
||||
</p>
|
||||
</div>
|
||||
{/* Disable Thinking Toggle */}
|
||||
<div>
|
||||
<div className="flex items-center justify-between mb-4 bg-green-50 p-2 rounded-sm">
|
||||
|
|
@ -266,4 +246,4 @@ export default function CustomConfig({
|
|||
</div>
|
||||
</div >
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -292,7 +292,6 @@ export default function LLMProviderSelection({
|
|||
customLlmUrl={llmConfig.CUSTOM_LLM_URL || ""}
|
||||
customLlmApiKey={llmConfig.CUSTOM_LLM_API_KEY || ""}
|
||||
customModel={llmConfig.CUSTOM_MODEL || ""}
|
||||
toolCalls={llmConfig.TOOL_CALLS || false}
|
||||
disableThinking={llmConfig.DISABLE_THINKING || false}
|
||||
onInputChange={input_field_changed}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ export interface LLMConfig {
|
|||
GPT_IMAGE_1_5_QUALITY?: string;
|
||||
|
||||
// Other Configs
|
||||
TOOL_CALLS?: boolean;
|
||||
DISABLE_THINKING?: boolean;
|
||||
EXTENDED_REASONING?: boolean;
|
||||
WEB_GROUNDING?: boolean;
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ export const updateLLMConfig = (
|
|||
image_provider: "IMAGE_PROVIDER",
|
||||
disable_image_generation: "DISABLE_IMAGE_GENERATION",
|
||||
use_custom_url: "USE_CUSTOM_URL",
|
||||
tool_calls: "TOOL_CALLS",
|
||||
disable_thinking: "DISABLE_THINKING",
|
||||
extended_reasoning: "EXTENDED_REASONING",
|
||||
web_grounding: "WEB_GROUNDING",
|
||||
|
|
@ -244,4 +243,4 @@ export const pullOllamaModel = async (
|
|||
void pollOnce();
|
||||
}, 1000);
|
||||
});
|
||||
};
|
||||
};
|
||||
|
|
|
|||
1
start.js
1
start.js
|
|
@ -175,7 +175,6 @@ const setupUserConfigFromEnv = () => {
|
|||
PIXABAY_API_KEY:
|
||||
process.env.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
|
||||
IMAGE_PROVIDER: process.env.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
|
||||
TOOL_CALLS: process.env.TOOL_CALLS || existingConfig.TOOL_CALLS,
|
||||
DISABLE_THINKING:
|
||||
process.env.DISABLE_THINKING || existingConfig.DISABLE_THINKING,
|
||||
EXTENDED_REASONING:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue