diff --git a/Dockerfile b/Dockerfile index 6a740c3c..a3fc8e44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,12 +2,15 @@ FROM python:3.11-slim-bookworm # Install Node.js and npm RUN apt-get update && apt-get install -y \ - nodejs \ - npm \ nginx \ curl \ redis-server +# Install Node.js 20 using NodeSource repository +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs + + # Create a working directory WORKDIR /app @@ -39,7 +42,7 @@ RUN npm run build WORKDIR /app -# Copy FastAPI and start script +# Copy FastAPI COPY servers/fastapi/ ./servers/fastapi/ COPY start.js LICENSE NOTICE ./ diff --git a/Dockerfile.dev b/Dockerfile.dev index 326057af..db61b4b7 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -2,12 +2,16 @@ FROM python:3.11-slim-bookworm # Install Node.js and npm RUN apt-get update && apt-get install -y \ - nodejs \ - npm \ nginx \ curl \ redis-server + +# Install Node.js 20 using NodeSource repository +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs + + # Change working directory WORKDIR /app diff --git a/docker-compose.yml b/docker-compose.yml index 64798520..81239bba 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,12 +14,15 @@ services: - LLM=${LLM} - OPENAI_API_KEY=${OPENAI_API_KEY} - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - ANTHROPIC_MODEL=${ANTHROPIC_MODEL} - OLLAMA_URL=${OLLAMA_URL} - OLLAMA_MODEL=${OLLAMA_MODEL} - CUSTOM_LLM_URL=${CUSTOM_LLM_URL} - CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY} - CUSTOM_MODEL=${CUSTOM_MODEL} - PEXELS_API_KEY=${PEXELS_API_KEY} + - EXTENDED_REASONING=${EXTENDED_REASONING} - DATABASE_URL=${DATABASE_URL} production-gpu: @@ -44,12 +47,15 @@ services: - LLM=${LLM} - OPENAI_API_KEY=${OPENAI_API_KEY} - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - ANTHROPIC_MODEL=${ANTHROPIC_MODEL} - OLLAMA_URL=${OLLAMA_URL} - OLLAMA_MODEL=${OLLAMA_MODEL} - CUSTOM_LLM_URL=${CUSTOM_LLM_URL} - CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY} - CUSTOM_MODEL=${CUSTOM_MODEL} - PEXELS_API_KEY=${PEXELS_API_KEY} + - EXTENDED_REASONING=${EXTENDED_REASONING} - DATABASE_URL=${DATABASE_URL} development: @@ -67,12 +73,15 @@ services: - LLM=${LLM} - OPENAI_API_KEY=${OPENAI_API_KEY} - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - ANTHROPIC_MODEL=${ANTHROPIC_MODEL} - OLLAMA_URL=${OLLAMA_URL} - OLLAMA_MODEL=${OLLAMA_MODEL} - CUSTOM_LLM_URL=${CUSTOM_LLM_URL} - CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY} - CUSTOM_MODEL=${CUSTOM_MODEL} - PEXELS_API_KEY=${PEXELS_API_KEY} + - EXTENDED_REASONING=${EXTENDED_REASONING} - DATABASE_URL=${DATABASE_URL} development-gpu: @@ -97,10 +106,13 @@ services: - LLM=${LLM} - OPENAI_API_KEY=${OPENAI_API_KEY} - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - ANTHROPIC_MODEL=${ANTHROPIC_MODEL} - OLLAMA_URL=${OLLAMA_URL} - OLLAMA_MODEL=${OLLAMA_MODEL} - CUSTOM_LLM_URL=${CUSTOM_LLM_URL} - CUSTOM_LLM_API_KEY=${CUSTOM_LLM_API_KEY} - CUSTOM_MODEL=${CUSTOM_MODEL} - PEXELS_API_KEY=${PEXELS_API_KEY} + - EXTENDED_REASONING=${EXTENDED_REASONING} - DATABASE_URL=${DATABASE_URL} \ No newline at end of file diff --git a/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/servers/fastapi/api/v1/ppt/endpoints/presentation.py index f7fa7865..6b9cecc7 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -19,7 +19,7 @@ from models.pptx_models import PptxPresentationModel from models.presentation_layout import PresentationLayoutModel from models.presentation_structure_model import PresentationStructureModel from models.presentation_with_slides import PresentationWithSlides -from services.get_layout_by_name import get_layout_by_name +from utils.get_layout_by_name import get_layout_by_name from services.icon_finder_service import IconFinderService from services.image_generation_service import ImageGenerationService from utils.dict_utils import deep_update diff --git a/servers/fastapi/chroma/chroma.sqlite3 b/servers/fastapi/chroma/chroma.sqlite3 index 8e0ae9f0..6bcb2901 100644 Binary files a/servers/fastapi/chroma/chroma.sqlite3 and b/servers/fastapi/chroma/chroma.sqlite3 differ diff --git a/servers/fastapi/services/llm_client.py b/servers/fastapi/services/llm_client.py index f41bf767..a5cccfea 100644 --- a/servers/fastapi/services/llm_client.py +++ b/servers/fastapi/services/llm_client.py @@ -26,7 +26,7 @@ from utils.llm_provider import get_llm_provider class LLMClient: def __init__(self): self.llm_provider = get_llm_provider() - self.client = self._get_client() + self._client = self._get_client() # ? Clients def _get_client(self): @@ -100,7 +100,7 @@ class LLMClient: # ? Generate Unstructured Content async def _generate_openai(self, model: str, messages: List[LLMMessage]): - client: AsyncOpenAI = self.client + client: AsyncOpenAI = self._client response = await client.chat.completions.create( model=model, messages=[message.model_dump() for message in messages], @@ -108,7 +108,7 @@ class LLMClient: return response.choices[0].message.content async def _generate_google(self, model: str, messages: List[LLMMessage]): - client: genai.Client = self.client + client: genai.Client = self._client response = await asyncio.to_thread( client.models.generate_content, model=model, @@ -121,7 +121,7 @@ class LLMClient: return response.text async def _generate_anthropic(self, model: str, messages: List[LLMMessage]): - client: AsyncAnthropic = self.client + client: AsyncAnthropic = self._client response: AnthropicMessage = await client.messages.create( model=model, messages=[message.model_dump() for message in messages], @@ -153,11 +153,6 @@ class LLMClient: content = await self._generate_ollama(model, messages) case LLMProvider.CUSTOM: content = await self._generate_custom(model, messages) - case _: - raise HTTPException( - status_code=400, - detail="LLM Provider must be either openai, google, anthropic, ollama, or custom", - ) if content is None: raise HTTPException( status_code=400, @@ -169,7 +164,7 @@ class LLMClient: async def _generate_openai_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: AsyncOpenAI = self.client + client: AsyncOpenAI = self._client is_response_format_dict = isinstance(response_format, dict) if is_response_format_dict: response = await client.chat.completions.create( @@ -203,7 +198,7 @@ class LLMClient: async def _generate_google_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: genai.Client = self.client + client: genai.Client = self._client response = await asyncio.to_thread( client.models.generate_content, model=model, @@ -221,7 +216,7 @@ class LLMClient: async def _generate_anthropic_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: AsyncAnthropic = self.client + client: AsyncAnthropic = self._client is_response_format_dict = isinstance(response_format, dict) response: AnthropicMessage = await client.messages.create( model=model, @@ -279,11 +274,6 @@ class LLMClient: content = await self._generate_custom_structured( model, messages, response_format ) - case _: - raise HTTPException( - status_code=400, - detail="LLM Provider must be either openai, google, anthropic, ollama, or custom", - ) if content is None: raise HTTPException( status_code=400, @@ -293,7 +283,7 @@ class LLMClient: # ? Stream Unstructured Content async def _stream_openai(self, model: str, messages: List[LLMMessage]): - client: AsyncOpenAI = self.client + client: AsyncOpenAI = self._client async with client.chat.completions.stream( model=model, messages=[message.model_dump() for message in messages], @@ -303,7 +293,7 @@ class LLMClient: yield event.delta async def _stream_google(self, model: str, messages: List[LLMMessage]): - client: genai.Client = self.client + client: genai.Client = self._client async for event in iterator_to_async(client.models.generate_content_stream)( model=model, contents=self._get_user_prompts(messages), @@ -316,7 +306,7 @@ class LLMClient: yield event.text async def _stream_anthropic(self, model: str, messages: List[LLMMessage]): - client: AsyncAnthropic = self.client + client: AsyncAnthropic = self._client async with client.messages.stream( model=model, messages=[message.model_dump() for message in messages], @@ -332,7 +322,7 @@ class LLMClient: def _stream_custom(self, model: str, messages: List[LLMMessage]): return self._stream_openai(model, messages) - async def stream(self, model: str, messages: List[LLMMessage]): + def stream(self, model: str, messages: List[LLMMessage]): match self.llm_provider: case LLMProvider.OPENAI: return self._stream_openai(model, messages) @@ -344,17 +334,12 @@ class LLMClient: return self._stream_ollama(model, messages) case LLMProvider.CUSTOM: return self._stream_custom(model, messages) - case _: - raise HTTPException( - status_code=400, - detail="LLM Provider must be either openai, google, anthropic, ollama, or custom", - ) # ? Stream Structured Content async def _stream_openai_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: AsyncOpenAI = self.client + client: AsyncOpenAI = self._client is_response_format_dict = isinstance(response_format, dict) async with client.chat.completions.stream( model=model, @@ -378,7 +363,7 @@ class LLMClient: async def _stream_google_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: genai.Client = self.client + client: genai.Client = self._client async for event in iterator_to_async(client.models.generate_content_stream)( model=model, contents=self._get_user_prompts(messages), @@ -394,7 +379,7 @@ class LLMClient: async def _stream_anthropic_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): - client: AsyncAnthropic = self.client + client: AsyncAnthropic = self._client is_response_format_dict = isinstance(response_format, dict) async with client.messages.stream( model=model, @@ -426,7 +411,7 @@ class LLMClient: ): return self._stream_openai_structured(model, messages, response_format) - async def stream_structured( + def stream_structured( self, model: str, messages: List[LLMMessage], response_format: BaseModel | dict ): match self.llm_provider: @@ -442,8 +427,3 @@ class LLMClient: return self._stream_ollama_structured(model, messages, response_format) case LLMProvider.CUSTOM: return self._stream_custom_structured(model, messages, response_format) - case _: - raise HTTPException( - status_code=400, - detail="LLM Provider must be either openai, google, anthropic, ollama, or custom", - ) diff --git a/servers/fastapi/services/get_layout_by_name.py b/servers/fastapi/utils/get_layout_by_name.py similarity index 100% rename from servers/fastapi/services/get_layout_by_name.py rename to servers/fastapi/utils/get_layout_by_name.py diff --git a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py index 2b7d48ed..ddfabd2d 100644 --- a/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py +++ b/servers/fastapi/utils/llm_calls/generate_presentation_outlines.py @@ -1,18 +1,9 @@ from typing import Optional -from google.genai.types import GenerateContentConfig -from openai.types.chat.chat_completion_chunk import ChoiceDelta -from utils.async_iterator import iterator_to_async +from models.llm_message import LLMMessage +from services.llm_client import LLMClient from utils.get_dynamic_models import get_presentation_outline_model_with_n_slides -from utils.llm_provider import ( - get_anthropic_llm_client, - get_google_llm_client, - get_large_model, - get_llm_client, - is_anthropic_selected, - is_google_selected, -) -from pydantic import BaseModel +from utils.llm_provider import get_large_model system_prompt = """ You are an expert presentation creator. Generate structured presentations based on user requirements and format them according to the specified JSON schema with markdown content. @@ -64,29 +55,19 @@ def get_user_prompt(prompt: str, n_slides: int, language: str, content: str): """ -def get_prompt_template(prompt: str, n_slides: int, language: str, content: str): +def get_messages(prompt: str, n_slides: int, language: str, content: str): return [ - { - "role": "system", - "content": system_prompt, - }, - { - "role": "user", - "content": get_user_prompt(prompt, n_slides, language, content), - }, + LLMMessage( + role="system", + content=system_prompt, + ), + LLMMessage( + role="user", + content=get_user_prompt(prompt, n_slides, language, content), + ), ] -def get_response_format(response_model: BaseModel): - return { - "type": "json_schema", - "json_schema": { - "name": "PresentationOutlineModel", - "schema": response_model.model_json_schema(), - }, - } - - async def generate_ppt_outline( prompt: Optional[str], n_slides: int, @@ -96,29 +77,11 @@ async def generate_ppt_outline( model = get_large_model() response_model = get_presentation_outline_model_with_n_slides(n_slides) - if is_google_selected(): - client = get_google_llm_client() - generate_stream = iterator_to_async(client.models.generate_content_stream) - async for event in generate_stream( - model=model, - contents=[get_user_prompt(prompt, n_slides, language, content)], - config=GenerateContentConfig( - system_instruction=system_prompt, - response_mime_type="application/json", - response_json_schema=response_model.model_json_schema(), - ), - ): - if event.text: - yield event.text + client = LLMClient() - else: - client = get_llm_client() - async for response in await client.chat.completions.create( - model=model, - messages=get_prompt_template(prompt, n_slides, language, content), - stream=True, - response_format=get_response_format(response_model), - ): - delta: ChoiceDelta = response.choices[0].delta - if delta.content: - yield delta.content + async for chunk in client.stream_structured( + model, + get_messages(prompt, n_slides, language, content), + response_model, + ): + yield chunk