diff --git a/.dockerignore b/.dockerignore index c0e3eddd..616ca25a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,4 +6,5 @@ out build .git .gitignore -tmp \ No newline at end of file +tmp +debug \ No newline at end of file diff --git a/.gitignore b/.gitignore index e55e0c33..6d9d48ad 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ __pycache__ node_modules out user_data -tmp \ No newline at end of file +tmp +debug \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev index 58c401ef..1bf2c4db 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -24,9 +24,8 @@ RUN curl -fsSL https://ollama.com/install.sh | sh COPY servers/fastapi/requirements.txt ./ RUN pip install -r requirements.txt - # Install dependencies for Next.js -WORKDIR /app/servers/nextjs +WORKDIR /node_dependencies COPY servers/nextjs/package.json servers/nextjs/package-lock.json ./ RUN npm install @@ -40,4 +39,10 @@ COPY nginx.conf /etc/nginx/nginx.conf EXPOSE 80 3000 8000 # Start the servers -CMD ["/bin/bash", "-c", "ollama serve & service nginx start & service redis-server start && node /app/start.js"] \ No newline at end of file +CMD ["/bin/bash", "-c", "\ + rm -rf /app/servers/nextjs/node_modules && \ + ln -s /node_dependencies/node_modules /app/servers/nextjs/node_modules && \ + ollama serve & \ + service nginx start & \ + service redis-server start && \ + node /app/start.js"] \ No newline at end of file diff --git a/servers/fastapi/api/main.py b/servers/fastapi/api/main.py index 56fba941..960ce692 100644 --- a/servers/fastapi/api/main.py +++ b/servers/fastapi/api/main.py @@ -8,7 +8,8 @@ from contextlib import asynccontextmanager from api.routers.presentation.router import presentation_router from api.services.database import sql_engine from api.utils.supported_ollama_models import SUPPORTED_OLLAMA_MODELS -from api.utils.utils import is_ollama_selected, update_env_with_user_config +from api.utils.utils import update_env_with_user_config +from api.utils.model_utils import is_ollama_selected can_change_keys = os.getenv("CAN_CHANGE_KEYS") != "false" diff --git a/servers/fastapi/api/routers/presentation/handlers/edit.py b/servers/fastapi/api/routers/presentation/handlers/edit.py index 5e5aa1d5..2706b53f 100644 --- a/servers/fastapi/api/routers/presentation/handlers/edit.py +++ b/servers/fastapi/api/routers/presentation/handlers/edit.py @@ -14,8 +14,8 @@ from api.utils.supported_ollama_models import SUPPORTED_OLLAMA_MODELS from api.utils.utils import ( get_presentation_dir, get_presentation_images_dir, - is_ollama_selected, ) +from api.utils.model_utils import is_ollama_selected from image_processor.icons_vectorstore_utils import get_icons_vectorstore from image_processor.images_finder import generate_image from image_processor.icons_finder import get_icon diff --git a/servers/fastapi/api/routers/presentation/handlers/generate_data.py b/servers/fastapi/api/routers/presentation/handlers/generate_data.py index a6408592..f3c6edd7 100644 --- a/servers/fastapi/api/routers/presentation/handlers/generate_data.py +++ b/servers/fastapi/api/routers/presentation/handlers/generate_data.py @@ -11,7 +11,7 @@ from api.routers.presentation.models import PresentationGenerateRequest from api.services.logging import LoggingService from api.sql_models import KeyValueSqlModel, PresentationSqlModel from api.services.database import get_sql_session -from api.utils.utils import is_ollama_selected +from api.utils.model_utils import is_ollama_selected from ppt_config_generator.models import PresentationMarkdownModel, SlideStructureModel from ppt_config_generator.structure_generator import generate_presentation_structure diff --git a/servers/fastapi/api/routers/presentation/handlers/generate_presentation.py b/servers/fastapi/api/routers/presentation/handlers/generate_presentation.py index 692b09ab..c3511f77 100644 --- a/servers/fastapi/api/routers/presentation/handlers/generate_presentation.py +++ b/servers/fastapi/api/routers/presentation/handlers/generate_presentation.py @@ -17,7 +17,8 @@ from api.services.database import get_sql_session from api.services.instances import TEMP_FILE_SERVICE from api.services.logging import LoggingService from api.sql_models import PresentationSqlModel, SlideSqlModel -from api.utils.utils import get_presentation_dir, is_ollama_selected +from api.utils.utils import get_presentation_dir +from api.utils.model_utils import is_ollama_selected from document_processor.loader import DocumentsLoader from ppt_config_generator.document_summary_generator import generate_document_summary from ppt_config_generator.models import PresentationMarkdownModel diff --git a/servers/fastapi/api/routers/presentation/handlers/generate_stream.py b/servers/fastapi/api/routers/presentation/handlers/generate_stream.py index 60dee4a5..78d44aa5 100644 --- a/servers/fastapi/api/routers/presentation/handlers/generate_stream.py +++ b/servers/fastapi/api/routers/presentation/handlers/generate_stream.py @@ -17,7 +17,8 @@ from api.routers.presentation.models import ( from api.services.database import get_sql_session from api.services.logging import LoggingService from api.sql_models import KeyValueSqlModel, PresentationSqlModel, SlideSqlModel -from api.utils.utils import get_presentation_dir, is_ollama_selected +from api.utils.utils import get_presentation_dir +from api.utils.model_utils import is_ollama_selected from ppt_config_generator.models import ( PresentationMarkdownModel, PresentationStructureModel, diff --git a/servers/fastapi/api/utils/model_utils.py b/servers/fastapi/api/utils/model_utils.py index e642e066..1d13ad67 100644 --- a/servers/fastapi/api/utils/model_utils.py +++ b/servers/fastapi/api/utils/model_utils.py @@ -1,5 +1,7 @@ import os +from openai import AsyncOpenAI + from api.models import SelectedLLMProvider @@ -15,38 +17,56 @@ def get_model_base_url(): selected_llm = get_selected_llm_provider() if selected_llm == SelectedLLMProvider.OLLAMA: - return "http://localhost:11434" + return "http://localhost:11434/v1" elif selected_llm == SelectedLLMProvider.OPENAI: return "https://api.openai.com/v1" else: return "https://generativelanguage.googleapis.com/v1beta/openai" +def get_llm_api_key(): + selected_llm = get_selected_llm_provider() + if selected_llm == SelectedLLMProvider.OPENAI: + return os.getenv("OPENAI_API_KEY") + elif selected_llm == SelectedLLMProvider.GOOGLE: + return os.getenv("GOOGLE_API_KEY") + else: + return "ollama" + + +def get_llm_client(): + client = AsyncOpenAI( + base_url=get_model_base_url(), + api_key=get_llm_api_key(), + ) + return client + + def get_large_model(): selected_llm = get_selected_llm_provider() if selected_llm == SelectedLLMProvider.OPENAI: - return ChatOpenAI(model="gpt-4.1") + return "gpt-4.1" elif selected_llm == SelectedLLMProvider.GOOGLE: - return ChatGoogleGenerativeAI(model="gemini-2.0-flash") + return "gemini-2.0-flash" else: - return ChatOllama(model=os.getenv("OLLAMA_MODEL"), temperature=0.8) + return os.getenv("OLLAMA_MODEL") def get_small_model(): - selected_llm = os.getenv("LLM") - if selected_llm == "openai": - return ChatOpenAI(model="gpt-4.1-mini") - elif selected_llm == "google": - return ChatGoogleGenerativeAI(model="gemini-2.0-flash") + selected_llm = get_selected_llm_provider() + if selected_llm == SelectedLLMProvider.OPENAI: + return "gpt-4.1-mini" + elif selected_llm == SelectedLLMProvider.GOOGLE: + return "gemini-2.0-flash" else: - return ChatOllama(model=os.getenv("OLLAMA_MODEL"), temperature=0.8) + return os.getenv("OLLAMA_MODEL") def get_nano_model(): - selected_llm = os.getenv("LLM") - if selected_llm == "openai": - return ChatOpenAI(model="gpt-4.1-nano") - elif selected_llm == "google": - return ChatGoogleGenerativeAI(model="gemini-2.0-flash") + selected_llm = get_selected_llm_provider() + if selected_llm == SelectedLLMProvider.OPENAI: + return "gpt-4.1-nano" + elif selected_llm == SelectedLLMProvider.GOOGLE: + return "gemini-2.0-flash" else: - return ChatOllama(model=os.getenv("OLLAMA_MODEL"), temperature=0.8) + return os.getenv("OLLAMA_MODEL") diff --git a/servers/fastapi/image_processor/images_finder.py b/servers/fastapi/image_processor/images_finder.py index 9c7dfd12..231d6576 100644 --- a/servers/fastapi/image_processor/images_finder.py +++ b/servers/fastapi/image_processor/images_finder.py @@ -9,7 +9,8 @@ from openai import OpenAI from ppt_generator.models.query_and_prompt_models import ( ImagePromptWithThemeAndAspectRatio, ) -from api.utils.utils import download_file, get_resource, is_ollama_selected +from api.utils.utils import download_file, get_resource +from api.utils.model_utils import is_ollama_selected async def generate_image( diff --git a/servers/fastapi/ppt_config_generator/document_summary_generator.py b/servers/fastapi/ppt_config_generator/document_summary_generator.py index 8e0c7943..a699e72a 100644 --- a/servers/fastapi/ppt_config_generator/document_summary_generator.py +++ b/servers/fastapi/ppt_config_generator/document_summary_generator.py @@ -1,14 +1,10 @@ import asyncio -import os from typing import List from langchain_core.documents import Document -from langchain_google_genai import ChatGoogleGenerativeAI -from langchain_openai import ChatOpenAI -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.messages import BaseMessage from langchain_text_splitters import CharacterTextSplitter +from openai.types.chat.chat_completion import ChatCompletion -from api.utils.utils import get_nano_model +from api.utils.model_utils import get_llm_client, get_nano_model sysmte_prompt = """ Generate a blog-style summary of the provided document in **more than 2000 words**. @@ -26,26 +22,28 @@ Maintain as much information as possible. - If **slides structure is mentioned** in document, structure the summary in the same way. """ -prompt_template = ChatPromptTemplate.from_messages( - [ - ("system", sysmte_prompt), - ("user", "{text}"), - ] -) - async def generate_document_summary(documents: List[Document]): + client = get_llm_client() model = get_nano_model() + text_splitter = CharacterTextSplitter(chunk_size=200000, chunk_overlap=0) - chain = prompt_template | model coroutines = [] for document in documents: text = document.page_content truncated_text = text_splitter.split_text(text)[0] - coroutine = chain.ainvoke({"text": truncated_text}) + coroutine = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": sysmte_prompt}, + {"role": "user", "content": truncated_text}, + ], + ) coroutines.append(coroutine) - completions: List[BaseMessage] = await asyncio.gather(*coroutines) - combined = "\n\n\n\n".join([completion.content for completion in completions]) + completions: List[ChatCompletion] = await asyncio.gather(*coroutines) + combined = "\n\n\n\n".join( + [completion.choices[0].message.content for completion in completions] + ) return combined diff --git a/servers/fastapi/ppt_config_generator/ppt_outlines_generator.py b/servers/fastapi/ppt_config_generator/ppt_outlines_generator.py index 246768af..3bfc17f3 100644 --- a/servers/fastapi/ppt_config_generator/ppt_outlines_generator.py +++ b/servers/fastapi/ppt_config_generator/ppt_outlines_generator.py @@ -1,7 +1,8 @@ from typing import Optional from langchain_core.prompts import ChatPromptTemplate +from langchain_ollama import ChatOllama -from api.utils.utils import get_large_model +from api.utils.model_utils import get_large_model from api.utils.variable_length_models import ( get_presentation_markdown_model_with_n_slides, ) @@ -64,7 +65,7 @@ async def generate_ppt_content( language: Optional[str] = None, content: Optional[str] = None, ) -> PresentationMarkdownModel: - model = get_large_model() + model = ChatOllama(model=get_large_model(), temperature=0.8) response_model = get_presentation_markdown_model_with_n_slides(n_slides) chain = get_prompt_template() | model.with_structured_output( diff --git a/servers/fastapi/ppt_config_generator/structure_generator.py b/servers/fastapi/ppt_config_generator/structure_generator.py index 28f3c8f6..98275a6b 100644 --- a/servers/fastapi/ppt_config_generator/structure_generator.py +++ b/servers/fastapi/ppt_config_generator/structure_generator.py @@ -1,6 +1,7 @@ from langchain_core.prompts import ChatPromptTemplate +from langchain_ollama import ChatOllama -from api.utils.utils import get_small_model +from api.utils.model_utils import get_small_model from api.utils.variable_length_models import ( get_presentation_structure_model_with_n_slides, ) @@ -59,7 +60,7 @@ async def generate_presentation_structure( presentation_outline: PresentationMarkdownModel, ) -> PresentationStructureModel: - model = get_small_model() + model = ChatOllama(model=get_small_model(), temperature=0.8) response_model = get_presentation_structure_model_with_n_slides( len(presentation_outline.slides) ) diff --git a/servers/fastapi/ppt_generator/fix_validation_errors.py b/servers/fastapi/ppt_generator/fix_validation_errors.py index 73a4c15e..02a60ff0 100644 --- a/servers/fastapi/ppt_generator/fix_validation_errors.py +++ b/servers/fastapi/ppt_generator/fix_validation_errors.py @@ -1,12 +1,11 @@ import os from typing import Optional from fastapi import HTTPException -from langchain_google_genai import ChatGoogleGenerativeAI -from langchain_openai import ChatOpenAI +from langchain_ollama import ChatOllama from langchain_core.prompts import ChatPromptTemplate from pydantic import BaseModel, ValidationError -from api.utils.utils import get_large_model +from api.utils.model_utils import get_large_model def get_prompt_template(): @@ -41,7 +40,7 @@ def get_prompt_template(): async def fix_validation_errors(response_model: BaseModel, response, errors): - model = get_large_model() + model = ChatOllama(model=get_large_model(), temperature=0.8) chain = get_prompt_template() | model.with_structured_output( response_model.model_json_schema() diff --git a/servers/fastapi/ppt_generator/generator.py b/servers/fastapi/ppt_generator/generator.py index 76cd22f1..82ba9bf5 100644 --- a/servers/fastapi/ppt_generator/generator.py +++ b/servers/fastapi/ppt_generator/generator.py @@ -5,7 +5,8 @@ from langchain_core.messages import ( AIMessageChunk, AIMessage, ) -from api.utils.utils import get_large_model +from langchain_ollama import ChatOllama +from api.utils.model_utils import get_large_model from ppt_config_generator.models import PresentationMarkdownModel from ppt_generator.models.llm_models_with_validations import ( LLMPresentationModelWithValidation, @@ -91,7 +92,7 @@ def get_model_and_messages( presentation_outline: PresentationMarkdownModel, ): user_message = HumanMessage(presentation_outline.to_string()) - model = get_large_model() + model = ChatOllama(model=get_large_model(), temperature=0.8) return model, system_prompt, user_message diff --git a/servers/fastapi/ppt_generator/models/llm_models_with_validations.py b/servers/fastapi/ppt_generator/models/llm_models_with_validations.py index 685d2c21..638d853f 100644 --- a/servers/fastapi/ppt_generator/models/llm_models_with_validations.py +++ b/servers/fastapi/ppt_generator/models/llm_models_with_validations.py @@ -34,48 +34,34 @@ from ppt_generator.models.llm_models import ( class LLMHeadingModelWithValidation(LLMHeadingModel): heading: str = Field( - description="List item heading to show in slide body", - min_length=10, - max_length=30, + description="List item heading to show in slide body in less than 5 words.", ) description: str = Field( description="Description of list item in less than 20 words.", - min_length=80, - max_length=150, ) class LLMHeadingModelWithImagePromptWithValidation(LLMHeadingModelWithImagePrompt): image_prompt: str = Field( - description="Prompt used to generate image for this item", - min_length=10, - max_length=50, + description="Prompt used to generate image for this item in less than 6 words.", ) class LLMHeadingModelWithIconQueryWithValidation(LLMHeadingModelWithIconQuery): icon_query: str = Field( - description="Icon query to generate icon for this item", - min_length=10, - max_length=50, + description="Icon query to generate icon for this item in less than 4 words.", ) class LLMType1ContentWithValidation(LLMType1Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: str = Field( description="Slide content summary in less than 30 words.", - min_length=100, - max_length=200, ) image_prompt: str = Field( - description="Prompt used to generate image for this slide.", - min_length=10, - max_length=50, + description="Prompt used to generate image for this slide in less than 6 words.", ) @classmethod @@ -85,9 +71,7 @@ class LLMType1ContentWithValidation(LLMType1Content): class LLMType2ContentWithValidation(LLMType2Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: List[LLMHeadingModelWithValidation] = Field( description="List items to show in slide's body", @@ -106,9 +90,7 @@ class LLMType2ContentWithValidation(LLMType2Content): class LLMType3ContentWithValidation(LLMType3Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: List[LLMHeadingModelWithValidation] = Field( description="List items to show in slide's body", @@ -116,9 +98,7 @@ class LLMType3ContentWithValidation(LLMType3Content): max_length=3, ) image_prompt: str = Field( - description="Prompt used to generate image for this slide", - min_length=10, - max_length=50, + description="Prompt used to generate image for this slide in less than 6 words.", ) @classmethod @@ -132,9 +112,7 @@ class LLMType3ContentWithValidation(LLMType3Content): class LLMType4ContentWithValidation(LLMType4Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: List[LLMHeadingModelWithImagePromptWithValidation] = Field( description="List items to show in slide's body", @@ -153,14 +131,10 @@ class LLMType4ContentWithValidation(LLMType4Content): class LLMType5ContentWithValidation(LLMType5Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: str = Field( description="Slide content summary in less than 30 words.", - min_length=100, - max_length=250, ) graph: GraphModel = Field(description="Graph to show in slide") @@ -171,14 +145,10 @@ class LLMType5ContentWithValidation(LLMType5Content): class LLMType6ContentWithValidation(LLMType6Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) description: str = Field( description="Slide content summary in less than 20 words.", - min_length=80, - max_length=150, ) body: List[LLMHeadingModelWithValidation] = Field( description="List items to show in slide's body", @@ -197,9 +167,7 @@ class LLMType6ContentWithValidation(LLMType6Content): class LLMType7ContentWithValidation(LLMType7Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: List[LLMHeadingModelWithIconQueryWithValidation] = Field( description="List items to show in slide's body", @@ -218,14 +186,10 @@ class LLMType7ContentWithValidation(LLMType7Content): class LLMType8ContentWithValidation(LLMType8Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) description: str = Field( description="Slide content summary in less than 20 words.", - min_length=80, - max_length=150, ) body: List[LLMHeadingModelWithImagePromptWithValidation] = Field( description="List items to show in slide's body", @@ -244,9 +208,7 @@ class LLMType8ContentWithValidation(LLMType8Content): class LLMType9ContentWithValidation(LLMType9Content): title: str = Field( - description="Title of the slide", - min_length=10, - max_length=50, + description="Title of the slide in less than 6 words.", ) body: List[LLMHeadingModelWithValidation] = Field( description="List items to show in slide's body", diff --git a/servers/fastapi/ppt_generator/slide_generator.py b/servers/fastapi/ppt_generator/slide_generator.py index 19e52af5..45512a55 100644 --- a/servers/fastapi/ppt_generator/slide_generator.py +++ b/servers/fastapi/ppt_generator/slide_generator.py @@ -1,5 +1,8 @@ from typing import Optional -from api.utils.utils import get_large_model, get_small_model + +from langchain_ollama import ChatOllama +from openai import OpenAI +from api.utils.model_utils import get_large_model, get_llm_client, get_small_model from ppt_config_generator.models import SlideMarkdownModel from ppt_generator.fix_validation_errors import get_validated_response @@ -16,42 +19,43 @@ from ppt_generator.models.other_models import SlideTypeModel from ppt_generator.models.slide_model import SlideModel -prompt_template_to_generate_slide_content = ChatPromptTemplate.from_messages( - [ - ( - "system", - """ - Generate structured slide based on provided title and outline, follow mentioned steps and notes and provide structured output. +def get_prompt_to_generate_slide_content( + title: str, outline: str, notes: Optional[str] = None +): + return [ + { + "role": "system", + "content": f""" + Generate structured slide based on provided title and outline, follow mentioned steps and notes and provide structured output. - # Steps - 1. Analyze the outline and title. - 2. Generate structured slide based on the outline and title. - 3. Generate image prompts and icon queries if mentioned in schema. - 4. Generate graph if mentioned in schema. + # Steps + 1. Analyze the outline and title. + 2. Generate structured slide based on the outline and title. + 3. Generate image prompts and icon queries if mentioned in schema. + 4. Generate graph if mentioned in schema. - # Notes - - Slide body should not use words like "This slide", "This presentation". - - Rephrase the slide body to make it flow naturally. - - Do not use markdown formatting in slide body. - - **Icon query** must be a generic single word noun. - - **Image prompt** should be a 2-3 words phrase. - - Try to make paragraphs as short as possible. - {notes} + # Notes + - Slide body should not use words like "This slide", "This presentation". + - Rephrase the slide body to make it flow naturally. + - Do not use markdown formatting in slide body. + - **Icon query** must be a generic single word noun. + - **Image prompt** should be a 2-3 words phrase. + - Try to make paragraphs as short as possible. + {notes} """, - ), - ( - "user", - """ - ## Slide Title - {title} + }, + { + "role": "user", + "content": f""" + ## Slide Title + {title} - ## Slide Outline - {outline} - """, - ), + ## Slide Outline + {outline} + """, + }, ] -) prompt_template_to_edit_slide_content = ChatPromptTemplate.from_messages( @@ -126,22 +130,26 @@ prompt_template_to_select_slide_type = ChatPromptTemplate.from_messages( async def get_slide_content_from_type_and_outline( slide_type: int, outline: SlideMarkdownModel ) -> LLMSlideContentModel: - content_type_model_type = LLM_CONTENT_TYPE_WITH_VALIDATION_MAPPING[slide_type] - validation_model = LLM_CONTENT_TYPE_MAPPING[slide_type] - model = get_small_model().with_structured_output( - content_type_model_type.model_json_schema() - ) - chain = prompt_template_to_generate_slide_content | model + response_model = LLM_CONTENT_TYPE_WITH_VALIDATION_MAPPING[slide_type] - return await get_validated_response( - chain, - { - "title": outline.title, - "outline": outline.body, - "notes": content_type_model_type.get_notes(), - }, - content_type_model_type, - validation_model, + client = get_llm_client() + model = get_small_model() + + response = await client.beta.chat.completions.parse( + model=model, + messages=get_prompt_to_generate_slide_content( + outline.title, + outline.body, + response_model.get_notes(), + ), + response_format=response_model, + ) + + with open("debug/llm_response.json", "w") as f: + f.write(response.choices[0].message.content) + + return LLM_CONTENT_TYPE_MAPPING[slide_type].model_validate_json( + response.choices[0].message.content ) @@ -152,7 +160,7 @@ async def get_edited_slide_content_model( theme: Optional[dict] = None, language: Optional[str] = None, ): - model = get_large_model() + model = ChatOllama(model=get_large_model(), temperature=0.8) content_type_model_type = LLM_CONTENT_TYPE_WITH_VALIDATION_MAPPING[slide_type] validation_model = LLM_CONTENT_TYPE_MAPPING[slide_type] @@ -181,7 +189,7 @@ async def get_slide_type_from_prompt( slide: SlideModel, ) -> SlideTypeModel: - model = get_small_model() + model = ChatOllama(model=get_small_model(), temperature=0.8) chain = prompt_template_to_select_slide_type | model.with_structured_output( SlideTypeModel.model_json_schema() diff --git a/servers/fastapi/requirements.txt b/servers/fastapi/requirements.txt index 113c5fbf..6b34832b 100644 --- a/servers/fastapi/requirements.txt +++ b/servers/fastapi/requirements.txt @@ -65,7 +65,7 @@ mypy_extensions==1.1.0 numpy==2.2.5 ollama==0.5.1 onnxruntime==1.22.0 -openai==1.78.1 +openai==1.91.0 orjson==3.10.18 packaging==24.2 pdfminer.six==20250327 diff --git a/servers/fastapi/server.py b/servers/fastapi/server.py index 40db9df9..aba13788 100644 --- a/servers/fastapi/server.py +++ b/servers/fastapi/server.py @@ -1,3 +1,4 @@ +import os import uvicorn import argparse @@ -8,6 +9,8 @@ from api.main import app app if __name__ == "__main__": + os.makedirs("debug", exist_ok=True) + parser = argparse.ArgumentParser(description="Run the FastAPI server") parser.add_argument( "--port", type=int, required=True, help="Port number to run the server on" diff --git a/servers/fastapi/server_autoreload.py b/servers/fastapi/server_autoreload.py index 4d5c00de..b6266161 100644 --- a/servers/fastapi/server_autoreload.py +++ b/servers/fastapi/server_autoreload.py @@ -1,8 +1,11 @@ +import os import uvicorn import argparse if __name__ == "__main__": + os.makedirs("debug", exist_ok=True) + parser = argparse.ArgumentParser(description="Run the FastAPI server") parser.add_argument( "--port", type=int, required=True, help="Port number to run the server on" diff --git a/servers/fastapi/server_test.py b/servers/fastapi/server_test.py index 4b547a5c..ed7fd67e 100644 --- a/servers/fastapi/server_test.py +++ b/servers/fastapi/server_test.py @@ -1,9 +1,12 @@ +import os import uvicorn from dotenv import load_dotenv load_dotenv() if __name__ == "__main__": + os.makedirs("debug", exist_ok=True) + uvicorn.run( "api.main:app", host="0.0.0.0", port=8000, log_level="info", reload=True )