diff --git a/CLAUDE.md b/CLAUDE.md index 050e326..61a06aa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -113,7 +113,7 @@ VITE_MODE=development ## Key Dependencies -- **LLM**: OpenAI `chatgpt-4o-latest` (configurable in `config.py`) +- **LLM**: OpenAI `gpt-4.1` (configurable in `config.py`) - **Embeddings**: OpenAI `text-embedding-3-small` - **Document Parsing**: LlamaParse (requires `LLAMA_CLOUD_API_KEY`) - **Graph DB**: Neo4j (bolt://localhost:7687) diff --git a/README.md b/README.md index d0afc97..72bcd31 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ This re-parses all documents, rebuilds the vector index, and regenerates the kno | Layer | Technology | |-------|-----------| -| LLM | OpenAI GPT-4o (`chatgpt-4o-latest`) | +| LLM | OpenAI GPT-4.1 (`gpt-4.1`) | | Embeddings | OpenAI `text-embedding-3-small` | | RAG Framework | LlamaIndex | | Document Parsing | LlamaParse (LlamaCloud) | diff --git a/ai_core.py b/ai_core.py index 743a637..453a78b 100644 --- a/ai_core.py +++ b/ai_core.py @@ -936,7 +936,7 @@ async def initialize_global_index() -> bool: if not hasattr(graph_store, 'communities_built') or not graph_store.communities_built: log_structured('info', 'Building graph communities before creating query engine') try: - # Use gpt-4o-mini model for community summaries (set in GraphRAGStore) + # Use gpt-4.1-mini model for community summaries (set in GraphRAGStore) # The build_communities() method will first try to load from cache # and will only rebuild and re-cache if cache loading fails # It also tracks if communities are already built to avoid duplicate work diff --git a/config.py b/config.py index fb588f0..79b0095 100644 --- a/config.py +++ b/config.py @@ -61,7 +61,7 @@ os.environ["LLAMA_CLOUD_API_KEY"] = LLAMA_CLOUD_API_KEY print(f"OpenAI API key {'is set' if OPENAI_API_KEY else 'is NOT set'}", file=sys.stderr) # --- AI Model Configuration --- -LLM_MODEL = "chatgpt-4o-latest" # Or "gpt-4o" etc. +LLM_MODEL = "gpt-4.1" EMBEDDING_MODEL = "text-embedding-3-small" LLM_TEMPERATURE = 0.3 LLM_TIMEOUT = 300.0 # 5 minutes diff --git a/graph_rag_integration.py b/graph_rag_integration.py index 2ba2fc2..042a33e 100644 --- a/graph_rag_integration.py +++ b/graph_rag_integration.py @@ -219,7 +219,7 @@ class GraphRAGStore: for i, chunk in enumerate(chunks): try: # Use GPT-4o-mini model for better cost efficiency - llm = OpenAI(model="gpt-4o-mini") + llm = OpenAI(model="gpt-4.1-mini") messages = [ ChatMessage( role="system", @@ -237,7 +237,7 @@ class GraphRAGStore: if summaries: final_summary_text = "\n\n".join(summaries) try: - llm = OpenAI(model="gpt-4o-mini") + llm = OpenAI(model="gpt-4.1-mini") messages = [ ChatMessage( role="system", @@ -257,7 +257,7 @@ class GraphRAGStore: # For normal size text, use the larger model directly try: # Use GPT-4o-mini model for better cost efficiency - llm = OpenAI(model="gpt-4o-mini") + llm = OpenAI(model="gpt-4.1-mini") messages = [ ChatMessage( role="system", @@ -825,9 +825,9 @@ def generate_final_answer(query, retrieval_result, llm): # If no model was provided or we're forcing to use a specific model if llm is None or not hasattr(llm, 'chat'): - # Fallback to gpt-4o-mini for better cost efficiency - llm = OpenAI(model="gpt-4o-mini") - log_structured('info', 'Using gpt-4o-mini model for final answer generation') + # Fallback to gpt-4.1-mini for better cost efficiency + llm = OpenAI(model="gpt-4.1-mini") + log_structured('info', 'Using gpt-4.1-mini model for final answer generation') prompt = f""" Based on the following information from two different sources, please answer this question: {query} diff --git a/mongodb_utils.py b/mongodb_utils.py index fa47310..31136b7 100644 --- a/mongodb_utils.py +++ b/mongodb_utils.py @@ -327,7 +327,7 @@ def generate_conversation_title(conversation_id: str, content: List[Dict]) -> Op # Create LLM instance llm = LlamaOpenAI( - model="chatgpt-4o-latest", + model="gpt-4.1", temperature=0.3, )