Update OpenAI models to gpt-4.1 and gpt-4.1-mini

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 15:13:41 -06:00 · 2026-02-23 15:13:41 -06:00 · 49b0ba9c74
commit 49b0ba9c74
parent 236d1ddbd8
6 changed files with 11 additions and 11 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -113,7 +113,7 @@ VITE_MODE=development

 ## Key Dependencies

- **LLM**: OpenAI `chatgpt-4o-latest` (configurable in `config.py`)
+- **LLM**: OpenAI `gpt-4.1` (configurable in `config.py`)
 - **Embeddings**: OpenAI `text-embedding-3-small`
 - **Document Parsing**: LlamaParse (requires `LLAMA_CLOUD_API_KEY`)
 - **Graph DB**: Neo4j (bolt://localhost:7687)
--- a/README.md
+++ b/README.md
@@ -183,7 +183,7 @@ This re-parses all documents, rebuilds the vector index, and regenerates the kno

 | Layer | Technology |
 |-------|-----------|
-| LLM | OpenAI GPT-4o (`chatgpt-4o-latest`) |
+| LLM | OpenAI GPT-4.1 (`gpt-4.1`) |
 | Embeddings | OpenAI `text-embedding-3-small` |
 | RAG Framework | LlamaIndex |
 | Document Parsing | LlamaParse (LlamaCloud) |
--- a/ai_core.py
+++ b/ai_core.py
@@ -936,7 +936,7 @@ async def initialize_global_index() -> bool:
                if not hasattr(graph_store, 'communities_built') or not graph_store.communities_built:
                    log_structured('info', 'Building graph communities before creating query engine')
                    try:
-                        # Use gpt-4o-mini model for community summaries (set in GraphRAGStore)
+                        # Use gpt-4.1-mini model for community summaries (set in GraphRAGStore)
                        # The build_communities() method will first try to load from cache
                        # and will only rebuild and re-cache if cache loading fails
                        # It also tracks if communities are already built to avoid duplicate work
--- a/config.py
+++ b/config.py
@@ -61,7 +61,7 @@ os.environ["LLAMA_CLOUD_API_KEY"] = LLAMA_CLOUD_API_KEY
 print(f"OpenAI API key {'is set' if OPENAI_API_KEY else 'is NOT set'}", file=sys.stderr)

 # --- AI Model Configuration ---
-LLM_MODEL = "chatgpt-4o-latest" # Or "gpt-4o" etc.
+LLM_MODEL = "gpt-4.1"
 EMBEDDING_MODEL = "text-embedding-3-small"
 LLM_TEMPERATURE = 0.3
 LLM_TIMEOUT = 300.0  # 5 minutes
--- a/graph_rag_integration.py
+++ b/graph_rag_integration.py
@@ -219,7 +219,7 @@ class GraphRAGStore:
            for i, chunk in enumerate(chunks):
                try:
                    # Use GPT-4o-mini model for better cost efficiency
-                    llm = OpenAI(model="gpt-4o-mini")
+                    llm = OpenAI(model="gpt-4.1-mini")
                    messages = [
                        ChatMessage(
                            role="system",
@@ -237,7 +237,7 @@ class GraphRAGStore:
            if summaries:
                final_summary_text = "\n\n".join(summaries)
                try:
-                    llm = OpenAI(model="gpt-4o-mini")
+                    llm = OpenAI(model="gpt-4.1-mini")
                    messages = [
                        ChatMessage(
                            role="system",
@@ -257,7 +257,7 @@ class GraphRAGStore:
        # For normal size text, use the larger model directly
        try:
            # Use GPT-4o-mini model for better cost efficiency
-            llm = OpenAI(model="gpt-4o-mini")
+            llm = OpenAI(model="gpt-4.1-mini")
            messages = [
                ChatMessage(
                    role="system",
@@ -825,9 +825,9 @@ def generate_final_answer(query, retrieval_result, llm):
    
    # If no model was provided or we're forcing to use a specific model
    if llm is None or not hasattr(llm, 'chat'):
-        # Fallback to gpt-4o-mini for better cost efficiency
-        llm = OpenAI(model="gpt-4o-mini")
-        log_structured('info', 'Using gpt-4o-mini model for final answer generation')
+        # Fallback to gpt-4.1-mini for better cost efficiency
+        llm = OpenAI(model="gpt-4.1-mini")
+        log_structured('info', 'Using gpt-4.1-mini model for final answer generation')
    
    prompt = f"""
    Based on the following information from two different sources, please answer this question: {query}
--- a/mongodb_utils.py
+++ b/mongodb_utils.py
@@ -327,7 +327,7 @@ def generate_conversation_title(conversation_id: str, content: List[Dict]) -> Op
        
        # Create LLM instance
        llm = LlamaOpenAI(
-            model="chatgpt-4o-latest",
+            model="gpt-4.1",
            temperature=0.3,
        )