diff --git a/CLAUDE.md b/CLAUDE.md
index 050e326..61a06aa 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -113,7 +113,7 @@ VITE_MODE=development
 
 ## Key Dependencies
 
-- **LLM**: OpenAI `chatgpt-4o-latest` (configurable in `config.py`)
+- **LLM**: OpenAI `gpt-4.1` (configurable in `config.py`)
 - **Embeddings**: OpenAI `text-embedding-3-small`
 - **Document Parsing**: LlamaParse (requires `LLAMA_CLOUD_API_KEY`)
 - **Graph DB**: Neo4j (bolt://localhost:7687)
diff --git a/README.md b/README.md
index d0afc97..72bcd31 100644
--- a/README.md
+++ b/README.md
@@ -183,7 +183,7 @@ This re-parses all documents, rebuilds the vector index, and regenerates the kno
 
 | Layer | Technology |
 |-------|-----------|
-| LLM | OpenAI GPT-4o (`chatgpt-4o-latest`) |
+| LLM | OpenAI GPT-4.1 (`gpt-4.1`) |
 | Embeddings | OpenAI `text-embedding-3-small` |
 | RAG Framework | LlamaIndex |
 | Document Parsing | LlamaParse (LlamaCloud) |
diff --git a/ai_core.py b/ai_core.py
index 743a637..453a78b 100644
--- a/ai_core.py
+++ b/ai_core.py
@@ -936,7 +936,7 @@ async def initialize_global_index() -> bool:
                 if not hasattr(graph_store, 'communities_built') or not graph_store.communities_built:
                     log_structured('info', 'Building graph communities before creating query engine')
                     try:
-                        # Use gpt-4o-mini model for community summaries (set in GraphRAGStore)
+                        # Use gpt-4.1-mini model for community summaries (set in GraphRAGStore)
                         # The build_communities() method will first try to load from cache
                         # and will only rebuild and re-cache if cache loading fails
                         # It also tracks if communities are already built to avoid duplicate work
diff --git a/config.py b/config.py
index fb588f0..79b0095 100644
--- a/config.py
+++ b/config.py
@@ -61,7 +61,7 @@ os.environ["LLAMA_CLOUD_API_KEY"] = LLAMA_CLOUD_API_KEY
 print(f"OpenAI API key {'is set' if OPENAI_API_KEY else 'is NOT set'}", file=sys.stderr)
 
 # --- AI Model Configuration ---
-LLM_MODEL = "chatgpt-4o-latest" # Or "gpt-4o" etc.
+LLM_MODEL = "gpt-4.1"
 EMBEDDING_MODEL = "text-embedding-3-small"
 LLM_TEMPERATURE = 0.3
 LLM_TIMEOUT = 300.0  # 5 minutes
diff --git a/graph_rag_integration.py b/graph_rag_integration.py
index 2ba2fc2..042a33e 100644
--- a/graph_rag_integration.py
+++ b/graph_rag_integration.py
@@ -219,7 +219,7 @@ class GraphRAGStore:
             for i, chunk in enumerate(chunks):
                 try:
-                    # Use GPT-4o-mini model for better cost efficiency
-                    llm = OpenAI(model="gpt-4o-mini")
+                    # Use GPT-4.1-mini model for better cost efficiency
+                    llm = OpenAI(model="gpt-4.1-mini")
                     messages = [
                         ChatMessage(
                             role="system",
@@ -237,7 +237,7 @@ class GraphRAGStore:
             if summaries:
                 final_summary_text = "\n\n".join(summaries)
                 try:
-                    llm = OpenAI(model="gpt-4o-mini")
+                    llm = OpenAI(model="gpt-4.1-mini")
                     messages = [
                         ChatMessage(
                             role="system",
@@ -257,7 +257,7 @@ class GraphRAGStore:
         # For normal size text, use the larger model directly
         try:
-            # Use GPT-4o-mini model for better cost efficiency
-            llm = OpenAI(model="gpt-4o-mini")
+            # Use GPT-4.1-mini model for better cost efficiency
+            llm = OpenAI(model="gpt-4.1-mini")
             messages = [
                 ChatMessage(
                     role="system",
@@ -825,9 +825,9 @@ def generate_final_answer(query, retrieval_result, llm):
     
     # If no model was provided or we're forcing to use a specific model
     if llm is None or not hasattr(llm, 'chat'):
-        # Fallback to gpt-4o-mini for better cost efficiency
-        llm = OpenAI(model="gpt-4o-mini")
-        log_structured('info', 'Using gpt-4o-mini model for final answer generation')
+        # Fallback to gpt-4.1-mini for better cost efficiency
+        llm = OpenAI(model="gpt-4.1-mini")
+        log_structured('info', 'Using gpt-4.1-mini model for final answer generation')
     
     prompt = f"""
     Based on the following information from two different sources, please answer this question: {query}
diff --git a/mongodb_utils.py b/mongodb_utils.py
index fa47310..31136b7 100644
--- a/mongodb_utils.py
+++ b/mongodb_utils.py
@@ -327,7 +327,7 @@ def generate_conversation_title(conversation_id: str, content: List[Dict]) -> Op
         
         # Create LLM instance
         llm = LlamaOpenAI(
-            model="chatgpt-4o-latest",
+            model="gpt-4.1",
             temperature=0.3,
         )