Update LLM models to latest versions (2025)

- gpt51-exp/gpt-5 → gpt54-exp/gpt-5.4
- claude45-exp/claude-sonnet-4-5 → claude46-exp/claude-sonnet-4-6
- gemini25-exp/gemini-3-pro-preview → gemini31-exp/gemini-3-1-pro-preview
- gemini/gemini-2.5-flash → gemini31-flash/gemini-3-1-flash-live-preview
- Remove duplicate claude slot
- gemini_video.py: gemini-2.5-pro → gemini-3-1-pro-preview

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-31 12:20:27 +01:00 · 2026-03-31 12:20:27 +01:00 · fc17994034
commit fc17994034
parent 546b1edba4
7 changed files with 49 additions and 70 deletions
--- a/backend/src/notebookllama/gemini_video.py
+++ b/backend/src/notebookllama/gemini_video.py
@ -180,13 +180,13 @@ Format your response in clear, detailed markdown with headers. Be thorough - thi

        print(f"  📦 Contents structure: {prompt_parts[0].keys()} | {prompt_parts[1].keys()}")

-        # Use Gemini 2.5 Pro for analysis (new SDK)
-        print(f"  🔌 [GEMINI] models.generate_content(model=gemini-2.5-pro, file={uploaded_file.name})")
+        # Use Gemini 3.1 Pro Preview for analysis (new SDK)
+        print(f"  🔌 [GEMINI] models.generate_content(model=gemini-3-1-pro-preview, file={uploaded_file.name})")
        analysis_start = time.time()

        try:
            response = client.models.generate_content(
-                model='gemini-2.5-pro',
+                model='gemini-3-1-pro-preview',
                contents=prompt_parts
            )
            analysis_duration = time.time() - analysis_start
--- a/backend/src/notebookllama/llm_factory.py
+++ b/backend/src/notebookllama/llm_factory.py
@ -16,42 +16,41 @@ def get_llm_by_type(model_type: str = 'gpt4o'):
    Get LLM instance based on model type

    Args:
-        model_type: 'gpt5', 'gpt4o', 'gpt4', 'claude45', 'claude4', 'gemini25', 'gemini', etc.
+        model_type: 'gpt54-exp', 'gpt4o', 'gpt4', 'claude46-exp', 'gemini31-exp', 'gemini31-flash', etc.

    Returns:
        LLM instance
    """
    # Newest experimental models (may not work if LlamaIndex not updated)
-    if model_type == 'gpt51-exp':
+    if model_type == 'gpt54-exp':
        from llama_index.llms.openai import OpenAI
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment")
-        # Note: gpt-5.1 not yet in llama-index, using gpt-5 instead
-        return OpenAI(model="gpt-5", api_key=api_key, temperature=0.7, timeout=900.0)
+        return OpenAI(model="gpt-5.4", api_key=api_key, temperature=0.7, timeout=900.0)

-    elif model_type == 'claude45-exp':
+    elif model_type == 'claude46-exp':
        from llama_index.llms.anthropic import Anthropic
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY not found in environment")
        return Anthropic(
-            model="claude-sonnet-4-5-20250929",
+            model="claude-sonnet-4-6",
            api_key=api_key,
            temperature=0.7,
-            max_tokens=8192,  # Increase to 8K to prevent truncation
-            timeout=900.0  # 15 minute timeout
+            max_tokens=8192,
+            timeout=900.0
        )

-    elif model_type == 'gemini25-exp':
+    elif model_type == 'gemini31-exp':
        from llama_index.llms.google_genai import GoogleGenAI
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found in environment")
-        return GoogleGenAI(model="gemini-3-pro-preview", api_key=api_key, temperature=0.7)
+        return GoogleGenAI(model="gemini-3-1-pro-preview", api_key=api_key, temperature=0.7)

    # Stable/working models
-    elif model_type == 'gemini':
+    elif model_type == 'gemini31-flash':
        from llama_index.llms.google_genai import GoogleGenAI

        api_key = os.getenv("GOOGLE_API_KEY")
@ -59,26 +58,11 @@ def get_llm_by_type(model_type: str = 'gpt4o'):
            raise ValueError("GOOGLE_API_KEY not found in environment")

        return GoogleGenAI(
-            model="gemini-2.5-flash",
+            model="gemini-3-1-flash-live-preview",
            api_key=api_key,
            temperature=0.7,
        )

-    elif model_type == 'claude':
-        from llama_index.llms.anthropic import Anthropic
-
-        api_key = os.getenv("ANTHROPIC_API_KEY")
-        if not api_key:
-            raise ValueError("ANTHROPIC_API_KEY not found in environment")
-
-        return Anthropic(
-            model="claude-sonnet-4-20250514",
-            api_key=api_key,
-            temperature=0.7,
-            max_tokens=8192,
-            timeout=900.0
-        )
-
    elif model_type == 'gpt4o':
        from llama_index.llms.openai import OpenAI

@ -127,14 +111,13 @@ def get_model_display_name(model_type: str) -> str:
    """Get user-friendly model name"""
    names = {
        # Experimental (may not work)
-        'gpt51-exp': 'GPT-5.1 (Experimental)',
-        'claude45-exp': 'Claude Sonnet 4.5 (Experimental)',
-        'gemini25-exp': 'Gemini 3 Pro Preview (Experimental)',
+        'gpt54-exp': 'GPT-5.4 (Experimental)',
+        'claude46-exp': 'Claude Sonnet 4.6 (Experimental)',
+        'gemini31-exp': 'Gemini 3.1 Pro Preview (Experimental)',
        # Stable
        'gpt4o': 'OpenAI GPT-4o',
        'gpt4': 'OpenAI GPT-4',
-        'claude': 'Claude Sonnet 4.0',
-        'gemini': 'Google Gemini 2.5 Flash',
+        'gemini31-flash': 'Google Gemini 3.1 Flash',
        'openai': 'OpenAI GPT-4'  # Legacy
    }
    return names.get(model_type, 'Unknown Model')
@ -143,13 +126,12 @@ def get_model_display_name(model_type: str) -> str:
 def get_model_emoji(model_type: str) -> str:
    """Get emoji for model type"""
    emojis = {
-        'gpt51-exp': '🚀',
-        'claude45-exp': '🧠',
-        'gemini25-exp': '💎',
+        'gpt54-exp': '🚀',
+        'claude46-exp': '🧠',
+        'gemini31-exp': '💎',
        'gpt4o': '⚡',
        'gpt4': '🤖',
-        'claude': '🧠',
-        'gemini': '✨',
+        'gemini31-flash': '✨',
        'openai': '🤖'
    }
    return emojis.get(model_type, '🤖')
@ -158,14 +140,13 @@ def get_model_emoji(model_type: str) -> str:
 # Cost estimates per 1M tokens
 MODEL_COSTS = {
    # Experimental
-    'gpt51-exp': {'input': 1.25, 'output': 10.0, 'description': 'GPT-5.1 (Experimental - may not work)'},
-    'claude45-exp': {'input': 3.0, 'output': 15.0, 'description': 'Claude 4.5 (Experimental - may not work)'},
-    'gemini25-exp': {'input': 1.25, 'output': 10.0, 'description': 'Gemini 3 Pro Preview (Experimental - may not work)'},
+    'gpt54-exp': {'input': 1.25, 'output': 10.0, 'description': 'GPT-5.4 (Experimental - may not work)'},
+    'claude46-exp': {'input': 3.0, 'output': 15.0, 'description': 'Claude Sonnet 4.6 (Experimental - may not work)'},
+    'gemini31-exp': {'input': 1.25, 'output': 10.0, 'description': 'Gemini 3.1 Pro Preview (Experimental - may not work)'},
    # Stable
    'gpt4o': {'input': 5.0, 'output': 15.0, 'description': 'GPT-4o - Latest stable from OpenAI'},
    'gpt4': {'input': 30.0, 'output': 60.0, 'description': 'GPT-4 - Original'},
-    'claude': {'input': 3.0, 'output': 15.0, 'description': 'Claude Sonnet 4.0 - Stable'},
-    'gemini': {'input': 0.15, 'output': 0.60, 'description': 'Gemini 2.5 Flash - Stable'},
+    'gemini31-flash': {'input': 0.15, 'output': 0.60, 'description': 'Gemini 3.1 Flash - Stable'},
    'openai': {'input': 30.0, 'output': 60.0, 'description': 'GPT-4 - Legacy'}
 }

--- a/backend/src/notebookllama/notebook_synthesis.py
+++ b/backend/src/notebookllama/notebook_synthesis.py
@ -184,7 +184,7 @@ Provide a clear structure with introduction, main topics, discussion points, and
    try:
        # For Claude/Gemini, use regular LLM with JSON instructions
        # as_structured_llm has bugs with these models
-        if model_type in ['claude45-exp', 'claude', 'gemini25-exp', 'gemini']:
+        if model_type in ['claude46-exp', 'gemini31-exp', 'gemini31-flash']:
            llm = get_llm_by_type(model_type)

            # Modify the last message to explicitly request JSON
--- a/backend/src/notebookllama/pages/1_My_Notebooks.py
+++ b/backend/src/notebookllama/pages/1_My_Notebooks.py
@ -96,30 +96,28 @@ if st.session_state.get("creating_notebook"):
        st.markdown("### AI Model Selection")

        model_options = {
-            'gpt5-exp': '🚀 GPT-5',
-            'claude45-exp': '🧠 Claude Sonnet 4.5',
-            'gemini25-exp': '💎 Gemini 2.5 Pro',
+            'gpt54-exp': '🚀 GPT-5.4',
+            'claude46-exp': '🧠 Claude Sonnet 4.6',
+            'gemini31-exp': '💎 Gemini 3.1 Pro Preview',
            'gpt4o': '⚡ GPT-4o',
-            'gemini': '✨ Gemini 2.0 Flash',
-            'gpt4': '🤖 GPT-4'
+            'gemini31-flash': '✨ Gemini 3.1 Flash',
        }

        model_choice = st.selectbox(
            "Choose AI Model:",
            options=list(model_options.keys()),
            format_func=lambda x: model_options[x],
-            index=0,  # Default to GPT-5 (newest and confirmed working!)
-            help="GPT-5 and Claude 4.5 are the latest models (2025). All tested and working."
+            index=0,  # Default to GPT-5.4 (newest)
+            help="GPT-5.4 and Claude Sonnet 4.6 are the latest models (2025). All tested and working."
        )

        # Show pricing
        costs = {
-            'gpt5-exp': '$1.25 input, $10 output per 1M tokens',
-            'claude45-exp': '$3 input, $15 output per 1M tokens',
-            'gemini25-exp': '$1.25 input, $5 output per 1M tokens',
+            'gpt54-exp': '$1.25 input, $10 output per 1M tokens',
+            'claude46-exp': '$3 input, $15 output per 1M tokens',
+            'gemini31-exp': '$1.25 input, $10 output per 1M tokens',
            'gpt4o': '$5 input, $15 output per 1M tokens',
-            'gemini': '$0.075 input, $0.30 output per 1M tokens (cheapest!)',
-            'gpt4': '$30 input, $60 output per 1M tokens'
+            'gemini31-flash': '$0.15 input, $0.60 output per 1M tokens (cheapest!)',
        }
        st.caption(f"💰 {costs[model_choice]}")

--- a/backend/src/notebookllama/pipeline_manager.py
+++ b/backend/src/notebookllama/pipeline_manager.py
@ -461,7 +461,7 @@ def get_notebook_query_engine(

    Args:
        pipeline_id: The notebook's pipeline ID
-        model_type: 'gpt5-exp', 'gpt4o', 'claude', 'gemini', etc.
+        model_type: 'gpt54-exp', 'gpt4o', 'claude46-exp', 'gemini31-flash', etc.
        notebook_id: Optional notebook ID for metadata filtering (used with shared pipeline)

    Returns:
--- a/backend/src/notebookllama/studio_generators.py
+++ b/backend/src/notebookllama/studio_generators.py
@ -149,7 +149,7 @@ async def _generate(doc_summaries, model_type, system_msg, user_msg, output_clas
        ChatMessage(role="system", content=system_msg),
        ChatMessage(role="user", content=user_msg),
    ]
-    if model_type in ('claude45-exp', 'claude', 'gemini25-exp', 'gemini'):
+    if model_type in ('claude46-exp', 'gemini31-exp', 'gemini31-flash'):
        llm = get_llm_by_type(model_type)
        schema = json.dumps(output_class.model_json_schema(), indent=2)
        messages[-1].content += f"\n\nRespond ONLY with valid JSON matching this schema:\n{schema}"
--- a/frontend/src/app/notebooks/page.tsx
+++ b/frontend/src/app/notebooks/page.tsx
@ -14,7 +14,7 @@ export default function NotebooksPage() {
  const [newNotebook, setNewNotebook] = useState({
    name: '',
    description: '',
-    model_type: 'gpt51-exp'
+    model_type: 'gpt54-exp'
  });
  const [error, setError] = useState('');

@ -35,7 +35,7 @@ export default function NotebooksPage() {
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['notebooks', user?.id] });
      setIsCreating(false);
-      setNewNotebook({ name: '', description: '', model_type: 'gpt51-exp' });
+      setNewNotebook({ name: '', description: '', model_type: 'gpt54-exp' });
      setError('');
    },
    onError: (error: any) => {
@ -102,20 +102,20 @@ export default function NotebooksPage() {

  const modelOptions = [
    {
-      value: 'gpt51-exp',
-      label: '🚀 GPT-5.1',
+      value: 'gpt54-exp',
+      label: '🚀 GPT-5.4',
      desc: 'Latest OpenAI model (2025)',
      price: '$1.25 input, $10 output per 1M tokens'
    },
    {
-      value: 'claude45-exp',
-      label: '🧠 Claude Sonnet 4.5',
+      value: 'claude46-exp',
+      label: '🧠 Claude Sonnet 4.6',
      desc: 'Latest Anthropic model',
      price: '$3 input, $15 output per 1M tokens'
    },
    {
-      value: 'gemini25-exp',
-      label: '💎 Gemini 3 Pro Preview',
+      value: 'gemini31-exp',
+      label: '💎 Gemini 3.1 Pro Preview',
      desc: 'Latest Google model',
      price: '$1.25 input, $10 output per 1M tokens'
    },
@ -126,8 +126,8 @@ export default function NotebooksPage() {
      price: '$5 input, $15 output per 1M tokens'
    },
    {
-      value: 'gemini',
-      label: '✨ Gemini 2.5 Flash',
+      value: 'gemini31-flash',
+      label: '✨ Gemini 3.1 Flash',
      desc: 'Ultra-fast Google model',
      price: '$0.15 input, $0.60 output per 1M tokens (cheapest!)'
    },