Switch Ollama chat model to gemma4:latest

Gemma 4 loads successfully, supports tool calling with proper structured output, and responds in ~100ms after initial load.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-08 15:11:59 -04:00 · 2026-04-08 15:11:59 -04:00 · d4fa69957e
commit d4fa69957e
parent 49f301f6f4
1 changed file with 1 addition and 1 deletion
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,7 +31,7 @@ services:
      # Ollama — points to internal GPU server for embeddings + chat fallback
      OLLAMA_HOST: ${OLLAMA_HOST:-http://10.24.42.219:11434}
      OLLAMA_CHAT_HOST: ${OLLAMA_CHAT_HOST:-http://10.24.42.219:11434}
-      OLLAMA_CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-mistral:latest}
+      OLLAMA_CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-gemma4:latest}
      OLLAMA_EMBED_MODEL: ${OLLAMA_EMBED_MODEL:-nomic-embed-text}
      NODE_ENV: production
      AUTH_SECRET: ${AUTH_SECRET}