Switch Ollama chat model to gemma4:latest

Gemma 4 loads successfully, supports tool calling with proper
structured output, and responds in ~100ms after initial load.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
DJP 2026-04-08 15:11:59 -04:00
parent 49f301f6f4
commit d4fa69957e

View file

@@ -31,7 +31,7 @@ services:
# Ollama — points to internal GPU server for embeddings + chat fallback
OLLAMA_HOST: ${OLLAMA_HOST:-http://10.24.42.219:11434}
OLLAMA_CHAT_HOST: ${OLLAMA_CHAT_HOST:-http://10.24.42.219:11434}
-OLLAMA_CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-mistral:latest}
+OLLAMA_CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-gemma4:latest}
OLLAMA_EMBED_MODEL: ${OLLAMA_EMBED_MODEL:-nomic-embed-text}
NODE_ENV: production
AUTH_SECRET: ${AUTH_SECRET}