From 83ce802264a2d024b096cd8a43d90a0609d79f9b Mon Sep 17 00:00:00 2001 From: DJP Date: Wed, 8 Apr 2026 14:06:35 -0400 Subject: [PATCH] Make Ollama primary AI provider, Claude as paid fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Ollama (internal GPU server) is tried first — free - If Ollama is down, falls back to Claude API with a browser toast: "Ollama unavailable — using Claude (paid API)" - Provider badge shows which one is active (orange/purple) Co-Authored-By: Claude Opus 4.6 --- src/app/api/chat/route.ts | 1 + src/hooks/use-chat.ts | 8 ++++++++ src/lib/chat/provider.ts | 40 +++++++++++++++++++++++---------------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index ca6d20d..f338f93 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -490,6 +490,7 @@ export async function POST(req: NextRequest) { sendEvent(controller, encoder, "message", { content: finalContent, provider: response.provider, + fallback: response.fallback || false, invalidateKeys: [...new Set(allInvalidateKeys)], ...(suggestions && suggestions.length > 0 ? { suggestions } : {}), ...(entities.length > 0 ? 
{ entities } : {}), diff --git a/src/hooks/use-chat.ts b/src/hooks/use-chat.ts index 4d70d93..5476f6d 100644 --- a/src/hooks/use-chat.ts +++ b/src/hooks/use-chat.ts @@ -2,6 +2,7 @@ import { useState, useCallback, useRef } from "react"; import { useQueryClient } from "@tanstack/react-query"; +import { toast } from "sonner"; import { apiUrl } from "@/lib/api-client"; export interface ToolStatus { @@ -263,6 +264,13 @@ export function useChat(context?: ChatContext) { setProvider(data.provider); } + // Notify when falling back from Ollama to Claude (paid) + if (data.fallback) { + toast.warning("Ollama unavailable — using Claude (paid API)", { + duration: 5000, + }); + } + // Invalidate TanStack Query caches for any mutated data if (data.invalidateKeys && data.invalidateKeys.length > 0) { for (const key of data.invalidateKeys) { diff --git a/src/lib/chat/provider.ts b/src/lib/chat/provider.ts index 695ef4f..6863b4a 100644 --- a/src/lib/chat/provider.ts +++ b/src/lib/chat/provider.ts @@ -27,6 +27,8 @@ export interface ChatResponse { toolCalls: ToolCall[]; provider: "claude" | "ollama"; stopReason: string; + /** True when this response came from the fallback provider (Ollama down → Claude) */ + fallback?: boolean; } const SYSTEM_PROMPT = `You are an AI assistant for the HP CG Production Tracker — a tool used by producers to manage CG rendering projects for HP products. @@ -421,29 +423,32 @@ async function chatWithOllama( // ─── Public API ─────────────────────────────────────────── /** - * Send a chat request — tries Claude first, falls back to Ollama. + * Send a chat request — tries Ollama first (free), falls back to Claude (paid). + * The `fallback` flag in the response tells the UI to notify the user. 
*/ export async function chat( messages: ChatMessage[], systemPrompt: string, toolResults?: { tool_use_id: string; content: string }[] ): Promise<ChatResponse> { - // Try Claude first - if (process.env.ANTHROPIC_API_KEY) { + // Try Ollama first (free / internal GPU server) + if (getOllamaHost()) { try { - return await chatWithClaude(messages, systemPrompt, toolResults); - } catch (claudeError) { - console.warn("[Chat] Claude failed, trying Ollama fallback:", (claudeError as Error).message); + return await chatWithOllama(messages, systemPrompt, toolResults); + } catch (ollamaError) { + console.warn("[Chat] Ollama failed, trying Claude fallback:", (ollamaError as Error).message); } } - // Try Ollama - if (getOllamaHost()) { - return await chatWithOllama(messages, systemPrompt, toolResults); + // Fall back to Claude (paid) + if (process.env.ANTHROPIC_API_KEY) { + const response = await chatWithClaude(messages, systemPrompt, toolResults); + response.fallback = true; + return response; } throw new Error( - "No AI provider available. Configure ANTHROPIC_API_KEY for Claude or OLLAMA_CHAT_HOST for Ollama." + "No AI provider available. Configure OLLAMA_CHAT_HOST for Ollama or ANTHROPIC_API_KEY for Claude." ); } @@ -454,20 +459,23 @@ export async function getProviderStatus(): Promise<{ available: boolean; activeProvider: "claude" | "ollama" | "none"; ollamaModel?: string; + hasFallback: boolean; }> { - const claudeOk = await checkClaudeHealth(); - if (claudeOk) { - return { available: true, activeProvider: "claude" }; - } - const ollamaOk = await checkOllamaHealth(); + const claudeOk = !!process.env.ANTHROPIC_API_KEY; + if (ollamaOk) { return { available: true, activeProvider: "ollama", ollamaModel: getOllamaChatModel(), + hasFallback: claudeOk, }; } - return { available: false, activeProvider: "none" }; + if (claudeOk) { + return { available: true, activeProvider: "claude", hasFallback: false }; + } + + return { available: false, activeProvider: "none", hasFallback: false }; }