Make Ollama primary AI provider, Claude as paid fallback

- Ollama (internal GPU server) is tried first — free
- If Ollama is down or not configured, falls back to the Claude API and shows a browser toast:
  "Ollama unavailable — using Claude (paid API)"
- Provider badge shows which one is active (orange/purple)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
DJP 2026-04-08 14:06:35 -04:00
parent 6e19c1f046
commit 83ce802264
3 changed files with 33 additions and 16 deletions

View file

@ -490,6 +490,7 @@ export async function POST(req: NextRequest) {
sendEvent(controller, encoder, "message", {
content: finalContent,
provider: response.provider,
fallback: response.fallback || false,
invalidateKeys: [...new Set(allInvalidateKeys)],
...(suggestions && suggestions.length > 0 ? { suggestions } : {}),
...(entities.length > 0 ? { entities } : {}),

View file

@ -2,6 +2,7 @@
import { useState, useCallback, useRef } from "react";
import { useQueryClient } from "@tanstack/react-query";
import { toast } from "sonner";
import { apiUrl } from "@/lib/api-client";
export interface ToolStatus {
@ -263,6 +264,13 @@ export function useChat(context?: ChatContext) {
setProvider(data.provider);
}
// Notify when falling back from Ollama to Claude (paid)
if (data.fallback) {
toast.warning("Ollama unavailable — using Claude (paid API)", {
duration: 5000,
});
}
// Invalidate TanStack Query caches for any mutated data
if (data.invalidateKeys && data.invalidateKeys.length > 0) {
for (const key of data.invalidateKeys) {

View file

@ -27,6 +27,8 @@ export interface ChatResponse {
toolCalls: ToolCall[];
provider: "claude" | "ollama";
stopReason: string;
/** True when this response came from the fallback provider (Ollama down → Claude) */
fallback?: boolean;
}
const SYSTEM_PROMPT = `You are an AI assistant for the HP CG Production Tracker — a tool used by producers to manage CG rendering projects for HP products.
@ -421,29 +423,32 @@ async function chatWithOllama(
// ─── Public API ───────────────────────────────────────────
/**
* Send a chat request — tries Claude first, falls back to Ollama.
* Send a chat request — tries Ollama first (free), falls back to Claude (paid).
* The `fallback` flag in the response tells the UI to notify the user.
*/
export async function chat(
messages: ChatMessage[],
systemPrompt: string,
toolResults?: { tool_use_id: string; content: string }[]
): Promise<ChatResponse> {
// Try Claude first
if (process.env.ANTHROPIC_API_KEY) {
// Try Ollama first (free / internal GPU server)
if (getOllamaHost()) {
try {
return await chatWithClaude(messages, systemPrompt, toolResults);
} catch (claudeError) {
console.warn("[Chat] Claude failed, trying Ollama fallback:", (claudeError as Error).message);
return await chatWithOllama(messages, systemPrompt, toolResults);
} catch (ollamaError) {
console.warn("[Chat] Ollama failed, trying Claude fallback:", (ollamaError as Error).message);
}
}
// Try Ollama
if (getOllamaHost()) {
return await chatWithOllama(messages, systemPrompt, toolResults);
// Fall back to Claude (paid)
if (process.env.ANTHROPIC_API_KEY) {
const response = await chatWithClaude(messages, systemPrompt, toolResults);
response.fallback = true;
return response;
}
throw new Error(
"No AI provider available. Configure ANTHROPIC_API_KEY for Claude or OLLAMA_CHAT_HOST for Ollama."
"No AI provider available. Configure OLLAMA_CHAT_HOST for Ollama or ANTHROPIC_API_KEY for Claude."
);
}
@ -454,20 +459,23 @@ export async function getProviderStatus(): Promise<{
available: boolean;
activeProvider: "claude" | "ollama" | "none";
ollamaModel?: string;
hasFallback: boolean;
}> {
const claudeOk = await checkClaudeHealth();
if (claudeOk) {
return { available: true, activeProvider: "claude" };
}
const ollamaOk = await checkOllamaHealth();
const claudeOk = !!process.env.ANTHROPIC_API_KEY;
if (ollamaOk) {
return {
available: true,
activeProvider: "ollama",
ollamaModel: getOllamaChatModel(),
hasFallback: claudeOk,
};
}
return { available: false, activeProvider: "none" };
if (claudeOk) {
return { available: true, activeProvider: "claude", hasFallback: false };
}
return { available: false, activeProvider: "none", hasFallback: false };
}