Make Ollama primary AI provider, Claude as paid fallback
- Ollama (internal GPU server) is tried first — free
- If Ollama is down, falls back to Claude API with a browser toast: "Ollama unavailable — using Claude (paid API)"
- Provider badge shows which one is active (orange/purple)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6e19c1f046
commit
83ce802264
3 changed files with 33 additions and 16 deletions
|
|
@@ -490,6 +490,7 @@ export async function POST(req: NextRequest) {
|
|||
sendEvent(controller, encoder, "message", {
|
||||
content: finalContent,
|
||||
provider: response.provider,
|
||||
fallback: response.fallback || false,
|
||||
invalidateKeys: [...new Set(allInvalidateKeys)],
|
||||
...(suggestions && suggestions.length > 0 ? { suggestions } : {}),
|
||||
...(entities.length > 0 ? { entities } : {}),
|
||||
|
|
|
|||
|
|
@@ -2,6 +2,7 @@
|
|||
|
||||
import { useState, useCallback, useRef } from "react";
|
||||
import { useQueryClient } from "@tanstack/react-query";
|
||||
import { toast } from "sonner";
|
||||
import { apiUrl } from "@/lib/api-client";
|
||||
|
||||
export interface ToolStatus {
|
||||
|
|
@@ -263,6 +264,13 @@ export function useChat(context?: ChatContext) {
|
|||
setProvider(data.provider);
|
||||
}
|
||||
|
||||
// Notify when falling back from Ollama to Claude (paid)
|
||||
if (data.fallback) {
|
||||
toast.warning("Ollama unavailable — using Claude (paid API)", {
|
||||
duration: 5000,
|
||||
});
|
||||
}
|
||||
|
||||
// Invalidate TanStack Query caches for any mutated data
|
||||
if (data.invalidateKeys && data.invalidateKeys.length > 0) {
|
||||
for (const key of data.invalidateKeys) {
|
||||
|
|
|
|||
|
|
@@ -27,6 +27,8 @@ export interface ChatResponse {
|
|||
toolCalls: ToolCall[];
|
||||
provider: "claude" | "ollama";
|
||||
stopReason: string;
|
||||
/** True when this response came from the fallback provider (Ollama down → Claude) */
|
||||
fallback?: boolean;
|
||||
}
|
||||
|
||||
const SYSTEM_PROMPT = `You are an AI assistant for the HP CG Production Tracker — a tool used by producers to manage CG rendering projects for HP products.
|
||||
|
|
@@ -421,29 +423,32 @@ async function chatWithOllama(
|
|||
// ─── Public API ───────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Send a chat request — tries Claude first, falls back to Ollama.
|
||||
* Send a chat request — tries Ollama first (free), falls back to Claude (paid).
|
||||
* The `fallback` flag in the response tells the UI to notify the user.
|
||||
*/
|
||||
export async function chat(
|
||||
messages: ChatMessage[],
|
||||
systemPrompt: string,
|
||||
toolResults?: { tool_use_id: string; content: string }[]
|
||||
): Promise<ChatResponse> {
|
||||
// Try Claude first
|
||||
if (process.env.ANTHROPIC_API_KEY) {
|
||||
// Try Ollama first (free / internal GPU server)
|
||||
if (getOllamaHost()) {
|
||||
try {
|
||||
return await chatWithClaude(messages, systemPrompt, toolResults);
|
||||
} catch (claudeError) {
|
||||
console.warn("[Chat] Claude failed, trying Ollama fallback:", (claudeError as Error).message);
|
||||
return await chatWithOllama(messages, systemPrompt, toolResults);
|
||||
} catch (ollamaError) {
|
||||
console.warn("[Chat] Ollama failed, trying Claude fallback:", (ollamaError as Error).message);
|
||||
}
|
||||
}
|
||||
|
||||
// Try Ollama
|
||||
if (getOllamaHost()) {
|
||||
return await chatWithOllama(messages, systemPrompt, toolResults);
|
||||
// Fall back to Claude (paid)
|
||||
if (process.env.ANTHROPIC_API_KEY) {
|
||||
const response = await chatWithClaude(messages, systemPrompt, toolResults);
|
||||
response.fallback = true;
|
||||
return response;
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
"No AI provider available. Configure ANTHROPIC_API_KEY for Claude or OLLAMA_CHAT_HOST for Ollama."
|
||||
"No AI provider available. Configure OLLAMA_CHAT_HOST for Ollama or ANTHROPIC_API_KEY for Claude."
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@@ -454,20 +459,23 @@ export async function getProviderStatus(): Promise<{
|
|||
available: boolean;
|
||||
activeProvider: "claude" | "ollama" | "none";
|
||||
ollamaModel?: string;
|
||||
hasFallback: boolean;
|
||||
}> {
|
||||
const claudeOk = await checkClaudeHealth();
|
||||
if (claudeOk) {
|
||||
return { available: true, activeProvider: "claude" };
|
||||
}
|
||||
|
||||
const ollamaOk = await checkOllamaHealth();
|
||||
const claudeOk = !!process.env.ANTHROPIC_API_KEY;
|
||||
|
||||
if (ollamaOk) {
|
||||
return {
|
||||
available: true,
|
||||
activeProvider: "ollama",
|
||||
ollamaModel: getOllamaChatModel(),
|
||||
hasFallback: claudeOk,
|
||||
};
|
||||
}
|
||||
|
||||
return { available: false, activeProvider: "none" };
|
||||
if (claudeOk) {
|
||||
return { available: true, activeProvider: "claude", hasFallback: false };
|
||||
}
|
||||
|
||||
return { available: false, activeProvider: "none", hasFallback: false };
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue