feat: local image provider
This commit is contained in:
parent
c9b60d5f51
commit
c34fb75302
15 changed files with 201 additions and 4 deletions
|
|
@ -100,6 +100,8 @@ services:
|
|||
- WEB_GROUNDING=${WEB_GROUNDING}
|
||||
- DATABASE_URL=${DATABASE_URL}
|
||||
- DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING}
|
||||
- LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL}
|
||||
- LOCAL_IMAGE_MODEL=${LOCAL_IMAGE_MODEL}
|
||||
|
||||
development-gpu:
|
||||
build:
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ class ImageProvider(Enum):
|
|||
PIXABAY = "pixabay"
|
||||
GEMINI_FLASH = "gemini_flash"
|
||||
DALLE3 = "dall-e-3"
|
||||
LOCAL = "local" # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
|
||||
|
|
|
|||
|
|
@ -32,6 +32,10 @@ class UserConfig(BaseModel):
|
|||
PEXELS_API_KEY: Optional[str] = None
|
||||
PIXABAY_API_KEY: Optional[str] = None
|
||||
|
||||
# Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
|
||||
LOCAL_IMAGE_URL: Optional[str] = None
|
||||
LOCAL_IMAGE_MODEL: Optional[str] = None
|
||||
|
||||
# Reasoning
|
||||
TOOL_CALLS: Optional[bool] = None
|
||||
DISABLE_THINKING: Optional[bool] = None
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
import aiohttp
|
||||
from google import genai
|
||||
|
|
@ -9,12 +10,15 @@ from models.sql.image_asset import ImageAsset
|
|||
from utils.download_helpers import download_file
|
||||
from utils.get_env import get_pexels_api_key_env
|
||||
from utils.get_env import get_pixabay_api_key_env
|
||||
from utils.get_env import get_local_image_url_env
|
||||
from utils.get_env import get_local_image_model_env
|
||||
from utils.image_provider import (
|
||||
is_image_generation_disabled,
|
||||
is_pixels_selected,
|
||||
is_pixabay_selected,
|
||||
is_gemini_flash_selected,
|
||||
is_dalle3_selected,
|
||||
is_local_selected,
|
||||
)
|
||||
import uuid
|
||||
|
||||
|
|
@ -37,6 +41,8 @@ class ImageGenerationService:
|
|||
return self.generate_image_google
|
||||
elif is_dalle3_selected():
|
||||
return self.generate_image_openai
|
||||
elif is_local_selected():
|
||||
return self.generate_image_local
|
||||
return None
|
||||
|
||||
def is_stock_provider_selected(self):
|
||||
|
|
@ -137,3 +143,84 @@ class ImageGenerationService:
|
|||
data = await response.json()
|
||||
image_url = data["hits"][0]["largeImageURL"]
|
||||
return image_url
|
||||
|
||||
async def generate_image_local(self, prompt: str, output_directory: str) -> str:
    """
    Generate an image with a local image generation server and save it as a PNG.

    Sends a txt2img request in the Automatic1111 WebUI API format to
    ``<LOCAL_IMAGE_URL>/sdapi/v1/txt2img`` and writes the first returned
    image into ``output_directory`` under a random UUID file name.

    Intended to work with (per the original author; only the request
    format below is enforced by this code):
    - Automatic1111 (Stable Diffusion WebUI)
    - Stable Diffusion WebUI Forge
    - ComfyUI (with API wrapper)
    - Fooocus (with API mode)
    - FLUX-based UIs with compatible API
    - Any server implementing the /sdapi/v1/txt2img endpoint

    Args:
        prompt: The text prompt for image generation
        output_directory: Directory to save the generated image

    Returns:
        Path to the generated image file

    Raises:
        ValueError: If the LOCAL_IMAGE_URL environment variable is not set.
        Exception: If the server cannot be reached, does not answer within
            the timeout, responds with a non-200 status, or returns no
            usable image data.
    """
    local_url = get_local_image_url_env()
    local_model = get_local_image_model_env()

    if not local_url:
        raise ValueError("LOCAL_IMAGE_URL environment variable is not set")

    # Avoid a double slash when the configured URL ends with "/".
    local_url = local_url.rstrip("/")

    # Automatic1111-compatible text-to-image endpoint.
    api_url = f"{local_url}/sdapi/v1/txt2img"

    payload = {
        "prompt": prompt,
        "negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
        "steps": 20,
        "width": 1024,
        "height": 1024,
        "cfg_scale": 7,
        "sampler_name": "Euler a",
    }

    # Only override the checkpoint when one is configured; otherwise the
    # server keeps using whatever model it currently has loaded.
    if local_model:
        payload["override_settings"] = {"sd_model_checkpoint": local_model}

    # Local generation can be slow, so allow up to 5 minutes in total.
    timeout_seconds = 300
    timeout = aiohttp.ClientTimeout(total=timeout_seconds)

    try:
        async with aiohttp.ClientSession(trust_env=True) as session:
            # "async with" guarantees the response (and its connection) is
            # released even when we bail out early with an error.
            async with session.post(api_url, json=payload, timeout=timeout) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(
                        f"Local image API error: {response.status} - {error_text}"
                    )
                data = await response.json()
    except asyncio.TimeoutError as e:
        # A total-timeout is raised as asyncio.TimeoutError, which is not an
        # aiohttp.ClientError, so it needs its own handler.
        raise Exception(
            f"Local image server at {local_url} did not respond within {timeout_seconds} seconds"
        ) from e
    except aiohttp.ClientError as e:
        raise Exception(
            f"Failed to connect to local image server at {local_url}: {str(e)}"
        ) from e

    # The API returns images as a list of base64 encoded strings.
    images = data.get("images") if isinstance(data, dict) else None
    if not images:
        raise Exception("No images returned from local image API")

    image_base64 = images[0]
    # Some compatible servers prefix the payload with a data URI header
    # ("data:image/png;base64,..."). "," is not part of the base64 alphabet,
    # so everything up to the first comma can be dropped safely.
    if isinstance(image_base64, str) and "," in image_base64:
        image_base64 = image_base64.split(",", 1)[1]

    try:
        image_data = base64.b64decode(image_base64)
    except (ValueError, TypeError) as e:
        raise Exception("Local image API returned invalid base64 image data") from e

    image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
    with open(image_path, "wb") as f:
        f.write(image_data)

    return image_path
|
||||
|
|
|
|||
|
|
@ -99,3 +99,11 @@ def get_extended_reasoning_env():
|
|||
|
||||
def get_web_grounding_env():
|
||||
return os.getenv("WEB_GROUNDING")
|
||||
|
||||
|
||||
def get_local_image_url_env():
    """Return the value of the ``LOCAL_IMAGE_URL`` environment variable, or ``None`` if it is unset."""
    return os.environ.get("LOCAL_IMAGE_URL")
|
||||
|
||||
|
||||
def get_local_image_model_env():
    """Return the value of the ``LOCAL_IMAGE_MODEL`` environment variable, or ``None`` if it is unset."""
    return os.environ.get("LOCAL_IMAGE_MODEL")
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from utils.get_env import (
|
|||
get_disable_image_generation_env,
|
||||
get_google_api_key_env,
|
||||
get_image_provider_env,
|
||||
get_local_image_url_env,
|
||||
get_openai_api_key_env,
|
||||
get_pexels_api_key_env,
|
||||
get_pixabay_api_key_env,
|
||||
|
|
@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
|
|||
return ImageProvider.DALLE3 == get_selected_image_provider()
|
||||
|
||||
|
||||
def is_local_selected() -> bool:
    """Report whether the currently selected image provider is ``ImageProvider.LOCAL``."""
    selected_provider = get_selected_image_provider()
    return ImageProvider.LOCAL == selected_provider
|
||||
|
||||
|
||||
def get_selected_image_provider() -> ImageProvider | None:
|
||||
"""
|
||||
Get the selected image provider from environment variables.
|
||||
|
|
@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str:
|
|||
return get_google_api_key_env()
|
||||
elif selected_image_provider == ImageProvider.DALLE3:
|
||||
return get_openai_api_key_env()
|
||||
elif selected_image_provider == ImageProvider.LOCAL:
|
||||
return get_local_image_url_env() # Returns URL instead of API key
|
||||
else:
|
||||
raise ValueError(f"Invalid image provider: {selected_image_provider}")
|
||||
|
|
|
|||
|
|
@ -87,3 +87,11 @@ def set_extended_reasoning_env(value):
|
|||
|
||||
def set_web_grounding_env(value):
|
||||
os.environ["WEB_GROUNDING"] = value
|
||||
|
||||
|
||||
def set_local_image_url_env(value):
    """Store ``value`` in the process environment under ``LOCAL_IMAGE_URL``."""
    os.environ.update(LOCAL_IMAGE_URL=value)
|
||||
|
||||
|
||||
def set_local_image_model_env(value):
    """Store ``value`` in the process environment under ``LOCAL_IMAGE_MODEL``."""
    os.environ.update(LOCAL_IMAGE_MODEL=value)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ from utils.get_env import (
|
|||
get_google_api_key_env,
|
||||
get_google_model_env,
|
||||
get_llm_provider_env,
|
||||
get_local_image_model_env,
|
||||
get_local_image_url_env,
|
||||
get_ollama_model_env,
|
||||
get_ollama_url_env,
|
||||
get_openai_api_key_env,
|
||||
|
|
@ -38,6 +40,8 @@ from utils.set_env import (
|
|||
set_google_api_key_env,
|
||||
set_google_model_env,
|
||||
set_llm_provider_env,
|
||||
set_local_image_model_env,
|
||||
set_local_image_url_env,
|
||||
set_ollama_model_env,
|
||||
set_ollama_url_env,
|
||||
set_openai_api_key_env,
|
||||
|
|
@ -85,6 +89,8 @@ def get_user_config():
|
|||
),
|
||||
PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(),
|
||||
PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(),
|
||||
LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(),
|
||||
LOCAL_IMAGE_MODEL=existing_config.LOCAL_IMAGE_MODEL or get_local_image_model_env(),
|
||||
TOOL_CALLS=(
|
||||
existing_config.TOOL_CALLS
|
||||
if existing_config.TOOL_CALLS is not None
|
||||
|
|
@ -142,6 +148,10 @@ def update_env_with_user_config():
|
|||
set_pixabay_api_key_env(user_config.PIXABAY_API_KEY)
|
||||
if user_config.PEXELS_API_KEY:
|
||||
set_pexels_api_key_env(user_config.PEXELS_API_KEY)
|
||||
if user_config.LOCAL_IMAGE_URL:
|
||||
set_local_image_url_env(user_config.LOCAL_IMAGE_URL)
|
||||
if user_config.LOCAL_IMAGE_MODEL:
|
||||
set_local_image_model_env(user_config.LOCAL_IMAGE_MODEL)
|
||||
if user_config.TOOL_CALLS is not None:
|
||||
set_tool_calls_env(str(user_config.TOOL_CALLS))
|
||||
if user_config.DISABLE_THINKING is not None:
|
||||
|
|
|
|||
|
|
@ -12,10 +12,10 @@ export async function GET() {
|
|||
const raw = fs.readFileSync(userConfigPath, "utf-8");
|
||||
const cfg = JSON.parse(raw || "{}");
|
||||
keyFromFile = cfg?.OPENAI_API_KEY || "";
|
||||
} catch {}
|
||||
} catch { }
|
||||
}
|
||||
|
||||
console.log(keyFromFile);
|
||||
|
||||
|
||||
const keyFromEnv = process.env.OPENAI_API_KEY || "";
|
||||
console.log(keyFromEnv);
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ export async function POST(request: Request) {
|
|||
userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
|
||||
IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
|
||||
PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY,
|
||||
LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL,
|
||||
LOCAL_IMAGE_MODEL: userConfig.LOCAL_IMAGE_MODEL || existingConfig.LOCAL_IMAGE_MODEL,
|
||||
TOOL_CALLS:
|
||||
userConfig.TOOL_CALLS === undefined
|
||||
? existingConfig.TOOL_CALLS
|
||||
|
|
|
|||
|
|
@ -82,10 +82,13 @@ export default function LLMProviderSelection({
|
|||
|
||||
const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL);
|
||||
|
||||
const needsLocalImageUrl = !llmConfig.DISABLE_IMAGE_GENERATION &&
|
||||
llmConfig.IMAGE_PROVIDER === "local" && !llmConfig.LOCAL_IMAGE_URL;
|
||||
|
||||
setButtonState({
|
||||
isLoading: false,
|
||||
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl,
|
||||
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration",
|
||||
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsLocalImageUrl,
|
||||
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsLocalImageUrl ? "Please Enter Local Server URL" : "Save Configuration",
|
||||
showProgress: false
|
||||
});
|
||||
|
||||
|
|
@ -336,6 +339,54 @@ export default function LLMProviderSelection({
|
|||
return <></>;
|
||||
}
|
||||
|
||||
// Show Local Image Generation configuration
|
||||
if (provider.value === "local") {
|
||||
return (
|
||||
<div className="mb-8 space-y-4">
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Local Server URL
|
||||
</label>
|
||||
<div className="relative">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="http://192.168.1.7:7860"
|
||||
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
|
||||
value={llmConfig.LOCAL_IMAGE_URL || ""}
|
||||
onChange={(e) => {
|
||||
input_field_changed(e.target.value, "local_image_url");
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
|
||||
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
|
||||
URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.)
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Model Checkpoint (Optional)
|
||||
</label>
|
||||
<div className="relative">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="e.g., sd_xl_base_1.0.safetensors or flux1-dev.safetensors"
|
||||
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
|
||||
value={llmConfig.LOCAL_IMAGE_MODEL || ""}
|
||||
onChange={(e) => {
|
||||
input_field_changed(e.target.value, "local_image_model");
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
|
||||
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
|
||||
Leave empty to use the currently loaded model
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Show API key input for other providers
|
||||
return (
|
||||
<div className="mb-8">
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ export interface LLMConfig {
|
|||
PEXELS_API_KEY?: string;
|
||||
PIXABAY_API_KEY?: string;
|
||||
|
||||
// Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
|
||||
LOCAL_IMAGE_URL?: string;
|
||||
LOCAL_IMAGE_MODEL?: string;
|
||||
|
||||
// Other Configs
|
||||
TOOL_CALLS?: boolean;
|
||||
DISABLE_THINKING?: boolean;
|
||||
|
|
|
|||
|
|
@ -61,6 +61,15 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
|
|||
apiKeyField: "GOOGLE_API_KEY",
|
||||
apiKeyFieldLabel: "Google API Key"
|
||||
},
|
||||
local: {
|
||||
value: "local",
|
||||
label: "Local Image Generation",
|
||||
description: "Use your local AI image server (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)",
|
||||
icon: "/icons/local.png",
|
||||
requiresApiKey: false,
|
||||
apiKeyField: "LOCAL_IMAGE_URL",
|
||||
apiKeyFieldLabel: "Local Server URL"
|
||||
},
|
||||
};
|
||||
|
||||
export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ export const updateLLMConfig = (
|
|||
disable_thinking: "DISABLE_THINKING",
|
||||
extended_reasoning: "EXTENDED_REASONING",
|
||||
web_grounding: "WEB_GROUNDING",
|
||||
local_image_url: "LOCAL_IMAGE_URL",
|
||||
local_image_model: "LOCAL_IMAGE_MODEL",
|
||||
};
|
||||
|
||||
const configKey = fieldMappings[field];
|
||||
|
|
|
|||
|
|
@ -73,6 +73,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
|
|||
return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
|
||||
case "gemini_flash":
|
||||
return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== "";
|
||||
case "local":
|
||||
return llmConfig.LOCAL_IMAGE_URL && llmConfig.LOCAL_IMAGE_URL !== "";
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue