feat: local image provider

This commit is contained in:
shiva raj badu 2025-12-15 19:12:34 +05:45
parent c9b60d5f51
commit c34fb75302
No known key found for this signature in database
15 changed files with 201 additions and 4 deletions

View file

@ -100,6 +100,8 @@ services:
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}
- DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING}
- LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL}
- LOCAL_IMAGE_MODEL=${LOCAL_IMAGE_MODEL}
development-gpu:
build:

View file

@ -5,3 +5,4 @@ class ImageProvider(Enum):
PIXABAY = "pixabay"
GEMINI_FLASH = "gemini_flash"
DALLE3 = "dall-e-3"
LOCAL = "local" # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)

View file

@ -32,6 +32,10 @@ class UserConfig(BaseModel):
PEXELS_API_KEY: Optional[str] = None
PIXABAY_API_KEY: Optional[str] = None
# Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
LOCAL_IMAGE_URL: Optional[str] = None
LOCAL_IMAGE_MODEL: Optional[str] = None
# Reasoning
TOOL_CALLS: Optional[bool] = None
DISABLE_THINKING: Optional[bool] = None

View file

@ -1,4 +1,5 @@
import asyncio
import base64
import os
import aiohttp
from google import genai
@ -9,12 +10,15 @@ from models.sql.image_asset import ImageAsset
from utils.download_helpers import download_file
from utils.get_env import get_pexels_api_key_env
from utils.get_env import get_pixabay_api_key_env
from utils.get_env import get_local_image_url_env
from utils.get_env import get_local_image_model_env
from utils.image_provider import (
is_image_generation_disabled,
is_pixels_selected,
is_pixabay_selected,
is_gemini_flash_selected,
is_dalle3_selected,
is_local_selected,
)
import uuid
@ -37,6 +41,8 @@ class ImageGenerationService:
return self.generate_image_google
elif is_dalle3_selected():
return self.generate_image_openai
elif is_local_selected():
return self.generate_image_local
return None
def is_stock_provider_selected(self):
@ -137,3 +143,84 @@ class ImageGenerationService:
data = await response.json()
image_url = data["hits"][0]["largeImageURL"]
return image_url
async def generate_image_local(self, prompt: str, output_directory: str) -> str:
    """
    Generate an image with a local image generation server and save it as a PNG.

    The request is POSTed to ``{LOCAL_IMAGE_URL}/sdapi/v1/txt2img`` using the
    Automatic1111 WebUI payload format, so any server that implements that
    endpoint can be used (for example Automatic1111, Stable Diffusion WebUI
    Forge, or ComfyUI / Fooocus / FLUX front-ends that expose a compatible API).

    Args:
        prompt: The text prompt for image generation.
        output_directory: Directory to save the generated image in. It is
            created if it does not exist yet.

    Returns:
        Path to the generated image file.

    Raises:
        ValueError: If LOCAL_IMAGE_URL is not set.
        asyncio.TimeoutError: If the server does not finish within the
            5 minute request budget.
        Exception: If the server answers with a non-200 status, returns no
            images, or the HTTP request fails at the client level.
    """
    local_url = get_local_image_url_env()
    local_model = get_local_image_model_env()

    if not local_url:
        raise ValueError("LOCAL_IMAGE_URL environment variable is not set")

    # Ensure URL doesn't have a trailing slash before appending the endpoint path
    local_url = local_url.rstrip("/")

    # Automatic1111 compatible txt2img endpoint
    api_url = f"{local_url}/sdapi/v1/txt2img"

    payload = {
        "prompt": prompt,
        "negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
        "steps": 20,
        "width": 1024,
        "height": 1024,
        "cfg_scale": 7,
        "sampler_name": "Euler a",
    }

    # Only override the checkpoint when one is configured; otherwise the
    # server keeps using whatever model it currently has loaded.
    if local_model:
        payload["override_settings"] = {"sd_model_checkpoint": local_model}

    timeout_seconds = 300  # 5 min budget for generation

    async with aiohttp.ClientSession(trust_env=True) as session:
        try:
            # Use the response as a context manager so the connection is
            # released on every exit path, including the error raises below.
            async with session.post(
                api_url,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=timeout_seconds),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(
                        f"Local image API error: {response.status} - {error_text}"
                    )
                data = await response.json()
        except aiohttp.ClientError as e:
            raise Exception(
                f"Failed to connect to local image server at {local_url}: {str(e)}"
            ) from e
        except asyncio.TimeoutError as e:
            # A total-timeout expiry is not an aiohttp.ClientError and carries
            # an empty message; re-raise the same type with useful context.
            raise asyncio.TimeoutError(
                f"Local image server at {local_url} did not respond within "
                f"{timeout_seconds} seconds"
            ) from e

    # API returns images as a list of base64 encoded strings
    images = data.get("images") if isinstance(data, dict) else None
    if not images:
        raise Exception("No images returned from local image API")

    image_base64 = images[0]
    # Some API wrappers return a data URI ("data:image/png;base64,...").
    # b64decode would silently mis-decode the prefix, so drop it first;
    # a comma never occurs inside valid base64 data.
    if isinstance(image_base64, str) and image_base64.startswith("data:"):
        image_base64 = image_base64.split(",", 1)[-1]

    image_data = base64.b64decode(image_base64)

    os.makedirs(output_directory, exist_ok=True)
    image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
    with open(image_path, "wb") as f:
        f.write(image_data)

    return image_path

View file

@ -99,3 +99,11 @@ def get_extended_reasoning_env():
def get_web_grounding_env():
    """Return the value of the ``WEB_GROUNDING`` environment variable, or None if unset."""
    return os.environ.get("WEB_GROUNDING")
def get_local_image_url_env():
    """Return the value of the ``LOCAL_IMAGE_URL`` environment variable, or None if unset."""
    return os.environ.get("LOCAL_IMAGE_URL")
def get_local_image_model_env():
    """Return the value of the ``LOCAL_IMAGE_MODEL`` environment variable, or None if unset."""
    return os.environ.get("LOCAL_IMAGE_MODEL")

View file

@ -3,6 +3,7 @@ from utils.get_env import (
get_disable_image_generation_env,
get_google_api_key_env,
get_image_provider_env,
get_local_image_url_env,
get_openai_api_key_env,
get_pexels_api_key_env,
get_pixabay_api_key_env,
@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
return ImageProvider.DALLE3 == get_selected_image_provider()
def is_local_selected() -> bool:
    """Report whether the currently selected image provider is ``ImageProvider.LOCAL``."""
    selected_provider = get_selected_image_provider()
    return ImageProvider.LOCAL == selected_provider
def get_selected_image_provider() -> ImageProvider | None:
"""
Get the selected image provider from environment variables.
@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str:
return get_google_api_key_env()
elif selected_image_provider == ImageProvider.DALLE3:
return get_openai_api_key_env()
elif selected_image_provider == ImageProvider.LOCAL:
return get_local_image_url_env() # Returns URL instead of API key
else:
raise ValueError(f"Invalid image provider: {selected_image_provider}")

View file

@ -87,3 +87,11 @@ def set_extended_reasoning_env(value):
def set_web_grounding_env(value):
    """Store ``value`` in the process environment under ``WEB_GROUNDING``."""
    env_key = "WEB_GROUNDING"
    os.environ[env_key] = value
def set_local_image_url_env(value):
    """Store ``value`` in the process environment under ``LOCAL_IMAGE_URL``."""
    env_key = "LOCAL_IMAGE_URL"
    os.environ[env_key] = value
def set_local_image_model_env(value):
    """Store ``value`` in the process environment under ``LOCAL_IMAGE_MODEL``."""
    env_key = "LOCAL_IMAGE_MODEL"
    os.environ[env_key] = value

View file

@ -13,6 +13,8 @@ from utils.get_env import (
get_google_api_key_env,
get_google_model_env,
get_llm_provider_env,
get_local_image_model_env,
get_local_image_url_env,
get_ollama_model_env,
get_ollama_url_env,
get_openai_api_key_env,
@ -38,6 +40,8 @@ from utils.set_env import (
set_google_api_key_env,
set_google_model_env,
set_llm_provider_env,
set_local_image_model_env,
set_local_image_url_env,
set_ollama_model_env,
set_ollama_url_env,
set_openai_api_key_env,
@ -85,6 +89,8 @@ def get_user_config():
),
PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(),
PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(),
LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(),
LOCAL_IMAGE_MODEL=existing_config.LOCAL_IMAGE_MODEL or get_local_image_model_env(),
TOOL_CALLS=(
existing_config.TOOL_CALLS
if existing_config.TOOL_CALLS is not None
@ -142,6 +148,10 @@ def update_env_with_user_config():
set_pixabay_api_key_env(user_config.PIXABAY_API_KEY)
if user_config.PEXELS_API_KEY:
set_pexels_api_key_env(user_config.PEXELS_API_KEY)
if user_config.LOCAL_IMAGE_URL:
set_local_image_url_env(user_config.LOCAL_IMAGE_URL)
if user_config.LOCAL_IMAGE_MODEL:
set_local_image_model_env(user_config.LOCAL_IMAGE_MODEL)
if user_config.TOOL_CALLS is not None:
set_tool_calls_env(str(user_config.TOOL_CALLS))
if user_config.DISABLE_THINKING is not None:

View file

@ -12,10 +12,10 @@ export async function GET() {
const raw = fs.readFileSync(userConfigPath, "utf-8");
const cfg = JSON.parse(raw || "{}");
keyFromFile = cfg?.OPENAI_API_KEY || "";
} catch {}
} catch { }
}
console.log(keyFromFile);
const keyFromEnv = process.env.OPENAI_API_KEY || "";
console.log(keyFromEnv);

View file

@ -64,6 +64,8 @@ export async function POST(request: Request) {
userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY,
LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL,
LOCAL_IMAGE_MODEL: userConfig.LOCAL_IMAGE_MODEL || existingConfig.LOCAL_IMAGE_MODEL,
TOOL_CALLS:
userConfig.TOOL_CALLS === undefined
? existingConfig.TOOL_CALLS

View file

@ -82,10 +82,13 @@ export default function LLMProviderSelection({
const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL);
const needsLocalImageUrl = !llmConfig.DISABLE_IMAGE_GENERATION &&
llmConfig.IMAGE_PROVIDER === "local" && !llmConfig.LOCAL_IMAGE_URL;
setButtonState({
isLoading: false,
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl,
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration",
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsLocalImageUrl,
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsLocalImageUrl ? "Please Enter Local Server URL" : "Save Configuration",
showProgress: false
});
@ -336,6 +339,54 @@ export default function LLMProviderSelection({
return <></>;
}
// Show Local Image Generation configuration
if (provider.value === "local") {
return (
<div className="mb-8 space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Local Server URL
</label>
<div className="relative">
<input
type="text"
placeholder="http://192.168.1.7:7860"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.LOCAL_IMAGE_URL || ""}
onChange={(e) => {
input_field_changed(e.target.value, "local_image_url");
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.)
</p>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Model Checkpoint (Optional)
</label>
<div className="relative">
<input
type="text"
placeholder="e.g., sd_xl_base_1.0.safetensors or flux1-dev.safetensors"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.LOCAL_IMAGE_MODEL || ""}
onChange={(e) => {
input_field_changed(e.target.value, "local_image_model");
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
Leave empty to use the currently loaded model
</p>
</div>
</div>
);
}
// Show API key input for other providers
return (
<div className="mb-8">

View file

@ -28,6 +28,10 @@ export interface LLMConfig {
PEXELS_API_KEY?: string;
PIXABAY_API_KEY?: string;
// Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
LOCAL_IMAGE_URL?: string;
LOCAL_IMAGE_MODEL?: string;
// Other Configs
TOOL_CALLS?: boolean;
DISABLE_THINKING?: boolean;

View file

@ -61,6 +61,15 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
apiKeyField: "GOOGLE_API_KEY",
apiKeyFieldLabel: "Google API Key"
},
local: {
value: "local",
label: "Local Image Generation",
description: "Use your local AI image server (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)",
icon: "/icons/local.png",
requiresApiKey: false,
apiKeyField: "LOCAL_IMAGE_URL",
apiKeyFieldLabel: "Local Server URL"
},
};
export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {

View file

@ -48,6 +48,8 @@ export const updateLLMConfig = (
disable_thinking: "DISABLE_THINKING",
extended_reasoning: "EXTENDED_REASONING",
web_grounding: "WEB_GROUNDING",
local_image_url: "LOCAL_IMAGE_URL",
local_image_model: "LOCAL_IMAGE_MODEL",
};
const configKey = fieldMappings[field];

View file

@ -73,6 +73,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
case "gemini_flash":
return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== "";
case "local":
return llmConfig.LOCAL_IMAGE_URL && llmConfig.LOCAL_IMAGE_URL !== "";
default:
return false;
}