From c34fb75302934590b949cc89846bacea3e475bed Mon Sep 17 00:00:00 2001 From: shiva raj badu Date: Mon, 15 Dec 2025 19:12:34 +0545 Subject: [PATCH] feat: local image provider --- docker-compose.yml | 2 + servers/fastapi/enums/image_provider.py | 1 + servers/fastapi/models/user_config.py | 4 + .../services/image_generation_service.py | 87 +++++++++++++++++++ servers/fastapi/utils/get_env.py | 8 ++ servers/fastapi/utils/image_provider.py | 7 ++ servers/fastapi/utils/set_env.py | 8 ++ servers/fastapi/utils/user_config.py | 10 +++ .../nextjs/app/api/has-required-key/route.ts | 4 +- servers/nextjs/app/api/user-config/route.ts | 2 + servers/nextjs/components/LLMSelection.tsx | 55 +++++++++++- servers/nextjs/types/llm_config.ts | 4 + servers/nextjs/utils/providerConstants.ts | 9 ++ servers/nextjs/utils/providerUtils.ts | 2 + servers/nextjs/utils/storeHelpers.ts | 2 + 15 files changed, 201 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 39a24c97..d20cf3eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -100,6 +100,8 @@ services: - WEB_GROUNDING=${WEB_GROUNDING} - DATABASE_URL=${DATABASE_URL} - DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING} + - LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL} + - LOCAL_IMAGE_MODEL=${LOCAL_IMAGE_MODEL} development-gpu: build: diff --git a/servers/fastapi/enums/image_provider.py b/servers/fastapi/enums/image_provider.py index 2c7b3bb2..37c20195 100644 --- a/servers/fastapi/enums/image_provider.py +++ b/servers/fastapi/enums/image_provider.py @@ -5,3 +5,4 @@ class ImageProvider(Enum): PIXABAY = "pixabay" GEMINI_FLASH = "gemini_flash" DALLE3 = "dall-e-3" + LOCAL = "local" # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.) 
diff --git a/servers/fastapi/models/user_config.py b/servers/fastapi/models/user_config.py
index 99ca7e5e..506dee66 100644
--- a/servers/fastapi/models/user_config.py
+++ b/servers/fastapi/models/user_config.py
@@ -32,6 +32,10 @@ class UserConfig(BaseModel):
     PEXELS_API_KEY: Optional[str] = None
     PIXABAY_API_KEY: Optional[str] = None
 
+    # Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
+    LOCAL_IMAGE_URL: Optional[str] = None
+    LOCAL_IMAGE_MODEL: Optional[str] = None
+
     # Reasoning
     TOOL_CALLS: Optional[bool] = None
     DISABLE_THINKING: Optional[bool] = None
diff --git a/servers/fastapi/services/image_generation_service.py b/servers/fastapi/services/image_generation_service.py
index a94b8b08..1cd33be5 100644
--- a/servers/fastapi/services/image_generation_service.py
+++ b/servers/fastapi/services/image_generation_service.py
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import os
 import aiohttp
 from google import genai
@@ -9,12 +10,15 @@ from models.sql.image_asset import ImageAsset
 from utils.download_helpers import download_file
 from utils.get_env import get_pexels_api_key_env
 from utils.get_env import get_pixabay_api_key_env
+from utils.get_env import get_local_image_url_env
+from utils.get_env import get_local_image_model_env
 from utils.image_provider import (
     is_image_generation_disabled,
     is_pixels_selected,
     is_pixabay_selected,
     is_gemini_flash_selected,
     is_dalle3_selected,
+    is_local_selected,
 )
 import uuid
 
@@ -37,6 +41,8 @@ class ImageGenerationService:
             return self.generate_image_google
         elif is_dalle3_selected():
             return self.generate_image_openai
+        elif is_local_selected():
+            return self.generate_image_local
         return None
 
     def is_stock_provider_selected(self):
@@ -137,3 +143,97 @@ class ImageGenerationService:
             data = await response.json()
             image_url = data["hits"][0]["largeImageURL"]
             return image_url
+
+    async def generate_image_local(self, prompt: str, output_directory: str) -> str:
+        """
+        Generate an image using a local image generation server.
+
+        Posts the prompt to the Automatic1111 WebUI API endpoint
+        (/sdapi/v1/txt2img) of the server configured via LOCAL_IMAGE_URL,
+        decodes the first base64 image in the response and saves it in
+        output_directory with a .png extension.
+
+        Compatible with:
+        - Automatic1111 (Stable Diffusion WebUI)
+        - Stable Diffusion WebUI Forge
+        - ComfyUI (with API wrapper)
+        - Fooocus (with API mode)
+        - FLUX-based UIs with compatible API
+        - Any server implementing the /sdapi/v1/txt2img endpoint
+
+        Args:
+            prompt: The text prompt for image generation
+            output_directory: Directory to save the generated image
+
+        Returns:
+            Path to the generated image file
+
+        Raises:
+            ValueError: If LOCAL_IMAGE_URL is not set
+            Exception: If the server is unreachable, times out, responds
+                with a non-200 status, or returns no images
+        """
+        local_url = get_local_image_url_env()
+        local_model = get_local_image_model_env()
+
+        if not local_url:
+            raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
+
+        # Ensure URL doesn't have trailing slash
+        local_url = local_url.rstrip("/")
+
+        # Build the API endpoint URL (Automatic1111 compatible format)
+        api_url = f"{local_url}/sdapi/v1/txt2img"
+
+        # Build the request payload
+        payload = {
+            "prompt": prompt,
+            "negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
+            "steps": 20,
+            "width": 1024,
+            "height": 1024,
+            "cfg_scale": 7,
+            "sampler_name": "Euler a",
+        }
+
+        # Add model override if specified
+        if local_model:
+            payload["override_settings"] = {"sd_model_checkpoint": local_model}
+
+        # 5 min timeout for generation
+        timeout = aiohttp.ClientTimeout(total=300)
+
+        try:
+            async with aiohttp.ClientSession(trust_env=True) as session:
+                # `async with` releases the connection even if we raise below
+                async with session.post(
+                    api_url, json=payload, timeout=timeout
+                ) as response:
+                    if response.status != 200:
+                        error_text = await response.text()
+                        raise Exception(
+                            f"Local image API error: {response.status} - {error_text}"
+                        )
+                    data = await response.json()
+        except asyncio.TimeoutError as e:
+            # The total timeout is not an aiohttp.ClientError; handle it here
+            raise Exception(
+                f"Timed out waiting for local image server at {local_url}"
+            ) from e
+        except aiohttp.ClientError as e:
+            raise Exception(
+                f"Failed to connect to local image server at {local_url}: {str(e)}"
+            ) from e
+
+        # API returns images as base64 encoded strings
+        images = data.get("images") if isinstance(data, dict) else None
+        if not images:
+            raise Exception("No images returned from local image API")
+
+        # Decode base64 and save to file
+        image_data = base64.b64decode(images[0])
+        image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
+        with open(image_path, "wb") as f:
+            f.write(image_data)
+
+        return image_path
diff --git a/servers/fastapi/utils/get_env.py b/servers/fastapi/utils/get_env.py
index 0d98e8c5..88c19f25 100644
--- a/servers/fastapi/utils/get_env.py
+++ b/servers/fastapi/utils/get_env.py
@@ -99,3 +99,11 @@ def get_extended_reasoning_env():
 
 def get_web_grounding_env():
     return os.getenv("WEB_GROUNDING")
+
+
+def get_local_image_url_env():
+    return os.getenv("LOCAL_IMAGE_URL")
+
+
+def get_local_image_model_env():
+    return os.getenv("LOCAL_IMAGE_MODEL")
diff --git a/servers/fastapi/utils/image_provider.py b/servers/fastapi/utils/image_provider.py
index 42a87519..43b270fe 100644
--- a/servers/fastapi/utils/image_provider.py
+++ b/servers/fastapi/utils/image_provider.py
@@ -3,6 +3,7 @@ from utils.get_env import (
     get_disable_image_generation_env,
     get_google_api_key_env,
     get_image_provider_env,
+    get_local_image_url_env,
     get_openai_api_key_env,
     get_pexels_api_key_env,
     get_pixabay_api_key_env,
@@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
     return ImageProvider.DALLE3 == get_selected_image_provider()
 
 
+def is_local_selected() -> bool:
+    return ImageProvider.LOCAL == get_selected_image_provider()
+
+
 def get_selected_image_provider() -> ImageProvider | None:
     """
     Get the selected image provider from environment variables.
@@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str: return get_google_api_key_env() elif selected_image_provider == ImageProvider.DALLE3: return get_openai_api_key_env() + elif selected_image_provider == ImageProvider.LOCAL: + return get_local_image_url_env() # Returns URL instead of API key else: raise ValueError(f"Invalid image provider: {selected_image_provider}") diff --git a/servers/fastapi/utils/set_env.py b/servers/fastapi/utils/set_env.py index bd94e4c9..365688a5 100644 --- a/servers/fastapi/utils/set_env.py +++ b/servers/fastapi/utils/set_env.py @@ -87,3 +87,11 @@ def set_extended_reasoning_env(value): def set_web_grounding_env(value): os.environ["WEB_GROUNDING"] = value + + +def set_local_image_url_env(value): + os.environ["LOCAL_IMAGE_URL"] = value + + +def set_local_image_model_env(value): + os.environ["LOCAL_IMAGE_MODEL"] = value diff --git a/servers/fastapi/utils/user_config.py b/servers/fastapi/utils/user_config.py index ca60d44a..f718b374 100644 --- a/servers/fastapi/utils/user_config.py +++ b/servers/fastapi/utils/user_config.py @@ -13,6 +13,8 @@ from utils.get_env import ( get_google_api_key_env, get_google_model_env, get_llm_provider_env, + get_local_image_model_env, + get_local_image_url_env, get_ollama_model_env, get_ollama_url_env, get_openai_api_key_env, @@ -38,6 +40,8 @@ from utils.set_env import ( set_google_api_key_env, set_google_model_env, set_llm_provider_env, + set_local_image_model_env, + set_local_image_url_env, set_ollama_model_env, set_ollama_url_env, set_openai_api_key_env, @@ -85,6 +89,8 @@ def get_user_config(): ), PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(), PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(), + LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(), + LOCAL_IMAGE_MODEL=existing_config.LOCAL_IMAGE_MODEL or get_local_image_model_env(), TOOL_CALLS=( existing_config.TOOL_CALLS if existing_config.TOOL_CALLS is not None @@ -142,6 
+148,10 @@ def update_env_with_user_config(): set_pixabay_api_key_env(user_config.PIXABAY_API_KEY) if user_config.PEXELS_API_KEY: set_pexels_api_key_env(user_config.PEXELS_API_KEY) + if user_config.LOCAL_IMAGE_URL: + set_local_image_url_env(user_config.LOCAL_IMAGE_URL) + if user_config.LOCAL_IMAGE_MODEL: + set_local_image_model_env(user_config.LOCAL_IMAGE_MODEL) if user_config.TOOL_CALLS is not None: set_tool_calls_env(str(user_config.TOOL_CALLS)) if user_config.DISABLE_THINKING is not None: diff --git a/servers/nextjs/app/api/has-required-key/route.ts b/servers/nextjs/app/api/has-required-key/route.ts index 05efdbe8..3248e68d 100644 --- a/servers/nextjs/app/api/has-required-key/route.ts +++ b/servers/nextjs/app/api/has-required-key/route.ts @@ -12,10 +12,10 @@ export async function GET() { const raw = fs.readFileSync(userConfigPath, "utf-8"); const cfg = JSON.parse(raw || "{}"); keyFromFile = cfg?.OPENAI_API_KEY || ""; - } catch {} + } catch { } } - console.log(keyFromFile); + const keyFromEnv = process.env.OPENAI_API_KEY || ""; console.log(keyFromEnv); diff --git a/servers/nextjs/app/api/user-config/route.ts b/servers/nextjs/app/api/user-config/route.ts index 828a8ff0..8edbc040 100644 --- a/servers/nextjs/app/api/user-config/route.ts +++ b/servers/nextjs/app/api/user-config/route.ts @@ -64,6 +64,8 @@ export async function POST(request: Request) { userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY, IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER, PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY, + LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL, + LOCAL_IMAGE_MODEL: userConfig.LOCAL_IMAGE_MODEL || existingConfig.LOCAL_IMAGE_MODEL, TOOL_CALLS: userConfig.TOOL_CALLS === undefined ? 
existingConfig.TOOL_CALLS diff --git a/servers/nextjs/components/LLMSelection.tsx b/servers/nextjs/components/LLMSelection.tsx index 9bd13ccf..e77d333c 100644 --- a/servers/nextjs/components/LLMSelection.tsx +++ b/servers/nextjs/components/LLMSelection.tsx @@ -82,10 +82,13 @@ export default function LLMProviderSelection({ const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL); + const needsLocalImageUrl = !llmConfig.DISABLE_IMAGE_GENERATION && + llmConfig.IMAGE_PROVIDER === "local" && !llmConfig.LOCAL_IMAGE_URL; + setButtonState({ isLoading: false, - isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl, - text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration", + isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsLocalImageUrl, + text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsLocalImageUrl ? "Please Enter Local Server URL" : "Save Configuration", showProgress: false }); @@ -336,6 +339,54 @@ export default function LLMProviderSelection({ return <>; } + // Show Local Image Generation configuration + if (provider.value === "local") { + return ( +
+
+ +
+ { + input_field_changed(e.target.value, "local_image_url"); + }} + /> +
+

+ + URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.) +

+
+
+ +
+ { + input_field_changed(e.target.value, "local_image_model"); + }} + /> +
+

+ + Leave empty to use the currently loaded model +

+
+
+ ); + } + // Show API key input for other providers return (
diff --git a/servers/nextjs/types/llm_config.ts b/servers/nextjs/types/llm_config.ts index 7250e74f..985120a0 100644 --- a/servers/nextjs/types/llm_config.ts +++ b/servers/nextjs/types/llm_config.ts @@ -28,6 +28,10 @@ export interface LLMConfig { PEXELS_API_KEY?: string; PIXABAY_API_KEY?: string; + // Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.) + LOCAL_IMAGE_URL?: string; + LOCAL_IMAGE_MODEL?: string; + // Other Configs TOOL_CALLS?: boolean; DISABLE_THINKING?: boolean; diff --git a/servers/nextjs/utils/providerConstants.ts b/servers/nextjs/utils/providerConstants.ts index 0dd53af9..d7b18003 100644 --- a/servers/nextjs/utils/providerConstants.ts +++ b/servers/nextjs/utils/providerConstants.ts @@ -61,6 +61,15 @@ export const IMAGE_PROVIDERS: Record = { apiKeyField: "GOOGLE_API_KEY", apiKeyFieldLabel: "Google API Key" }, + local: { + value: "local", + label: "Local Image Generation", + description: "Use your local AI image server (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)", + icon: "/icons/local.png", + requiresApiKey: false, + apiKeyField: "LOCAL_IMAGE_URL", + apiKeyFieldLabel: "Local Server URL" + }, }; export const LLM_PROVIDERS: Record = { diff --git a/servers/nextjs/utils/providerUtils.ts b/servers/nextjs/utils/providerUtils.ts index 15a44180..6cb131c0 100644 --- a/servers/nextjs/utils/providerUtils.ts +++ b/servers/nextjs/utils/providerUtils.ts @@ -48,6 +48,8 @@ export const updateLLMConfig = ( disable_thinking: "DISABLE_THINKING", extended_reasoning: "EXTENDED_REASONING", web_grounding: "WEB_GROUNDING", + local_image_url: "LOCAL_IMAGE_URL", + local_image_model: "LOCAL_IMAGE_MODEL", }; const configKey = fieldMappings[field]; diff --git a/servers/nextjs/utils/storeHelpers.ts b/servers/nextjs/utils/storeHelpers.ts index 71c7abbf..df53a7ac 100644 --- a/servers/nextjs/utils/storeHelpers.ts +++ b/servers/nextjs/utils/storeHelpers.ts @@ -73,6 +73,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => { return 
llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== ""; case "gemini_flash": return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== ""; + case "local": + return llmConfig.LOCAL_IMAGE_URL && llmConfig.LOCAL_IMAGE_URL !== ""; default: return false; }