diff --git a/docker-compose.yml b/docker-compose.yml index 39a24c97..23d54b89 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -100,6 +100,8 @@ services: - WEB_GROUNDING=${WEB_GROUNDING} - DATABASE_URL=${DATABASE_URL} - DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING} + - LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL} + - LOCAL_IMAGE_WORKFLOW=${LOCAL_IMAGE_WORKFLOW} development-gpu: build: diff --git a/servers/fastapi/enums/image_provider.py b/servers/fastapi/enums/image_provider.py index 2c7b3bb2..37c20195 100644 --- a/servers/fastapi/enums/image_provider.py +++ b/servers/fastapi/enums/image_provider.py @@ -5,3 +5,4 @@ class ImageProvider(Enum): PIXABAY = "pixabay" GEMINI_FLASH = "gemini_flash" DALLE3 = "dall-e-3" + LOCAL = "local" # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.) diff --git a/servers/fastapi/models/user_config.py b/servers/fastapi/models/user_config.py index 99ca7e5e..2e0e0a48 100644 --- a/servers/fastapi/models/user_config.py +++ b/servers/fastapi/models/user_config.py @@ -32,6 +32,10 @@ class UserConfig(BaseModel): PEXELS_API_KEY: Optional[str] = None PIXABAY_API_KEY: Optional[str] = None + # Local Image Generation (ComfyUI) + LOCAL_IMAGE_URL: Optional[str] = None + LOCAL_IMAGE_WORKFLOW: Optional[str] = None # ComfyUI workflow JSON + # Reasoning TOOL_CALLS: Optional[bool] = None DISABLE_THINKING: Optional[bool] = None diff --git a/servers/fastapi/services/image_generation_service.py b/servers/fastapi/services/image_generation_service.py index a94b8b08..a0de5715 100644 --- a/servers/fastapi/services/image_generation_service.py +++ b/servers/fastapi/services/image_generation_service.py @@ -1,4 +1,6 @@ import asyncio +import base64 +import json import os import aiohttp from google import genai @@ -9,12 +11,15 @@ from models.sql.image_asset import ImageAsset from utils.download_helpers import download_file from utils.get_env import get_pexels_api_key_env from utils.get_env import get_pixabay_api_key_env +from 
utils.get_env import get_local_image_url_env
+from utils.get_env import get_local_image_workflow_env
 from utils.image_provider import (
     is_image_generation_disabled,
     is_pixels_selected,
     is_pixabay_selected,
     is_gemini_flash_selected,
     is_dalle3_selected,
+    is_local_selected,
 )
 import uuid
 
@@ -37,6 +42,8 @@ class ImageGenerationService:
             return self.generate_image_google
         elif is_dalle3_selected():
             return self.generate_image_openai
+        elif is_local_selected():
+            return self.generate_image_local
         return None
 
     def is_stock_provider_selected(self):
@@ -137,3 +144,285 @@ class ImageGenerationService:
             data = await response.json()
             image_url = data["hits"][0]["largeImageURL"]
             return image_url
+
+    async def generate_image_local(self, prompt: str, output_directory: str) -> str:
+        """
+        Generate an image by running a ComfyUI workflow through its HTTP API.
+
+        User provides:
+        - LOCAL_IMAGE_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188)
+        - LOCAL_IMAGE_WORKFLOW: Workflow JSON exported from ComfyUI
+
+        The workflow should have a CLIPTextEncode node with "Positive" in the title
+        where the prompt will be injected.
+
+        Args:
+            prompt: The text prompt for image generation
+            output_directory: Directory to save the generated image
+
+        Returns:
+            Path to the generated image file
+
+        Raises:
+            ValueError: If the URL or workflow is not configured, the workflow
+                is not a JSON object, or no prompt node can be found.
+            Exception: If ComfyUI rejects or fails the workflow, the run times
+                out, or the image cannot be downloaded.
+        """
+        comfyui_url = get_local_image_url_env()
+        workflow_json = get_local_image_workflow_env()
+
+        if not comfyui_url:
+            raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
+
+        if not workflow_json:
+            raise ValueError("LOCAL_IMAGE_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.")
+
+        # Ensure URL doesn't have trailing slash
+        comfyui_url = comfyui_url.rstrip("/")
+
+        # Parse the workflow JSON
+        try:
+            workflow = json.loads(workflow_json)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid workflow JSON: {str(e)}") from e
+
+        # An API-format workflow is a JSON object keyed by node id. Reject
+        # anything else here instead of failing later with AttributeError.
+        if not isinstance(workflow, dict):
+            raise ValueError("Invalid workflow JSON: expected an object keyed by node id")
+
+        # Find and update the positive prompt node
+        workflow = self._inject_prompt_into_workflow(workflow, prompt)
+
+        async with aiohttp.ClientSession(trust_env=True) as session:
+            # Step 1: Submit workflow
+            prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow)
+
+            # Step 2: Wait for completion
+            status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id)
+
+            # Step 3: Download the generated image
+            image_path = await self._download_comfyui_image(
+                session, comfyui_url, status_data, prompt_id, output_directory
+            )
+
+        return image_path
+
+    def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict:
+        """
+        Write the prompt into the workflow's positive prompt node.
+
+        Node selection, in order of preference:
+        1. The first CLIPTextEncode node with a "text" input whose title
+           contains "positive" (case-insensitive).
+        2. Otherwise the first CLIPTextEncode node with a "text" input whose
+           title does not contain "negative".
+
+        The workflow is modified in place and returned.
+
+        Raises:
+            ValueError: If no suitable CLIPTextEncode node exists.
+        """
+        fallback_id = None
+
+        for node_id, node_data in workflow.items():
+            # Skip entries that are not usable nodes (non-dict values, or a
+            # missing/null "inputs") instead of crashing on them.
+            if not isinstance(node_data, dict):
+                continue
+            if node_data.get("class_type") != "CLIPTextEncode":
+                continue
+            inputs = node_data.get("inputs")
+            if not isinstance(inputs, dict) or "text" not in inputs:
+                continue
+
+            # "_meta" and "title" may be absent or null in exported workflows.
+            meta = node_data.get("_meta")
+            title = meta.get("title") if isinstance(meta, dict) else None
+            title = title.lower() if isinstance(title, str) else ""
+
+            if "positive" in title:
+                inputs["text"] = prompt
+                print(f"Injected prompt into node {node_id}: {title}")
+                return workflow
+
+            # Remember the first node that does not look like a negative prompt.
+            if fallback_id is None and "negative" not in title:
+                fallback_id = node_id
+
+        if fallback_id is None:
+            raise ValueError("Could not find a positive prompt node (CLIPTextEncode) in the workflow")
+
+        workflow[fallback_id]["inputs"]["text"] = prompt
+        print(f"Injected prompt into node {fallback_id} (fallback)")
+        return workflow
+
+    async def _submit_comfyui_workflow(
+        self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict
+    ) -> str:
+        """
+        Submit the workflow to ComfyUI's /prompt endpoint.
+
+        Returns:
+            The prompt_id ComfyUI assigned to the queued run.
+
+        Raises:
+            Exception: If the response status is not 200 or carries no prompt_id.
+        """
+        payload = {
+            "prompt": workflow,
+            "client_id": str(uuid.uuid4()),
+        }
+
+        # "async with" releases the connection on every path, including errors.
+        async with session.post(
+            f"{comfyui_url}/prompt",
+            json=payload,
+            timeout=aiohttp.ClientTimeout(total=30),
+        ) as response:
+            if response.status != 200:
+                error_text = await response.text()
+                raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}")
+            data = await response.json()
+
+        prompt_id = data.get("prompt_id")
+
+        if not prompt_id:
+            raise Exception("No prompt_id returned from ComfyUI")
+
+        print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}")
+        return prompt_id
+
+    async def _wait_for_comfyui_completion(
+        self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str,
+        timeout: int = 300, poll_interval: int = 4
+    ) -> dict:
+        """
+        Poll ComfyUI's /history/{prompt_id} endpoint until the workflow finishes.
+
+        Args:
+            timeout: Maximum seconds to keep polling before giving up.
+            poll_interval: Seconds to sleep before each poll.
+
+        Returns:
+            The history payload (keyed by prompt_id) once the run is marked
+            completed or has produced outputs.
+
+        Raises:
+            Exception: If the run reports an error or the timeout elapses.
+        """
+        loop = asyncio.get_running_loop()
+        start_time = loop.time()
+
+        while True:
+            elapsed = loop.time() - start_time
+            if elapsed > timeout:
+                raise Exception(f"ComfyUI workflow timed out after {timeout} seconds")
+
+            await asyncio.sleep(poll_interval)
+
+            try:
+                async with session.get(
+                    f"{comfyui_url}/history/{prompt_id}",
+                    timeout=aiohttp.ClientTimeout(total=30),
+                ) as response:
+                    if response.status != 200:
+                        continue
+                    status_data = await response.json()
+            except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
+                # Transient network failure or non-JSON body: retry on the next
+                # poll. Unlike a bare "except", this lets task cancellation
+                # (asyncio.CancelledError) propagate.
+                continue
+
+            execution_data = (
+                status_data.get(prompt_id) if isinstance(status_data, dict) else None
+            )
+            if isinstance(execution_data, dict):
+                # Check for completion
+                status = execution_data.get("status")
+                if isinstance(status, dict):
+                    if status.get("completed", False):
+                        print("ComfyUI workflow completed successfully")
+                        return status_data
+                    if "error" in status:
+                        raise Exception(f"ComfyUI workflow error: {status['error']}")
+                    # ComfyUI flags failed runs via status_str; without this
+                    # check a failed run is polled until the timeout.
+                    if status.get("status_str") == "error":
+                        raise Exception(f"ComfyUI workflow error: {status.get('messages')}")
+
+                # Also check if outputs exist (alternative completion check)
+                if execution_data.get("outputs"):
+                    print("ComfyUI workflow completed (outputs found)")
+                    return status_data
+
+            print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)")
+
+    async def _download_comfyui_image(
+        self, session: aiohttp.ClientSession, comfyui_url: str,
+        status_data: dict, prompt_id: str, output_directory: str
+    ) -> str:
+        """
+        Download the first generated image from ComfyUI's /view endpoint.
+
+        Args:
+            status_data: History payload returned by the completion poll.
+            output_directory: Directory to save the image (created if missing).
+
+        Returns:
+            Path to the saved image file.
+
+        Raises:
+            Exception: If the history has no outputs or images, or the
+                download does not return status 200.
+        """
+        if prompt_id not in status_data:
+            raise Exception("Prompt ID not found in status data")
+
+        outputs = status_data[prompt_id].get("outputs", {})
+
+        if not outputs:
+            raise Exception("No outputs found in ComfyUI response")
+
+        # Find the first image in outputs
+        for node_output in outputs.values():
+            if "images" not in node_output:
+                continue
+
+            for image_info in node_output["images"]:
+                filename = image_info["filename"]
+                subfolder = image_info.get("subfolder", "")
+
+                # Build view params. Use the type ComfyUI reported (e.g. "temp"
+                # for previews) and fall back to "output" when it is absent.
+                params = {
+                    "filename": filename,
+                    "type": image_info.get("type", "output"),
+                }
+                if subfolder:
+                    params["subfolder"] = subfolder
+
+                # Download the image
+                async with session.get(
+                    f"{comfyui_url}/view",
+                    params=params,
+                    timeout=aiohttp.ClientTimeout(total=60),
+                ) as response:
+                    if response.status != 200:
+                        raise Exception(f"Failed to download image: {response.status}")
+                    image_data = await response.read()
+
+                # Determine extension; default to png for a missing/empty one
+                ext = os.path.splitext(filename)[1].lstrip(".") or "png"
+                os.makedirs(output_directory, exist_ok=True)
+                image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
+
+                with open(image_path, "wb") as f:
+                    f.write(image_data)
+
+                print(f"Downloaded image from ComfyUI: {image_path}")
+                return image_path
+
+        raise Exception("No images found in ComfyUI outputs")
diff --git a/servers/fastapi/utils/get_env.py b/servers/fastapi/utils/get_env.py
index 0d98e8c5..8d97ba27 100644
--- a/servers/fastapi/utils/get_env.py
+++ b/servers/fastapi/utils/get_env.py
@@ -99,3 +99,11 @@ def get_extended_reasoning_env():
 
 def get_web_grounding_env():
     return os.getenv("WEB_GROUNDING")
+
+
+def get_local_image_url_env():
+    return os.getenv("LOCAL_IMAGE_URL")
+
+
+def get_local_image_workflow_env():
+    return os.getenv("LOCAL_IMAGE_WORKFLOW")
diff --git a/servers/fastapi/utils/image_provider.py b/servers/fastapi/utils/image_provider.py
index 42a87519..43b270fe 100644
--- a/servers/fastapi/utils/image_provider.py
+++ b/servers/fastapi/utils/image_provider.py
@@ -3,6 +3,7 @@ from utils.get_env import (
     get_disable_image_generation_env,
     get_google_api_key_env,
     get_image_provider_env,
+    get_local_image_url_env,
     get_openai_api_key_env,
     get_pexels_api_key_env,
     get_pixabay_api_key_env,
@@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
     return ImageProvider.DALLE3 == get_selected_image_provider()
 
 
+def is_local_selected() -> bool:
+    return ImageProvider.LOCAL == get_selected_image_provider()
+
+
 def get_selected_image_provider() -> ImageProvider | None:
     """
     Get the selected image provider from environment variables.
@@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str: return get_google_api_key_env() elif selected_image_provider == ImageProvider.DALLE3: return get_openai_api_key_env() + elif selected_image_provider == ImageProvider.LOCAL: + return get_local_image_url_env() # Returns URL instead of API key else: raise ValueError(f"Invalid image provider: {selected_image_provider}") diff --git a/servers/fastapi/utils/set_env.py b/servers/fastapi/utils/set_env.py index bd94e4c9..766637e7 100644 --- a/servers/fastapi/utils/set_env.py +++ b/servers/fastapi/utils/set_env.py @@ -87,3 +87,11 @@ def set_extended_reasoning_env(value): def set_web_grounding_env(value): os.environ["WEB_GROUNDING"] = value + + +def set_local_image_url_env(value): + os.environ["LOCAL_IMAGE_URL"] = value + + +def set_local_image_workflow_env(value): + os.environ["LOCAL_IMAGE_WORKFLOW"] = value diff --git a/servers/fastapi/utils/user_config.py b/servers/fastapi/utils/user_config.py index ca60d44a..bc79125e 100644 --- a/servers/fastapi/utils/user_config.py +++ b/servers/fastapi/utils/user_config.py @@ -13,6 +13,8 @@ from utils.get_env import ( get_google_api_key_env, get_google_model_env, get_llm_provider_env, + get_local_image_url_env, + get_local_image_workflow_env, get_ollama_model_env, get_ollama_url_env, get_openai_api_key_env, @@ -38,6 +40,8 @@ from utils.set_env import ( set_google_api_key_env, set_google_model_env, set_llm_provider_env, + set_local_image_url_env, + set_local_image_workflow_env, set_ollama_model_env, set_ollama_url_env, set_openai_api_key_env, @@ -85,6 +89,8 @@ def get_user_config(): ), PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(), PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(), + LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(), + LOCAL_IMAGE_WORKFLOW=existing_config.LOCAL_IMAGE_WORKFLOW or get_local_image_workflow_env(), TOOL_CALLS=( existing_config.TOOL_CALLS if existing_config.TOOL_CALLS is 
not None @@ -142,6 +148,10 @@ def update_env_with_user_config(): set_pixabay_api_key_env(user_config.PIXABAY_API_KEY) if user_config.PEXELS_API_KEY: set_pexels_api_key_env(user_config.PEXELS_API_KEY) + if user_config.LOCAL_IMAGE_URL: + set_local_image_url_env(user_config.LOCAL_IMAGE_URL) + if user_config.LOCAL_IMAGE_WORKFLOW: + set_local_image_workflow_env(user_config.LOCAL_IMAGE_WORKFLOW) if user_config.TOOL_CALLS is not None: set_tool_calls_env(str(user_config.TOOL_CALLS)) if user_config.DISABLE_THINKING is not None: diff --git a/servers/nextjs/app/api/has-required-key/route.ts b/servers/nextjs/app/api/has-required-key/route.ts index 05efdbe8..3248e68d 100644 --- a/servers/nextjs/app/api/has-required-key/route.ts +++ b/servers/nextjs/app/api/has-required-key/route.ts @@ -12,10 +12,10 @@ export async function GET() { const raw = fs.readFileSync(userConfigPath, "utf-8"); const cfg = JSON.parse(raw || "{}"); keyFromFile = cfg?.OPENAI_API_KEY || ""; - } catch {} + } catch { } } - console.log(keyFromFile); + const keyFromEnv = process.env.OPENAI_API_KEY || ""; console.log(keyFromEnv); diff --git a/servers/nextjs/app/api/user-config/route.ts b/servers/nextjs/app/api/user-config/route.ts index 828a8ff0..697ef967 100644 --- a/servers/nextjs/app/api/user-config/route.ts +++ b/servers/nextjs/app/api/user-config/route.ts @@ -64,6 +64,8 @@ export async function POST(request: Request) { userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY, IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER, PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY, + LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL, + LOCAL_IMAGE_WORKFLOW: userConfig.LOCAL_IMAGE_WORKFLOW || existingConfig.LOCAL_IMAGE_WORKFLOW, TOOL_CALLS: userConfig.TOOL_CALLS === undefined ? 
existingConfig.TOOL_CALLS diff --git a/servers/nextjs/components/LLMSelection.tsx b/servers/nextjs/components/LLMSelection.tsx index 9bd13ccf..0decd24b 100644 --- a/servers/nextjs/components/LLMSelection.tsx +++ b/servers/nextjs/components/LLMSelection.tsx @@ -82,10 +82,14 @@ export default function LLMProviderSelection({ const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL); + const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION && + llmConfig.IMAGE_PROVIDER === "local" && + (!llmConfig.LOCAL_IMAGE_URL || !llmConfig.LOCAL_IMAGE_WORKFLOW); + setButtonState({ isLoading: false, - isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl, - text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration", + isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig, + text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration", showProgress: false }); @@ -336,6 +340,54 @@ export default function LLMProviderSelection({ return <>>; } + // Show ComfyUI configuration + if (provider.value === "local") { + return ( +
+ + Use your machine IP address (not localhost) when running in Docker +
++ Export your workflow from ComfyUI using "Save (API Format)" and paste the JSON here. + The positive prompt node (CLIPTextEncode) will be automatically updated. +
+