diff --git a/docker-compose.yml b/docker-compose.yml index d20cf3eb..23d54b89 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -101,7 +101,7 @@ services: - DATABASE_URL=${DATABASE_URL} - DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING} - LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL} - - LOCAL_IMAGE_MODEL=${LOCAL_IMAGE_MODEL} + - LOCAL_IMAGE_WORKFLOW=${LOCAL_IMAGE_WORKFLOW} development-gpu: build: diff --git a/servers/fastapi/models/user_config.py b/servers/fastapi/models/user_config.py index 506dee66..2e0e0a48 100644 --- a/servers/fastapi/models/user_config.py +++ b/servers/fastapi/models/user_config.py @@ -32,9 +32,9 @@ class UserConfig(BaseModel): PEXELS_API_KEY: Optional[str] = None PIXABAY_API_KEY: Optional[str] = None - # Local Image Generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.) + # Local Image Generation (ComfyUI) LOCAL_IMAGE_URL: Optional[str] = None - LOCAL_IMAGE_MODEL: Optional[str] = None + LOCAL_IMAGE_WORKFLOW: Optional[str] = None # ComfyUI workflow JSON # Reasoning TOOL_CALLS: Optional[bool] = None diff --git a/servers/fastapi/services/image_generation_service.py b/servers/fastapi/services/image_generation_service.py index 72a9e7dc..a0de5715 100644 --- a/servers/fastapi/services/image_generation_service.py +++ b/servers/fastapi/services/image_generation_service.py @@ -1,5 +1,6 @@ import asyncio import base64 +import json import os import aiohttp from google import genai @@ -11,7 +12,7 @@ from utils.download_helpers import download_file from utils.get_env import get_pexels_api_key_env from utils.get_env import get_pixabay_api_key_env from utils.get_env import get_local_image_url_env -from utils.get_env import get_local_image_model_env +from utils.get_env import get_local_image_workflow_env from utils.image_provider import ( is_image_generation_disabled, is_pixels_selected, @@ -146,17 +147,14 @@ class ImageGenerationService: async def generate_image_local(self, prompt: str, output_directory: str) -> str: """ - Generate image using a local image generation server. + Generate image using ComfyUI workflow API. - User provides the full API URL including the endpoint. - Examples: - - Automatic1111: http://192.168.1.7:7860/sdapi/v1/txt2img - - Fooocus: http://192.168.1.7:7860/v1/generation/text-to-image - - Custom: http://192.168.1.7:7860/generate + User provides: + - LOCAL_IMAGE_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188) + - LOCAL_IMAGE_WORKFLOW: Workflow JSON exported from ComfyUI - Supports both: - - JSON response with base64 images (Automatic1111 style) - - Direct binary image response (raw PNG/JPEG) + The workflow should have a CLIPTextEncode node with "Positive" in the title + where the prompt will be injected. Args: prompt: The text prompt for image generation @@ -165,82 +163,205 @@ class ImageGenerationService: Returns: Path to the generated image file """ - api_url = get_local_image_url_env() - local_model = get_local_image_model_env() + comfyui_url = get_local_image_url_env() + workflow_json = get_local_image_workflow_env() - if not api_url: + if not comfyui_url: raise ValueError("LOCAL_IMAGE_URL environment variable is not set") - # Build the request payload (Automatic1111 compatible format) - # Most local tools accept similar payload structure - payload = { - "prompt": prompt, - "negative_prompt": "blurry, bad quality, distorted, ugly, deformed", - "steps": 20, - "width": 1024, - "height": 1024, - "cfg_scale": 7, - "sampler_name": "Euler a", - } + if not workflow_json: + raise ValueError("LOCAL_IMAGE_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.") - # Add model override if specified - if local_model: - payload["override_settings"] = { - "sd_model_checkpoint": local_model - } + # Ensure URL doesn't have trailing slash + comfyui_url = comfyui_url.rstrip("/") + + # Parse the workflow JSON + try: + workflow = json.loads(workflow_json) + except json.JSONDecodeError as e: + raise ValueError(f"Invalid workflow JSON: {str(e)}") + + # Find and update the positive prompt node + workflow = self._inject_prompt_into_workflow(workflow, prompt) async with aiohttp.ClientSession(trust_env=True) as session: + # Step 1: Submit workflow + prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow) + + # Step 2: Wait for completion + status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id) + + # Step 3: Download the generated image + image_path = await self._download_comfyui_image( + session, comfyui_url, status_data, prompt_id, output_directory + ) + + return image_path + + def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict: + """ + Find the positive prompt node in the workflow and inject the prompt text. + Looks for CLIPTextEncode nodes with 'Positive' in the title. + """ + prompt_injected = False + + for node_id, node_data in workflow.items(): + # Check if this is a CLIPTextEncode node + if node_data.get("class_type") == "CLIPTextEncode": + meta = node_data.get("_meta", {}) + title = meta.get("title", "").lower() + + # Check if it's a positive prompt node + if "positive" in title: + if "inputs" in node_data and "text" in node_data["inputs"]: + node_data["inputs"]["text"] = prompt + prompt_injected = True + print(f"Injected prompt into node {node_id}: {title}") + break + + if not prompt_injected: + # Fallback: try to find any CLIPTextEncode node with text input + for node_id, node_data in workflow.items(): + if node_data.get("class_type") == "CLIPTextEncode": + if "inputs" in node_data and "text" in node_data["inputs"]: + # Skip if it looks like a negative prompt + meta = node_data.get("_meta", {}) + title = meta.get("title", "").lower() + if "negative" in title: + continue + node_data["inputs"]["text"] = prompt + prompt_injected = True + print(f"Injected prompt into node {node_id} (fallback)") + break + + if not prompt_injected: + raise ValueError("Could not find a positive prompt node (CLIPTextEncode) in the workflow") + + return workflow + + async def _submit_comfyui_workflow( + self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict + ) -> str: + """Submit workflow to ComfyUI and return the prompt_id.""" + client_id = str(uuid.uuid4()) + payload = { + "prompt": workflow, + "client_id": client_id + } + + response = await session.post( + f"{comfyui_url}/prompt", + json=payload, + timeout=aiohttp.ClientTimeout(total=30) + ) + + if response.status != 200: + error_text = await response.text() + raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}") + + data = await response.json() + prompt_id = data.get("prompt_id") + + if not prompt_id: + raise Exception("No prompt_id returned from ComfyUI") + + print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}") + return prompt_id + + async def _wait_for_comfyui_completion( + self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str, + timeout: int = 300, poll_interval: int = 4 + ) -> dict: + """Poll ComfyUI history endpoint until workflow completes.""" + start_time = asyncio.get_event_loop().time() + + while True: + elapsed = asyncio.get_event_loop().time() - start_time + if elapsed > timeout: + raise Exception(f"ComfyUI workflow timed out after {timeout} seconds") + + await asyncio.sleep(poll_interval) + + response = await session.get( + f"{comfyui_url}/history/{prompt_id}", + timeout=aiohttp.ClientTimeout(total=30) + ) + + if response.status != 200: + continue + try: - response = await session.post( - api_url, - json=payload, - timeout=aiohttp.ClientTimeout(total=300) # 5 min timeout for generation - ) + status_data = await response.json() + except: + continue + + if prompt_id in status_data: + execution_data = status_data[prompt_id] - if response.status != 200: - error_text = await response.text() - raise Exception(f"Local image API error: {response.status} - {error_text}") + # Check for completion + if "status" in execution_data: + status = execution_data["status"] + if status.get("completed", False): + print("ComfyUI workflow completed successfully") + return status_data + if "error" in status: + raise Exception(f"ComfyUI workflow error: {status['error']}") - content_type = response.headers.get("Content-Type", "") - - # Handle direct binary image response (image/png, image/jpeg, etc.) - if content_type.startswith("image/"): - image_data = await response.read() - # Determine file extension from content type - ext = "png" if "png" in content_type else "jpg" - image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}") + # Also check if outputs exist (alternative completion check) + if "outputs" in execution_data and execution_data["outputs"]: + print("ComfyUI workflow completed (outputs found)") + return status_data + + print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)") + + async def _download_comfyui_image( + self, session: aiohttp.ClientSession, comfyui_url: str, + status_data: dict, prompt_id: str, output_directory: str + ) -> str: + """Download the generated image from ComfyUI.""" + if prompt_id not in status_data: + raise Exception("Prompt ID not found in status data") + + outputs = status_data[prompt_id].get("outputs", {}) + + if not outputs: + raise Exception("No outputs found in ComfyUI response") + + # Find the first image in outputs + for node_id, node_output in outputs.items(): + if "images" in node_output: + for image_info in node_output["images"]: + filename = image_info["filename"] + subfolder = image_info.get("subfolder", "") - with open(image_path, "wb") as f: - f.write(image_data) + # Build view params + params = { + "filename": filename, + "type": "output" + } + if subfolder: + params["subfolder"] = subfolder - return image_path - - # Handle JSON response with base64 encoded images - data = await response.json() - - # Check for images in various response formats - if "images" in data and len(data["images"]) > 0: - image_base64 = data["images"][0] - # Handle if it's a dict with base64 key - if isinstance(image_base64, dict) and "base64" in image_base64: - image_base64 = image_base64["base64"] - elif "image" in data: - image_base64 = data["image"] - elif "output" in data: - image_base64 = data["output"] - elif "result" in data: - image_base64 = data["result"] - else: - raise Exception(f"No images found in response. Keys: {list(data.keys())}") - - # Decode base64 and save to file - image_data = base64.b64decode(image_base64) - image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png") - - with open(image_path, "wb") as f: - f.write(image_data) - - return image_path + # Download the image + response = await session.get( + f"{comfyui_url}/view", + params=params, + timeout=aiohttp.ClientTimeout(total=60) + ) - except aiohttp.ClientError as e: - raise Exception(f"Failed to connect to local image server at {api_url}: {str(e)}") + if response.status == 200: + image_data = await response.read() + + # Determine extension + ext = filename.split(".")[-1] if "." in filename else "png" + image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}") + + with open(image_path, "wb") as f: + f.write(image_data) + + print(f"Downloaded image from ComfyUI: {image_path}") + return image_path + else: + raise Exception(f"Failed to download image: {response.status}") + + raise Exception("No images found in ComfyUI outputs") diff --git a/servers/fastapi/utils/get_env.py b/servers/fastapi/utils/get_env.py index 88c19f25..8d97ba27 100644 --- a/servers/fastapi/utils/get_env.py +++ b/servers/fastapi/utils/get_env.py @@ -105,5 +105,5 @@ def get_local_image_url_env(): return os.getenv("LOCAL_IMAGE_URL") -def get_local_image_model_env(): - return os.getenv("LOCAL_IMAGE_MODEL") +def get_local_image_workflow_env(): + return os.getenv("LOCAL_IMAGE_WORKFLOW") diff --git a/servers/fastapi/utils/set_env.py b/servers/fastapi/utils/set_env.py index 365688a5..766637e7 100644 --- a/servers/fastapi/utils/set_env.py +++ b/servers/fastapi/utils/set_env.py @@ -93,5 +93,5 @@ def set_local_image_url_env(value): os.environ["LOCAL_IMAGE_URL"] = value -def set_local_image_model_env(value): - os.environ["LOCAL_IMAGE_MODEL"] = value +def set_local_image_workflow_env(value): + os.environ["LOCAL_IMAGE_WORKFLOW"] = value diff --git a/servers/fastapi/utils/user_config.py b/servers/fastapi/utils/user_config.py index f718b374..bc79125e 100644 --- a/servers/fastapi/utils/user_config.py +++ b/servers/fastapi/utils/user_config.py @@ -13,8 +13,8 @@ from utils.get_env import ( get_google_api_key_env, get_google_model_env, get_llm_provider_env, - get_local_image_model_env, get_local_image_url_env, + get_local_image_workflow_env, get_ollama_model_env, get_ollama_url_env, get_openai_api_key_env, @@ -40,8 +40,8 @@ from utils.set_env import ( set_google_api_key_env, set_google_model_env, set_llm_provider_env, - set_local_image_model_env, set_local_image_url_env, + set_local_image_workflow_env, set_ollama_model_env, set_ollama_url_env, set_openai_api_key_env, @@ -90,7 +90,7 @@ def get_user_config(): PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(), PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(), LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(), - LOCAL_IMAGE_MODEL=existing_config.LOCAL_IMAGE_MODEL or get_local_image_model_env(), + LOCAL_IMAGE_WORKFLOW=existing_config.LOCAL_IMAGE_WORKFLOW or get_local_image_workflow_env(), TOOL_CALLS=( existing_config.TOOL_CALLS if existing_config.TOOL_CALLS is not None @@ -150,8 +150,8 @@ def update_env_with_user_config(): set_pexels_api_key_env(user_config.PEXELS_API_KEY) if user_config.LOCAL_IMAGE_URL: set_local_image_url_env(user_config.LOCAL_IMAGE_URL) - if user_config.LOCAL_IMAGE_MODEL: - set_local_image_model_env(user_config.LOCAL_IMAGE_MODEL) + if user_config.LOCAL_IMAGE_WORKFLOW: + set_local_image_workflow_env(user_config.LOCAL_IMAGE_WORKFLOW) if user_config.TOOL_CALLS is not None: set_tool_calls_env(str(user_config.TOOL_CALLS)) if user_config.DISABLE_THINKING is not None: diff --git a/servers/nextjs/app/api/user-config/route.ts b/servers/nextjs/app/api/user-config/route.ts index 8edbc040..697ef967 100644 --- a/servers/nextjs/app/api/user-config/route.ts +++ b/servers/nextjs/app/api/user-config/route.ts @@ -65,7 +65,7 @@ export async function POST(request: Request) { IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER, PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY, LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL, - LOCAL_IMAGE_MODEL: userConfig.LOCAL_IMAGE_MODEL || existingConfig.LOCAL_IMAGE_MODEL, + LOCAL_IMAGE_WORKFLOW: userConfig.LOCAL_IMAGE_WORKFLOW || existingConfig.LOCAL_IMAGE_WORKFLOW, TOOL_CALLS: userConfig.TOOL_CALLS === undefined ? existingConfig.TOOL_CALLS diff --git a/servers/nextjs/components/LLMSelection.tsx b/servers/nextjs/components/LLMSelection.tsx index cf00adce..0decd24b 100644 --- a/servers/nextjs/components/LLMSelection.tsx +++ b/servers/nextjs/components/LLMSelection.tsx @@ -82,13 +82,14 @@ export default function LLMProviderSelection({ const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL); - const needsLocalImageUrl = !llmConfig.DISABLE_IMAGE_GENERATION && - llmConfig.IMAGE_PROVIDER === "local" && !llmConfig.LOCAL_IMAGE_URL; + const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION && + llmConfig.IMAGE_PROVIDER === "local" && + (!llmConfig.LOCAL_IMAGE_URL || !llmConfig.LOCAL_IMAGE_WORKFLOW); setButtonState({ isLoading: false, - isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsLocalImageUrl, - text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsLocalImageUrl ? "Please Enter Local Server URL" : "Save Configuration", + isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig, + text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration", showProgress: false }); @@ -339,18 +340,18 @@ export default function LLMProviderSelection({ return <>>; } - // Show Local Image Generation configuration + // Show ComfyUI configuration if (provider.value === "local") { return (
- Enter the full API URL including endpoint. Examples: +
+ + Use your machine IP address (not localhost) when running in Docker
-http://IP:7860/sdapi/v1/txt2imghttp://IP:7860/v1/generation/text-to-image- - Leave empty to use the currently loaded model +
+ Export your workflow from ComfyUI using "Save (API Format)" and paste the JSON here. + The positive prompt node (CLIPTextEncode) will be automatically updated.