Merge pull request #371 from presenton/feat/local_image_provider

Comfyui-integration
2025-12-18 00:37:37 +05:45 · 2025-12-18 00:37:37 +05:45 · 2bfb895558
commit 2bfb895558
parent c9b60d5f51 37a919c3f1
15 changed files with 343 additions and 4 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -100,6 +100,8 @@ services:
      - WEB_GROUNDING=${WEB_GROUNDING}
      - DATABASE_URL=${DATABASE_URL}
      - DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING}
+      - LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL}
+      - LOCAL_IMAGE_WORKFLOW=${LOCAL_IMAGE_WORKFLOW}

  development-gpu:
    build:
--- a/servers/fastapi/enums/image_provider.py
+++ b/servers/fastapi/enums/image_provider.py
@ -5,3 +5,4 @@ class ImageProvider(Enum):
    PIXABAY = "pixabay"
    GEMINI_FLASH = "gemini_flash"
    DALLE3 = "dall-e-3"
+    LOCAL = "local"  # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)
--- a/servers/fastapi/models/user_config.py
+++ b/servers/fastapi/models/user_config.py
@ -32,6 +32,10 @@ class UserConfig(BaseModel):
    PEXELS_API_KEY: Optional[str] = None
    PIXABAY_API_KEY: Optional[str] = None

+    # Local Image Generation (ComfyUI)
+    LOCAL_IMAGE_URL: Optional[str] = None
+    LOCAL_IMAGE_WORKFLOW: Optional[str] = None  # ComfyUI workflow JSON
+
    # Reasoning
    TOOL_CALLS: Optional[bool] = None
    DISABLE_THINKING: Optional[bool] = None
--- a/servers/fastapi/services/image_generation_service.py
+++ b/servers/fastapi/services/image_generation_service.py
@ -1,4 +1,6 @@
 import asyncio
+import base64
+import json
 import os
 import aiohttp
 from google import genai
@ -9,12 +11,15 @@ from models.sql.image_asset import ImageAsset
 from utils.download_helpers import download_file
 from utils.get_env import get_pexels_api_key_env
 from utils.get_env import get_pixabay_api_key_env
+from utils.get_env import get_local_image_url_env
+from utils.get_env import get_local_image_workflow_env
 from utils.image_provider import (
    is_image_generation_disabled,
    is_pixels_selected,
    is_pixabay_selected,
    is_gemini_flash_selected,
    is_dalle3_selected,
+    is_local_selected,
 )
 import uuid

@ -37,6 +42,8 @@ class ImageGenerationService:
            return self.generate_image_google
        elif is_dalle3_selected():
            return self.generate_image_openai
+        elif is_local_selected():
+            return self.generate_image_local
        return None

    def is_stock_provider_selected(self):
@ -137,3 +144,224 @@ class ImageGenerationService:
            data = await response.json()
            image_url = data["hits"][0]["largeImageURL"]
            return image_url
+
+    async def generate_image_local(self, prompt: str, output_directory: str) -> str:
+        """
+        Generate an image by running a user-supplied workflow on a ComfyUI server.
+
+        Configuration is read from the environment:
+        - LOCAL_IMAGE_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188)
+        - LOCAL_IMAGE_WORKFLOW: Workflow JSON exported from ComfyUI
+          (use "Save (API Format)")
+
+        The workflow should have a CLIPTextEncode node with "Positive" in the
+        title; that is where the prompt is injected. The workflow is then
+        submitted, polled until it finishes, and the resulting image is
+        downloaded into ``output_directory``.
+
+        Args:
+            prompt: The text prompt for image generation
+            output_directory: Directory to save the generated image
+
+        Returns:
+            Path to the generated image file
+
+        Raises:
+            ValueError: If LOCAL_IMAGE_URL or LOCAL_IMAGE_WORKFLOW is unset,
+                the workflow is not valid JSON, the workflow is not a JSON
+                object, or no prompt node can be found in it.
+            Exception: Propagated from the submit / wait / download steps.
+        """
+        comfyui_url = get_local_image_url_env()
+        workflow_json = get_local_image_workflow_env()
+
+        if not comfyui_url:
+            raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
+
+        if not workflow_json:
+            raise ValueError("LOCAL_IMAGE_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.")
+
+        # Endpoint paths are appended with a leading slash, so drop any trailing one
+        comfyui_url = comfyui_url.rstrip("/")
+
+        # Parse the workflow JSON, keeping the decoder error as the cause
+        try:
+            workflow = json.loads(workflow_json)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid workflow JSON: {e}") from e
+
+        # Valid JSON that is not an object (a list, a string, ...) cannot be a
+        # node-id -> node mapping; fail here with a clear message instead of an
+        # AttributeError further down.
+        if not isinstance(workflow, dict):
+            raise ValueError(
+                "Invalid workflow JSON: expected an object mapping node ids to nodes "
+                "(export the workflow with \"Save (API Format)\" in ComfyUI)"
+            )
+
+        # Find and update the positive prompt node
+        workflow = self._inject_prompt_into_workflow(workflow, prompt)
+
+        async with aiohttp.ClientSession(trust_env=True) as session:
+            # Step 1: Submit workflow
+            prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow)
+
+            # Step 2: Wait for completion
+            status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id)
+
+            # Step 3: Download the generated image
+            return await self._download_comfyui_image(
+                session, comfyui_url, status_data, prompt_id, output_directory
+            )
+    
+    def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict:
+        """
+        Write ``prompt`` into the positive prompt node of a ComfyUI workflow.
+
+        Selection order:
+        1. The first CLIPTextEncode node with a ``text`` input whose title
+           contains "positive" (case-insensitive).
+        2. Otherwise, the first CLIPTextEncode node with a ``text`` input whose
+           title does not contain "negative".
+
+        Top-level entries that are not node objects, and nodes with a missing
+        or null ``_meta`` / ``title``, are tolerated rather than crashing.
+
+        Args:
+            workflow: Parsed workflow mapping node ids to node definitions.
+                It is modified in place.
+            prompt: Text to store in the selected node's ``inputs["text"]``.
+
+        Returns:
+            The same ``workflow`` object, after modification.
+
+        Raises:
+            ValueError: If no suitable CLIPTextEncode node exists.
+        """
+        # Gather every usable text-encode node once: (node id, inputs dict, lowercased title)
+        candidates = []
+        for node_id, node_data in workflow.items():
+            if not isinstance(node_data, dict):
+                continue
+            if node_data.get("class_type") != "CLIPTextEncode":
+                continue
+            inputs = node_data.get("inputs")
+            if not isinstance(inputs, dict) or "text" not in inputs:
+                continue
+            meta = node_data.get("_meta")
+            raw_title = meta.get("title") if isinstance(meta, dict) else None
+            title = raw_title.lower() if isinstance(raw_title, str) else ""
+            candidates.append((node_id, inputs, title))
+
+        # Preferred: a node explicitly titled as the positive prompt
+        for node_id, inputs, title in candidates:
+            if "positive" in title:
+                inputs["text"] = prompt
+                print(f"Injected prompt into node {node_id}: {title}")
+                return workflow
+
+        # Fallback: any text-encode node that does not look like a negative prompt
+        for node_id, inputs, title in candidates:
+            if "negative" not in title:
+                inputs["text"] = prompt
+                print(f"Injected prompt into node {node_id} (fallback)")
+                return workflow
+
+        raise ValueError("Could not find a positive prompt node (CLIPTextEncode) in the workflow")
+    
+    async def _submit_comfyui_workflow(
+        self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict
+    ) -> str:
+        """
+        Submit a workflow to ComfyUI and return the prompt_id.
+
+        Args:
+            session: Open aiohttp session used for the request.
+            comfyui_url: Base URL of the ComfyUI server, without trailing slash.
+            workflow: Workflow mapping sent as the ``prompt`` field of the payload.
+
+        Returns:
+            The ``prompt_id`` reported by the server.
+
+        Raises:
+            Exception: If the server answers with a non-200 status or the
+                response carries no ``prompt_id``.
+        """
+        payload = {
+            "prompt": workflow,
+            # A fresh client id is generated for every submission
+            "client_id": str(uuid.uuid4()),
+        }
+
+        # ``async with`` guarantees the connection is released on every path,
+        # including the error path below.
+        async with session.post(
+            f"{comfyui_url}/prompt",
+            json=payload,
+            timeout=aiohttp.ClientTimeout(total=30),
+        ) as response:
+            if response.status != 200:
+                error_text = await response.text()
+                raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}")
+
+            data = await response.json()
+
+        # Guard against a JSON reply that is not an object
+        prompt_id = data.get("prompt_id") if isinstance(data, dict) else None
+
+        if not prompt_id:
+            raise Exception("No prompt_id returned from ComfyUI")
+
+        print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}")
+        return prompt_id
+    
+    async def _wait_for_comfyui_completion(
+        self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str,
+        timeout: int = 300, poll_interval: int = 4
+    ) -> dict:
+        """
+        Poll the ComfyUI history endpoint until the workflow finishes.
+
+        Args:
+            session: Open aiohttp session used for the polling requests.
+            comfyui_url: Base URL of the ComfyUI server, without trailing slash.
+            prompt_id: Id returned when the workflow was submitted.
+            timeout: Maximum number of seconds to keep polling.
+            poll_interval: Seconds to sleep before each poll.
+
+        Returns:
+            The full history payload (a dict keyed by ``prompt_id``) once the
+            entry is marked completed or contains outputs.
+
+        Raises:
+            Exception: If the run reports an error or ``timeout`` is exceeded.
+        """
+        loop = asyncio.get_running_loop()
+        start_time = loop.time()
+        history_url = f"{comfyui_url}/history/{prompt_id}"
+
+        while True:
+            elapsed = loop.time() - start_time
+            if elapsed > timeout:
+                raise Exception(f"ComfyUI workflow timed out after {timeout} seconds")
+
+            await asyncio.sleep(poll_interval)
+
+            # ``async with`` releases the connection even when the body is
+            # never read (non-200 responses).
+            async with session.get(
+                history_url,
+                timeout=aiohttp.ClientTimeout(total=30),
+            ) as response:
+                if response.status != 200:
+                    continue
+
+                try:
+                    status_data = await response.json()
+                except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
+                    # Unreadable or non-JSON body: retry on the next poll.
+                    # Deliberately not a bare ``except`` so task cancellation
+                    # and KeyboardInterrupt still propagate.
+                    continue
+
+            execution_data = (
+                status_data.get(prompt_id) if isinstance(status_data, dict) else None
+            )
+
+            if isinstance(execution_data, dict):
+                # Check for completion
+                status = execution_data.get("status")
+                if isinstance(status, dict):
+                    if status.get("completed", False):
+                        print("ComfyUI workflow completed successfully")
+                        return status_data
+                    if "error" in status:
+                        raise Exception(f"ComfyUI workflow error: {status['error']}")
+                    # ComfyUI marks failed runs with status_str == "error";
+                    # surface them now instead of polling until the timeout.
+                    if status.get("status_str") == "error":
+                        raise Exception(
+                            f"ComfyUI workflow error: {status.get('messages', status)}"
+                        )
+
+                # Also check if outputs exist (alternative completion check)
+                if execution_data.get("outputs"):
+                    print("ComfyUI workflow completed (outputs found)")
+                    return status_data
+
+            print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)")
+    
+    async def _download_comfyui_image(
+        self, session: aiohttp.ClientSession, comfyui_url: str,
+        status_data: dict, prompt_id: str, output_directory: str
+    ) -> str:
+        """
+        Download the first generated image of a finished workflow.
+
+        Args:
+            session: Open aiohttp session used for the download.
+            comfyui_url: Base URL of the ComfyUI server, without trailing slash.
+            status_data: History payload keyed by prompt id.
+            prompt_id: Id of the workflow run whose output is wanted.
+            output_directory: Directory the image file is written to.
+
+        Returns:
+            Path of the saved image (a random UUID name with the source extension).
+
+        Raises:
+            Exception: If the run is missing from ``status_data``, has no
+                outputs or images, or the download does not return HTTP 200.
+        """
+        if prompt_id not in status_data:
+            raise Exception("Prompt ID not found in status data")
+
+        outputs = status_data[prompt_id].get("outputs", {})
+
+        if not outputs:
+            raise Exception("No outputs found in ComfyUI response")
+
+        # Find the first image in outputs
+        image_info = next(
+            (
+                image
+                for node_output in outputs.values()
+                if isinstance(node_output, dict)
+                for image in node_output.get("images") or []
+            ),
+            None,
+        )
+        if image_info is None:
+            raise Exception("No images found in ComfyUI outputs")
+
+        filename = image_info["filename"]
+
+        # Build view params. Use the folder type reported for the image
+        # (preview nodes report "temp"); fall back to "output" when absent.
+        params = {
+            "filename": filename,
+            "type": image_info.get("type") or "output",
+        }
+        subfolder = image_info.get("subfolder", "")
+        if subfolder:
+            params["subfolder"] = subfolder
+
+        # ``async with`` releases the connection on the error path as well
+        async with session.get(
+            f"{comfyui_url}/view",
+            params=params,
+            timeout=aiohttp.ClientTimeout(total=60),
+        ) as response:
+            if response.status != 200:
+                raise Exception(f"Failed to download image: {response.status}")
+            image_data = await response.read()
+
+        # Take the extension from the base name only, so a server-supplied
+        # name can never inject path separators into the local file path.
+        ext = os.path.splitext(os.path.basename(filename))[1].lstrip(".") or "png"
+        image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
+
+        with open(image_path, "wb") as f:
+            f.write(image_data)
+
+        print(f"Downloaded image from ComfyUI: {image_path}")
+        return image_path
--- a/servers/fastapi/utils/get_env.py
+++ b/servers/fastapi/utils/get_env.py
@ -99,3 +99,11 @@ def get_extended_reasoning_env():

 def get_web_grounding_env():
    return os.getenv("WEB_GROUNDING")
+
+
+def get_local_image_url_env():
+    """Return the LOCAL_IMAGE_URL environment variable, or None if it is unset."""
+    return os.getenv("LOCAL_IMAGE_URL")
+
+
+def get_local_image_workflow_env():
+    """Return the LOCAL_IMAGE_WORKFLOW environment variable, or None if it is unset."""
+    return os.getenv("LOCAL_IMAGE_WORKFLOW")
--- a/servers/fastapi/utils/image_provider.py
+++ b/servers/fastapi/utils/image_provider.py
@ -3,6 +3,7 @@ from utils.get_env import (
    get_disable_image_generation_env,
    get_google_api_key_env,
    get_image_provider_env,
+    get_local_image_url_env,
    get_openai_api_key_env,
    get_pexels_api_key_env,
    get_pixabay_api_key_env,
@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
    return ImageProvider.DALLE3 == get_selected_image_provider()


+def is_local_selected() -> bool:
+    """Return True when the selected image provider is ImageProvider.LOCAL."""
+    return ImageProvider.LOCAL == get_selected_image_provider()
+
+
 def get_selected_image_provider() -> ImageProvider | None:
    """
    Get the selected image provider from environment variables.
@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str:
        return get_google_api_key_env()
    elif selected_image_provider == ImageProvider.DALLE3:
        return get_openai_api_key_env()
+    elif selected_image_provider == ImageProvider.LOCAL:
+        return get_local_image_url_env()  # Returns URL instead of API key
    else:
        raise ValueError(f"Invalid image provider: {selected_image_provider}")
--- a/servers/fastapi/utils/set_env.py
+++ b/servers/fastapi/utils/set_env.py
@ -87,3 +87,11 @@ def set_extended_reasoning_env(value):

 def set_web_grounding_env(value):
    os.environ["WEB_GROUNDING"] = value
+
+
+def set_local_image_url_env(value):
+    """Store ``value`` in os.environ under the key LOCAL_IMAGE_URL."""
+    os.environ["LOCAL_IMAGE_URL"] = value
+
+
+def set_local_image_workflow_env(value):
+    """Store ``value`` in os.environ under the key LOCAL_IMAGE_WORKFLOW."""
+    os.environ["LOCAL_IMAGE_WORKFLOW"] = value
--- a/servers/fastapi/utils/user_config.py
+++ b/servers/fastapi/utils/user_config.py
@ -13,6 +13,8 @@ from utils.get_env import (
    get_google_api_key_env,
    get_google_model_env,
    get_llm_provider_env,
+    get_local_image_url_env,
+    get_local_image_workflow_env,
    get_ollama_model_env,
    get_ollama_url_env,
    get_openai_api_key_env,
@ -38,6 +40,8 @@ from utils.set_env import (
    set_google_api_key_env,
    set_google_model_env,
    set_llm_provider_env,
+    set_local_image_url_env,
+    set_local_image_workflow_env,
    set_ollama_model_env,
    set_ollama_url_env,
    set_openai_api_key_env,
@ -85,6 +89,8 @@ def get_user_config():
        ),
        PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(),
        PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(),
+        LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(),
+        LOCAL_IMAGE_WORKFLOW=existing_config.LOCAL_IMAGE_WORKFLOW or get_local_image_workflow_env(),
        TOOL_CALLS=(
            existing_config.TOOL_CALLS
            if existing_config.TOOL_CALLS is not None
@ -142,6 +148,10 @@ def update_env_with_user_config():
        set_pixabay_api_key_env(user_config.PIXABAY_API_KEY)
    if user_config.PEXELS_API_KEY:
        set_pexels_api_key_env(user_config.PEXELS_API_KEY)
+    if user_config.LOCAL_IMAGE_URL:
+        set_local_image_url_env(user_config.LOCAL_IMAGE_URL)
+    if user_config.LOCAL_IMAGE_WORKFLOW:
+        set_local_image_workflow_env(user_config.LOCAL_IMAGE_WORKFLOW)
    if user_config.TOOL_CALLS is not None:
        set_tool_calls_env(str(user_config.TOOL_CALLS))
    if user_config.DISABLE_THINKING is not None:
--- a/servers/nextjs/app/api/has-required-key/route.ts
+++ b/servers/nextjs/app/api/has-required-key/route.ts
@ -12,10 +12,10 @@ export async function GET() {
      const raw = fs.readFileSync(userConfigPath, "utf-8");
      const cfg = JSON.parse(raw || "{}");
      keyFromFile = cfg?.OPENAI_API_KEY || "";
-    } catch {}
+    } catch { }
  }

-  console.log(keyFromFile);
+

  const keyFromEnv = process.env.OPENAI_API_KEY || "";
  console.log(keyFromEnv);
--- a/servers/nextjs/app/api/user-config/route.ts
+++ b/servers/nextjs/app/api/user-config/route.ts
@ -64,6 +64,8 @@ export async function POST(request: Request) {
      userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
    IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
    PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY,
+    LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL,
+    LOCAL_IMAGE_WORKFLOW: userConfig.LOCAL_IMAGE_WORKFLOW || existingConfig.LOCAL_IMAGE_WORKFLOW,
    TOOL_CALLS:
      userConfig.TOOL_CALLS === undefined
        ? existingConfig.TOOL_CALLS
--- a/servers/nextjs/components/LLMSelection.tsx
+++ b/servers/nextjs/components/LLMSelection.tsx
@ -82,10 +82,14 @@ export default function LLMProviderSelection({

    const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL);

+    const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION &&
+      llmConfig.IMAGE_PROVIDER === "local" &&
+      (!llmConfig.LOCAL_IMAGE_URL || !llmConfig.LOCAL_IMAGE_WORKFLOW);
+
    setButtonState({
      isLoading: false,
-      isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl,
-      text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration",
+      isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig,
+      text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration",
      showProgress: false
    });

@ -336,6 +340,54 @@ export default function LLMProviderSelection({
                  return <></>;
                }

+                // Show ComfyUI configuration
+                if (provider.value === "local") {
+                  return (
+                    <div className="mb-8 space-y-4">
+                      <div>
+                        <label className="block text-sm font-medium text-gray-700 mb-2">
+                          ComfyUI Server URL
+                        </label>
+                        <div className="relative">
+                          <input
+                            type="text"
+                            placeholder="http://192.168.1.7:8188"
+                            className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
+                            value={llmConfig.LOCAL_IMAGE_URL || ""}
+                            onChange={(e) => {
+                              input_field_changed(e.target.value, "local_image_url");
+                            }}
+                          />
+                        </div>
+                        <p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
+                          <span className="block w-1 h-1 rounded-full bg-gray-400"></span>
+                          Use your machine IP address (not localhost) when running in Docker
+                        </p>
+                      </div>
+                      <div>
+                        <label className="block text-sm font-medium text-gray-700 mb-2">
+                          Workflow JSON
+                        </label>
+                        <div className="relative">
+                          <textarea
+                            placeholder='Paste your ComfyUI workflow JSON here (export via "Save (API Format)" in ComfyUI)'
+                            className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors font-mono text-xs"
+                            rows={6}
+                            value={llmConfig.LOCAL_IMAGE_WORKFLOW || ""}
+                            onChange={(e) => {
+                              input_field_changed(e.target.value, "local_image_workflow");
+                            }}
+                          />
+                        </div>
+                        <p className="mt-2 text-sm text-gray-500">
+                          Export your workflow from ComfyUI using &quot;Save (API Format)&quot; and paste the JSON here.
+                          The positive prompt node (CLIPTextEncode) will be automatically updated.
+                        </p>
+                      </div>
+                    </div>
+                  );
+                }
+
                // Show API key input for other providers
                return (
                  <div className="mb-8">
--- a/servers/nextjs/types/llm_config.ts
+++ b/servers/nextjs/types/llm_config.ts
@ -28,6 +28,10 @@ export interface LLMConfig {
  PEXELS_API_KEY?: string;
  PIXABAY_API_KEY?: string;

+  // Local Image Generation (ComfyUI)
+  LOCAL_IMAGE_URL?: string;
+  LOCAL_IMAGE_WORKFLOW?: string;  // ComfyUI workflow JSON
+
  // Other Configs
  TOOL_CALLS?: boolean;
  DISABLE_THINKING?: boolean;
--- a/servers/nextjs/utils/providerConstants.ts
+++ b/servers/nextjs/utils/providerConstants.ts
@ -61,6 +61,15 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
    apiKeyField: "GOOGLE_API_KEY",
    apiKeyFieldLabel: "Google API Key"
  },
+  local: {
+    value: "local",
+    label: "ComfyUI",
+    description: "Use your local ComfyUI server with custom workflows",
+    icon: "/icons/local.png",
+    requiresApiKey: false,
+    apiKeyField: "LOCAL_IMAGE_URL",
+    apiKeyFieldLabel: "ComfyUI Server URL"
+  },
 };

 export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {
--- a/servers/nextjs/utils/providerUtils.ts
+++ b/servers/nextjs/utils/providerUtils.ts
@ -48,6 +48,8 @@ export const updateLLMConfig = (
    disable_thinking: "DISABLE_THINKING",
    extended_reasoning: "EXTENDED_REASONING",
    web_grounding: "WEB_GROUNDING",
+    local_image_url: "LOCAL_IMAGE_URL",
+    local_image_workflow: "LOCAL_IMAGE_WORKFLOW",
  };

  const configKey = fieldMappings[field];
--- a/servers/nextjs/utils/storeHelpers.ts
+++ b/servers/nextjs/utils/storeHelpers.ts
@ -73,6 +73,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
        return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
      case "gemini_flash":
        return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== "";
+      case "local":
+        return llmConfig.LOCAL_IMAGE_URL && llmConfig.LOCAL_IMAGE_URL !== "";
      default:
        return false;
    }