Merge pull request #371 from presenton/feat/local_image_provider

Comfyui-integration
This commit is contained in:
Shiva Raj Badu 2025-12-18 00:37:37 +05:45 committed by GitHub
commit 2bfb895558
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 343 additions and 4 deletions

View file

@ -100,6 +100,8 @@ services:
- WEB_GROUNDING=${WEB_GROUNDING}
- DATABASE_URL=${DATABASE_URL}
- DISABLE_ANONYMOUS_TRACKING=${DISABLE_ANONYMOUS_TRACKING}
- LOCAL_IMAGE_URL=${LOCAL_IMAGE_URL}
- LOCAL_IMAGE_WORKFLOW=${LOCAL_IMAGE_WORKFLOW}
development-gpu:
build:

View file

@ -5,3 +5,4 @@ class ImageProvider(Enum):
PIXABAY = "pixabay"
GEMINI_FLASH = "gemini_flash"
DALLE3 = "dall-e-3"
LOCAL = "local" # Local image generation (Stable Diffusion, FLUX, ComfyUI, Fooocus, etc.)

View file

@ -32,6 +32,10 @@ class UserConfig(BaseModel):
PEXELS_API_KEY: Optional[str] = None
PIXABAY_API_KEY: Optional[str] = None
# Local Image Generation (ComfyUI)
LOCAL_IMAGE_URL: Optional[str] = None
LOCAL_IMAGE_WORKFLOW: Optional[str] = None # ComfyUI workflow JSON
# Reasoning
TOOL_CALLS: Optional[bool] = None
DISABLE_THINKING: Optional[bool] = None

View file

@ -1,4 +1,6 @@
import asyncio
import base64
import json
import os
import aiohttp
from google import genai
@ -9,12 +11,15 @@ from models.sql.image_asset import ImageAsset
from utils.download_helpers import download_file
from utils.get_env import get_pexels_api_key_env
from utils.get_env import get_pixabay_api_key_env
from utils.get_env import get_local_image_url_env
from utils.get_env import get_local_image_workflow_env
from utils.image_provider import (
is_image_generation_disabled,
is_pixels_selected,
is_pixabay_selected,
is_gemini_flash_selected,
is_dalle3_selected,
is_local_selected,
)
import uuid
@ -37,6 +42,8 @@ class ImageGenerationService:
return self.generate_image_google
elif is_dalle3_selected():
return self.generate_image_openai
elif is_local_selected():
return self.generate_image_local
return None
def is_stock_provider_selected(self):
@ -137,3 +144,224 @@ class ImageGenerationService:
data = await response.json()
image_url = data["hits"][0]["largeImageURL"]
return image_url
async def generate_image_local(self, prompt: str, output_directory: str) -> str:
    """
    Generate an image by running a user-supplied workflow on a ComfyUI server.

    Configuration is read from the environment:
    - LOCAL_IMAGE_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188)
    - LOCAL_IMAGE_WORKFLOW: Workflow JSON exported from ComfyUI

    The workflow should have a CLIPTextEncode node with "Positive" in the title
    where the prompt will be injected.

    Args:
        prompt: The text prompt for image generation
        output_directory: Directory to save the generated image

    Returns:
        Path to the generated image file

    Raises:
        ValueError: If either setting is missing, the workflow is not valid
            JSON, or the parsed workflow is not a JSON object.
    """
    comfyui_url = get_local_image_url_env()
    workflow_json = get_local_image_workflow_env()

    if not comfyui_url:
        raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
    if not workflow_json:
        raise ValueError("LOCAL_IMAGE_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.")

    # Endpoint paths are appended with a leading slash, so drop any trailing one.
    comfyui_url = comfyui_url.rstrip("/")

    try:
        workflow = json.loads(workflow_json)
    except json.JSONDecodeError as e:
        # Keep the decoder error as the cause so the offending position is not lost.
        raise ValueError(f"Invalid workflow JSON: {str(e)}") from e

    # The injection step iterates `workflow.items()`; reject arrays/scalars here
    # with a clear message instead of failing later with AttributeError.
    if not isinstance(workflow, dict):
        raise ValueError("Invalid workflow JSON: expected a JSON object mapping node ids to nodes")

    # Find and update the positive prompt node
    workflow = self._inject_prompt_into_workflow(workflow, prompt)

    async with aiohttp.ClientSession(trust_env=True) as session:
        # Step 1: Submit workflow
        prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow)
        # Step 2: Wait for completion
        status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id)
        # Step 3: Download the generated image
        image_path = await self._download_comfyui_image(
            session, comfyui_url, status_data, prompt_id, output_directory
        )
    return image_path
def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict:
"""
Find the positive prompt node in the workflow and inject the prompt text.
Looks for CLIPTextEncode nodes with 'Positive' in the title.
"""
prompt_injected = False
for node_id, node_data in workflow.items():
# Check if this is a CLIPTextEncode node
if node_data.get("class_type") == "CLIPTextEncode":
meta = node_data.get("_meta", {})
title = meta.get("title", "").lower()
# Check if it's a positive prompt node
if "positive" in title:
if "inputs" in node_data and "text" in node_data["inputs"]:
node_data["inputs"]["text"] = prompt
prompt_injected = True
print(f"Injected prompt into node {node_id}: {title}")
break
if not prompt_injected:
# Fallback: try to find any CLIPTextEncode node with text input
for node_id, node_data in workflow.items():
if node_data.get("class_type") == "CLIPTextEncode":
if "inputs" in node_data and "text" in node_data["inputs"]:
# Skip if it looks like a negative prompt
meta = node_data.get("_meta", {})
title = meta.get("title", "").lower()
if "negative" in title:
continue
node_data["inputs"]["text"] = prompt
prompt_injected = True
print(f"Injected prompt into node {node_id} (fallback)")
break
if not prompt_injected:
raise ValueError("Could not find a positive prompt node (CLIPTextEncode) in the workflow")
return workflow
async def _submit_comfyui_workflow(
    self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict
) -> str:
    """
    Submit a workflow to ComfyUI and return the prompt_id.

    Args:
        session: Open aiohttp session used for the request.
        comfyui_url: Base server URL without a trailing slash.
        workflow: Workflow to queue, sent as the `prompt` field.

    Returns:
        The `prompt_id` from the server's JSON response.

    Raises:
        Exception: If the server answers with a non-200 status or the
            response has no `prompt_id`.
    """
    payload = {
        "prompt": workflow,
        # A fresh client id is generated for every submission.
        "client_id": str(uuid.uuid4()),
    }

    # `async with` releases the connection back to the pool on every path,
    # including the error branch below.
    async with session.post(
        f"{comfyui_url}/prompt",
        json=payload,
        timeout=aiohttp.ClientTimeout(total=30),
    ) as response:
        if response.status != 200:
            error_text = await response.text()
            raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}")
        data = await response.json()

    prompt_id = data.get("prompt_id")
    if not prompt_id:
        raise Exception("No prompt_id returned from ComfyUI")

    print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}")
    return prompt_id
async def _wait_for_comfyui_completion(
    self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str,
    timeout: int = 300, poll_interval: int = 4
) -> dict:
    """
    Poll the ComfyUI history endpoint until the workflow completes.

    Args:
        session: Open aiohttp session used for polling.
        comfyui_url: Base server URL without a trailing slash.
        prompt_id: Id returned when the workflow was submitted.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep before each poll.

    Returns:
        The decoded history response (a dict keyed by prompt id) once the
        entry for `prompt_id` is marked completed or has non-empty outputs.

    Raises:
        Exception: If the workflow reports an error or `timeout` elapses.
    """
    loop = asyncio.get_running_loop()
    start_time = loop.time()

    while True:
        elapsed = loop.time() - start_time
        if elapsed > timeout:
            raise Exception(f"ComfyUI workflow timed out after {timeout} seconds")

        await asyncio.sleep(poll_interval)

        # `async with` releases the connection on every path, including `continue`.
        async with session.get(
            f"{comfyui_url}/history/{prompt_id}",
            timeout=aiohttp.ClientTimeout(total=30),
        ) as response:
            if response.status != 200:
                continue
            try:
                status_data = await response.json()
            except Exception:
                # Best effort: an unreadable body just means "try again".
                # `Exception` (not a bare except) lets task cancellation propagate.
                continue

        if isinstance(status_data, dict) and prompt_id in status_data:
            execution_data = status_data[prompt_id]
            if not isinstance(execution_data, dict):
                execution_data = {}

            # Check for completion
            status = execution_data.get("status")
            if isinstance(status, dict):
                if status.get("completed", False):
                    print("ComfyUI workflow completed successfully")
                    return status_data
                if "error" in status:
                    raise Exception(f"ComfyUI workflow error: {status['error']}")
                # NOTE(review): ComfyUI history entries appear to report a failed
                # run via status_str == "error" rather than an "error" key --
                # confirm against the server version in use.
                if status.get("status_str") == "error":
                    raise Exception(f"ComfyUI workflow error: {status.get('messages')}")

            # Also check if outputs exist (alternative completion check)
            if execution_data.get("outputs"):
                print("ComfyUI workflow completed (outputs found)")
                return status_data

        print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)")
async def _download_comfyui_image(
    self, session: aiohttp.ClientSession, comfyui_url: str,
    status_data: dict, prompt_id: str, output_directory: str
) -> str:
    """
    Download the first generated image listed in a ComfyUI history entry.

    Args:
        session: Open aiohttp session used for the download.
        comfyui_url: Base server URL without a trailing slash.
        status_data: History response keyed by prompt id.
        prompt_id: Id of the finished workflow run.
        output_directory: Directory the image file is written into.

    Returns:
        Path of the saved image; the file name is a fresh UUID plus the
        extension of the server-side file name ("png" when it has none).

    Raises:
        Exception: If the prompt id or its outputs are missing, the download
            returns a non-200 status, or no output lists any image.
    """
    if prompt_id not in status_data:
        raise Exception("Prompt ID not found in status data")

    outputs = status_data[prompt_id].get("outputs", {})
    if not outputs:
        raise Exception("No outputs found in ComfyUI response")

    # Find the first image in outputs
    for node_output in outputs.values():
        if not isinstance(node_output, dict):
            continue
        for image_info in node_output.get("images") or []:
            filename = image_info["filename"]
            subfolder = image_info.get("subfolder", "")

            # Build view params
            params = {
                "filename": filename,
                # NOTE(review): image entries appear to carry their own "type"
                # (e.g. "temp" for preview nodes) -- confirm; falls back to the
                # previously hard-coded "output" when absent.
                "type": image_info.get("type") or "output",
            }
            if subfolder:
                params["subfolder"] = subfolder

            # Download the image; `async with` releases the connection on every path.
            async with session.get(
                f"{comfyui_url}/view",
                params=params,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as response:
                if response.status != 200:
                    raise Exception(f"Failed to download image: {response.status}")
                image_data = await response.read()

            # Take only the final suffix so a server-supplied name cannot add
            # path separators to the local file name.
            ext = os.path.splitext(os.path.basename(filename))[1].lstrip(".") or "png"
            image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
            with open(image_path, "wb") as f:
                f.write(image_data)

            print(f"Downloaded image from ComfyUI: {image_path}")
            return image_path

    raise Exception("No images found in ComfyUI outputs")

View file

@ -99,3 +99,11 @@ def get_extended_reasoning_env():
def get_web_grounding_env():
    """Return the value of the WEB_GROUNDING environment variable, or None if unset."""
    return os.environ.get("WEB_GROUNDING")
def get_local_image_url_env():
    """Return the value of the LOCAL_IMAGE_URL environment variable, or None if unset."""
    return os.environ.get("LOCAL_IMAGE_URL")
def get_local_image_workflow_env():
    """Return the value of the LOCAL_IMAGE_WORKFLOW environment variable, or None if unset."""
    return os.environ.get("LOCAL_IMAGE_WORKFLOW")

View file

@ -3,6 +3,7 @@ from utils.get_env import (
get_disable_image_generation_env,
get_google_api_key_env,
get_image_provider_env,
get_local_image_url_env,
get_openai_api_key_env,
get_pexels_api_key_env,
get_pixabay_api_key_env,
@ -30,6 +31,10 @@ def is_dalle3_selected() -> bool:
return ImageProvider.DALLE3 == get_selected_image_provider()
def is_local_selected() -> bool:
    """Return True when the selected image provider is ImageProvider.LOCAL."""
    selected = get_selected_image_provider()
    return ImageProvider.LOCAL == selected
def get_selected_image_provider() -> ImageProvider | None:
"""
Get the selected image provider from environment variables.
@ -52,5 +57,7 @@ def get_image_provider_api_key() -> str:
return get_google_api_key_env()
elif selected_image_provider == ImageProvider.DALLE3:
return get_openai_api_key_env()
elif selected_image_provider == ImageProvider.LOCAL:
return get_local_image_url_env() # Returns URL instead of API key
else:
raise ValueError(f"Invalid image provider: {selected_image_provider}")

View file

@ -87,3 +87,11 @@ def set_extended_reasoning_env(value):
def set_web_grounding_env(value):
    """Store `value` in the WEB_GROUNDING environment variable of this process."""
    env = os.environ
    env["WEB_GROUNDING"] = value
def set_local_image_url_env(value):
    """Store `value` in the LOCAL_IMAGE_URL environment variable of this process."""
    env = os.environ
    env["LOCAL_IMAGE_URL"] = value
def set_local_image_workflow_env(value):
    """Store `value` in the LOCAL_IMAGE_WORKFLOW environment variable of this process."""
    env = os.environ
    env["LOCAL_IMAGE_WORKFLOW"] = value

View file

@ -13,6 +13,8 @@ from utils.get_env import (
get_google_api_key_env,
get_google_model_env,
get_llm_provider_env,
get_local_image_url_env,
get_local_image_workflow_env,
get_ollama_model_env,
get_ollama_url_env,
get_openai_api_key_env,
@ -38,6 +40,8 @@ from utils.set_env import (
set_google_api_key_env,
set_google_model_env,
set_llm_provider_env,
set_local_image_url_env,
set_local_image_workflow_env,
set_ollama_model_env,
set_ollama_url_env,
set_openai_api_key_env,
@ -85,6 +89,8 @@ def get_user_config():
),
PIXABAY_API_KEY=existing_config.PIXABAY_API_KEY or get_pixabay_api_key_env(),
PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(),
LOCAL_IMAGE_URL=existing_config.LOCAL_IMAGE_URL or get_local_image_url_env(),
LOCAL_IMAGE_WORKFLOW=existing_config.LOCAL_IMAGE_WORKFLOW or get_local_image_workflow_env(),
TOOL_CALLS=(
existing_config.TOOL_CALLS
if existing_config.TOOL_CALLS is not None
@ -142,6 +148,10 @@ def update_env_with_user_config():
set_pixabay_api_key_env(user_config.PIXABAY_API_KEY)
if user_config.PEXELS_API_KEY:
set_pexels_api_key_env(user_config.PEXELS_API_KEY)
if user_config.LOCAL_IMAGE_URL:
set_local_image_url_env(user_config.LOCAL_IMAGE_URL)
if user_config.LOCAL_IMAGE_WORKFLOW:
set_local_image_workflow_env(user_config.LOCAL_IMAGE_WORKFLOW)
if user_config.TOOL_CALLS is not None:
set_tool_calls_env(str(user_config.TOOL_CALLS))
if user_config.DISABLE_THINKING is not None:

View file

@ -12,10 +12,10 @@ export async function GET() {
const raw = fs.readFileSync(userConfigPath, "utf-8");
const cfg = JSON.parse(raw || "{}");
keyFromFile = cfg?.OPENAI_API_KEY || "";
} catch {}
} catch { }
}
console.log(keyFromFile);
const keyFromEnv = process.env.OPENAI_API_KEY || "";
console.log(keyFromEnv);

View file

@ -64,6 +64,8 @@ export async function POST(request: Request) {
userConfig.PIXABAY_API_KEY || existingConfig.PIXABAY_API_KEY,
IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY,
LOCAL_IMAGE_URL: userConfig.LOCAL_IMAGE_URL || existingConfig.LOCAL_IMAGE_URL,
LOCAL_IMAGE_WORKFLOW: userConfig.LOCAL_IMAGE_WORKFLOW || existingConfig.LOCAL_IMAGE_WORKFLOW,
TOOL_CALLS:
userConfig.TOOL_CALLS === undefined
? existingConfig.TOOL_CALLS

View file

@ -82,10 +82,14 @@ export default function LLMProviderSelection({
const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL);
const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION &&
llmConfig.IMAGE_PROVIDER === "local" &&
(!llmConfig.LOCAL_IMAGE_URL || !llmConfig.LOCAL_IMAGE_WORKFLOW);
setButtonState({
isLoading: false,
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl,
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : "Save Configuration",
isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig,
text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration",
showProgress: false
});
@ -336,6 +340,54 @@ export default function LLMProviderSelection({
return <></>;
}
// Show ComfyUI configuration
if (provider.value === "local") {
return (
<div className="mb-8 space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
ComfyUI Server URL
</label>
<div className="relative">
<input
type="text"
placeholder="http://192.168.1.7:8188"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.LOCAL_IMAGE_URL || ""}
onChange={(e) => {
input_field_changed(e.target.value, "local_image_url");
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
Use your machine IP address (not localhost) when running in Docker
</p>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Workflow JSON
</label>
<div className="relative">
<textarea
placeholder='Paste your ComfyUI workflow JSON here (export via "Save (API Format)" in ComfyUI)'
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors font-mono text-xs"
rows={6}
value={llmConfig.LOCAL_IMAGE_WORKFLOW || ""}
onChange={(e) => {
input_field_changed(e.target.value, "local_image_workflow");
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500">
Export your workflow from ComfyUI using &quot;Save (API Format)&quot; and paste the JSON here.
The positive prompt node (CLIPTextEncode) will be automatically updated.
</p>
</div>
</div>
);
}
// Show API key input for other providers
return (
<div className="mb-8">

View file

@ -28,6 +28,10 @@ export interface LLMConfig {
PEXELS_API_KEY?: string;
PIXABAY_API_KEY?: string;
// Local Image Generation (ComfyUI)
LOCAL_IMAGE_URL?: string;
LOCAL_IMAGE_WORKFLOW?: string; // ComfyUI workflow JSON
// Other Configs
TOOL_CALLS?: boolean;
DISABLE_THINKING?: boolean;

View file

@ -61,6 +61,15 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
apiKeyField: "GOOGLE_API_KEY",
apiKeyFieldLabel: "Google API Key"
},
local: {
value: "local",
label: "ComfyUI",
description: "Use your local ComfyUI server with custom workflows",
icon: "/icons/local.png",
requiresApiKey: false,
apiKeyField: "LOCAL_IMAGE_URL",
apiKeyFieldLabel: "ComfyUI Server URL"
},
};
export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {

View file

@ -48,6 +48,8 @@ export const updateLLMConfig = (
disable_thinking: "DISABLE_THINKING",
extended_reasoning: "EXTENDED_REASONING",
web_grounding: "WEB_GROUNDING",
local_image_url: "LOCAL_IMAGE_URL",
local_image_workflow: "LOCAL_IMAGE_WORKFLOW",
};
const configKey = fieldMappings[field];

View file

@ -73,6 +73,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
case "gemini_flash":
return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== "";
case "local":
return llmConfig.LOCAL_IMAGE_URL && llmConfig.LOCAL_IMAGE_URL !== "";
default:
return false;
}