refactor: make image generation service handle multiple response formats

This commit is contained in:
shiva raj badu 2025-12-16 19:29:23 +05:45
parent c34fb75302
commit 0a70f3c4e3
No known key found for this signature in database
2 changed files with 56 additions and 32 deletions

View file

@ -147,15 +147,16 @@ class ImageGenerationService:
async def generate_image_local(self, prompt: str, output_directory: str) -> str:
"""
Generate image using a local image generation server.
Supports Automatic1111 WebUI API format (commonly used by many local AI image tools).
Compatible with:
- Automatic1111 (Stable Diffusion WebUI)
- Stable Diffusion WebUI Forge
- ComfyUI (with API wrapper)
- Fooocus (with API mode)
- FLUX-based UIs with compatible API
- Any server implementing the /sdapi/v1/txt2img endpoint
User provides the full API URL including the endpoint.
Examples:
- Automatic1111: http://192.168.1.7:7860/sdapi/v1/txt2img
- Fooocus: http://192.168.1.7:7860/v1/generation/text-to-image
- Custom: http://192.168.1.7:7860/generate
Supports both:
- JSON response with base64 images (Automatic1111 style)
- Direct binary image response (raw PNG/JPEG)
Args:
prompt: The text prompt for image generation
@ -164,19 +165,14 @@ class ImageGenerationService:
Returns:
Path to the generated image file
"""
local_url = get_local_image_url_env()
api_url = get_local_image_url_env()
local_model = get_local_image_model_env()
if not local_url:
if not api_url:
raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
# Ensure URL doesn't have trailing slash
local_url = local_url.rstrip("/")
# Build the API endpoint URL (Automatic1111 compatible format)
api_url = f"{local_url}/sdapi/v1/txt2img"
# Build the request payload
# Build the request payload (Automatic1111 compatible format)
# Most local tools accept similar payload structure
payload = {
"prompt": prompt,
"negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
@ -205,22 +201,46 @@ class ImageGenerationService:
error_text = await response.text()
raise Exception(f"Local image API error: {response.status} - {error_text}")
data = await response.json()
content_type = response.headers.get("Content-Type", "")
# API returns images as base64 encoded strings
if "images" in data and len(data["images"]) > 0:
image_base64 = data["images"][0]
# Decode base64 and save to file
image_data = base64.b64decode(image_base64)
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
# Handle direct binary image response (image/png, image/jpeg, etc.)
if content_type.startswith("image/"):
image_data = await response.read()
# Determine file extension from content type
ext = "png" if "png" in content_type else "jpg"
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
with open(image_path, "wb") as f:
f.write(image_data)
return image_path
# Handle JSON response with base64 encoded images
data = await response.json()
# Check for images in various response formats
if "images" in data and len(data["images"]) > 0:
image_base64 = data["images"][0]
# Handle if it's a dict with base64 key
if isinstance(image_base64, dict) and "base64" in image_base64:
image_base64 = image_base64["base64"]
elif "image" in data:
image_base64 = data["image"]
elif "output" in data:
image_base64 = data["output"]
elif "result" in data:
image_base64 = data["result"]
else:
raise Exception("No images returned from local image API")
raise Exception(f"No images found in response. Keys: {list(data.keys())}")
# Decode base64 and save to file
image_data = base64.b64decode(image_base64)
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
with open(image_path, "wb") as f:
f.write(image_data)
return image_path
except aiohttp.ClientError as e:
raise Exception(f"Failed to connect to local image server at {local_url}: {str(e)}")
raise Exception(f"Failed to connect to local image server at {api_url}: {str(e)}")

View file

@ -345,12 +345,12 @@ export default function LLMProviderSelection({
<div className="mb-8 space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Local Server URL
Local API URL (Full Endpoint)
</label>
<div className="relative">
<input
type="text"
placeholder="http://192.168.1.7:7860"
placeholder="http://192.168.1.7:7860/sdapi/v1/txt2img"
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
value={llmConfig.LOCAL_IMAGE_URL || ""}
onChange={(e) => {
@ -358,10 +358,14 @@ export default function LLMProviderSelection({
}}
/>
</div>
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.)
<p className="mt-2 text-sm text-gray-500">
Enter the full API URL including endpoint. Examples:
</p>
<ul className="mt-1 text-xs text-gray-500 space-y-0.5 ml-4">
<li> Automatic1111: <code className="bg-gray-100 px-1 rounded">http://IP:7860/sdapi/v1/txt2img</code></li>
<li> Fooocus: <code className="bg-gray-100 px-1 rounded">http://IP:7860/v1/generation/text-to-image</code></li>
<li> Use your machine IP address, not localhost</li>
</ul>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">