refactor: make image generation service handle multiple response formats

2025-12-16 19:29:23 +05:45 · 2025-12-16 19:29:23 +05:45 · 0a70f3c4e3
commit 0a70f3c4e3
parent c34fb75302
2 changed files with 56 additions and 32 deletions
--- a/servers/fastapi/services/image_generation_service.py
+++ b/servers/fastapi/services/image_generation_service.py
@ -147,15 +147,16 @@ class ImageGenerationService:
    async def generate_image_local(self, prompt: str, output_directory: str) -> str:
        """
        Generate image using a local image generation server.
-        Supports Automatic1111 WebUI API format (commonly used by many local AI image tools).
        
-        Compatible with:
-        - Automatic1111 (Stable Diffusion WebUI)
-        - Stable Diffusion WebUI Forge
-        - ComfyUI (with API wrapper)
-        - Fooocus (with API mode)
-        - FLUX-based UIs with compatible API
-        - Any server implementing the /sdapi/v1/txt2img endpoint
+        User provides the full API URL including the endpoint.
+        Examples:
+        - Automatic1111: http://192.168.1.7:7860/sdapi/v1/txt2img
+        - Fooocus: http://192.168.1.7:7860/v1/generation/text-to-image
+        - Custom: http://192.168.1.7:7860/generate
+        
+        Supports both:
+        - JSON response with base64 images (Automatic1111 style)
+        - Direct binary image response (raw PNG/JPEG)
        
        Args:
            prompt: The text prompt for image generation
@ -164,19 +165,14 @@ class ImageGenerationService:
        Returns:
            Path to the generated image file
        """
-        local_url = get_local_image_url_env()
+        api_url = get_local_image_url_env()
        local_model = get_local_image_model_env()
        
-        if not local_url:
+        if not api_url:
            raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
        
-        # Ensure URL doesn't have trailing slash
-        local_url = local_url.rstrip("/")
-        
-        # Build the API endpoint URL (Automatic1111 compatible format)
-        api_url = f"{local_url}/sdapi/v1/txt2img"
-        
-        # Build the request payload
+        # Build the request payload (Automatic1111 compatible format)
+        # Most local tools accept similar payload structure
        payload = {
            "prompt": prompt,
            "negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
@ -205,22 +201,46 @@ class ImageGenerationService:
                    error_text = await response.text()
                    raise Exception(f"Local image API error: {response.status} - {error_text}")
                
-                data = await response.json()
+                content_type = response.headers.get("Content-Type", "")
                
-                # API returns images as base64 encoded strings
-                if "images" in data and len(data["images"]) > 0:
-                    image_base64 = data["images"][0]
-                    
-                    # Decode base64 and save to file
-                    image_data = base64.b64decode(image_base64)
-                    image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
+                # Handle direct binary image response (image/png, image/jpeg, etc.)
+                if content_type.startswith("image/"):
+                    image_data = await response.read()
+                    # Determine file extension from content type
+                    ext = "png" if "png" in content_type else "jpg"
+                    image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
                    
                    with open(image_path, "wb") as f:
                        f.write(image_data)
                    
                    return image_path
+                
+                # Handle JSON response with base64 encoded images
+                data = await response.json()
+                
+                # Check for images in various response formats
+                if "images" in data and len(data["images"]) > 0:
+                    image_base64 = data["images"][0]
+                    # Handle if it's a dict with base64 key
+                    if isinstance(image_base64, dict) and "base64" in image_base64:
+                        image_base64 = image_base64["base64"]
+                elif "image" in data:
+                    image_base64 = data["image"]
+                elif "output" in data:
+                    image_base64 = data["output"]
+                elif "result" in data:
+                    image_base64 = data["result"]
                else:
-                    raise Exception("No images returned from local image API")
+                    raise Exception(f"No images found in response. Keys: {list(data.keys())}")
+                
+                # Decode base64 and save to file
+                image_data = base64.b64decode(image_base64)
+                image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
+                
+                with open(image_path, "wb") as f:
+                    f.write(image_data)
+                
+                return image_path
                    
            except aiohttp.ClientError as e:
-                raise Exception(f"Failed to connect to local image server at {local_url}: {str(e)}")
+                raise Exception(f"Failed to connect to local image server at {api_url}: {str(e)}")
--- a/servers/nextjs/components/LLMSelection.tsx
+++ b/servers/nextjs/components/LLMSelection.tsx
@ -345,12 +345,12 @@ export default function LLMProviderSelection({
                    <div className="mb-8 space-y-4">
                      <div>
                        <label className="block text-sm font-medium text-gray-700 mb-2">
-                          Local Server URL
+                          Local API URL (Full Endpoint)
                        </label>
                        <div className="relative">
                          <input
                            type="text"
-                            placeholder="http://192.168.1.7:7860"
+                            placeholder="http://192.168.1.7:7860/sdapi/v1/txt2img"
                            className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
                            value={llmConfig.LOCAL_IMAGE_URL || ""}
                            onChange={(e) => {
@ -358,10 +358,14 @@ export default function LLMProviderSelection({
                            }}
                          />
                        </div>
-                        <p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
-                          <span className="block w-1 h-1 rounded-full bg-gray-400"></span>
-                          URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.)
+                        <p className="mt-2 text-sm text-gray-500">
+                          Enter the full API URL including endpoint. Examples:
                        </p>
+                        <ul className="mt-1 text-xs text-gray-500 space-y-0.5 ml-4">
+                          <li>• Automatic1111: <code className="bg-gray-100 px-1 rounded">http://IP:7860/sdapi/v1/txt2img</code></li>
+                          <li>• Fooocus: <code className="bg-gray-100 px-1 rounded">http://IP:7860/v1/generation/text-to-image</code></li>
+                          <li>• Use your machine IP address, not localhost</li>
+                        </ul>
                      </div>
                      <div>
                        <label className="block text-sm font-medium text-gray-700 mb-2">