refactor: make image generation service handle multiple response formats
This commit is contained in:
parent
c34fb75302
commit
0a70f3c4e3
2 changed files with 56 additions and 32 deletions
|
|
@ -147,15 +147,16 @@ class ImageGenerationService:
|
|||
async def generate_image_local(self, prompt: str, output_directory: str) -> str:
|
||||
"""
|
||||
Generate image using a local image generation server.
|
||||
Supports Automatic1111 WebUI API format (commonly used by many local AI image tools).
|
||||
|
||||
Compatible with:
|
||||
- Automatic1111 (Stable Diffusion WebUI)
|
||||
- Stable Diffusion WebUI Forge
|
||||
- ComfyUI (with API wrapper)
|
||||
- Fooocus (with API mode)
|
||||
- FLUX-based UIs with compatible API
|
||||
- Any server implementing the /sdapi/v1/txt2img endpoint
|
||||
User provides the full API URL including the endpoint.
|
||||
Examples:
|
||||
- Automatic1111: http://192.168.1.7:7860/sdapi/v1/txt2img
|
||||
- Fooocus: http://192.168.1.7:7860/v1/generation/text-to-image
|
||||
- Custom: http://192.168.1.7:7860/generate
|
||||
|
||||
Supports both:
|
||||
- JSON response with base64 images (Automatic1111 style)
|
||||
- Direct binary image response (raw PNG/JPEG)
|
||||
|
||||
Args:
|
||||
prompt: The text prompt for image generation
|
||||
|
|
@ -164,19 +165,14 @@ class ImageGenerationService:
|
|||
Returns:
|
||||
Path to the generated image file
|
||||
"""
|
||||
local_url = get_local_image_url_env()
|
||||
api_url = get_local_image_url_env()
|
||||
local_model = get_local_image_model_env()
|
||||
|
||||
if not local_url:
|
||||
if not api_url:
|
||||
raise ValueError("LOCAL_IMAGE_URL environment variable is not set")
|
||||
|
||||
# Ensure URL doesn't have trailing slash
|
||||
local_url = local_url.rstrip("/")
|
||||
|
||||
# Build the API endpoint URL (Automatic1111 compatible format)
|
||||
api_url = f"{local_url}/sdapi/v1/txt2img"
|
||||
|
||||
# Build the request payload
|
||||
# Build the request payload (Automatic1111 compatible format)
|
||||
# Most local tools accept similar payload structure
|
||||
payload = {
|
||||
"prompt": prompt,
|
||||
"negative_prompt": "blurry, bad quality, distorted, ugly, deformed",
|
||||
|
|
@ -205,22 +201,46 @@ class ImageGenerationService:
|
|||
error_text = await response.text()
|
||||
raise Exception(f"Local image API error: {response.status} - {error_text}")
|
||||
|
||||
data = await response.json()
|
||||
content_type = response.headers.get("Content-Type", "")
|
||||
|
||||
# API returns images as base64 encoded strings
|
||||
if "images" in data and len(data["images"]) > 0:
|
||||
image_base64 = data["images"][0]
|
||||
|
||||
# Decode base64 and save to file
|
||||
image_data = base64.b64decode(image_base64)
|
||||
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
|
||||
# Handle direct binary image response (image/png, image/jpeg, etc.)
|
||||
if content_type.startswith("image/"):
|
||||
image_data = await response.read()
|
||||
# Determine file extension from content type
|
||||
ext = "png" if "png" in content_type else "jpg"
|
||||
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
|
||||
|
||||
with open(image_path, "wb") as f:
|
||||
f.write(image_data)
|
||||
|
||||
return image_path
|
||||
|
||||
# Handle JSON response with base64 encoded images
|
||||
data = await response.json()
|
||||
|
||||
# Check for images in various response formats
|
||||
if "images" in data and len(data["images"]) > 0:
|
||||
image_base64 = data["images"][0]
|
||||
# Handle if it's a dict with base64 key
|
||||
if isinstance(image_base64, dict) and "base64" in image_base64:
|
||||
image_base64 = image_base64["base64"]
|
||||
elif "image" in data:
|
||||
image_base64 = data["image"]
|
||||
elif "output" in data:
|
||||
image_base64 = data["output"]
|
||||
elif "result" in data:
|
||||
image_base64 = data["result"]
|
||||
else:
|
||||
raise Exception("No images returned from local image API")
|
||||
raise Exception(f"No images found in response. Keys: {list(data.keys())}")
|
||||
|
||||
# Decode base64 and save to file
|
||||
image_data = base64.b64decode(image_base64)
|
||||
image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
|
||||
|
||||
with open(image_path, "wb") as f:
|
||||
f.write(image_data)
|
||||
|
||||
return image_path
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
raise Exception(f"Failed to connect to local image server at {local_url}: {str(e)}")
|
||||
raise Exception(f"Failed to connect to local image server at {api_url}: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -345,12 +345,12 @@ export default function LLMProviderSelection({
|
|||
<div className="mb-8 space-y-4">
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Local Server URL
|
||||
Local API URL (Full Endpoint)
|
||||
</label>
|
||||
<div className="relative">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="http://192.168.1.7:7860"
|
||||
placeholder="http://192.168.1.7:7860/sdapi/v1/txt2img"
|
||||
className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
|
||||
value={llmConfig.LOCAL_IMAGE_URL || ""}
|
||||
onChange={(e) => {
|
||||
|
|
@ -358,10 +358,14 @@ export default function LLMProviderSelection({
|
|||
}}
|
||||
/>
|
||||
</div>
|
||||
<p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
|
||||
<span className="block w-1 h-1 rounded-full bg-gray-400"></span>
|
||||
URL of your local image generation server (Automatic1111, ComfyUI, Fooocus, FLUX, etc.)
|
||||
<p className="mt-2 text-sm text-gray-500">
|
||||
Enter the full API URL including endpoint. Examples:
|
||||
</p>
|
||||
<ul className="mt-1 text-xs text-gray-500 space-y-0.5 ml-4">
|
||||
<li>• Automatic1111: <code className="bg-gray-100 px-1 rounded">http://IP:7860/sdapi/v1/txt2img</code></li>
|
||||
<li>• Fooocus: <code className="bg-gray-100 px-1 rounded">http://IP:7860/v1/generation/text-to-image</code></li>
|
||||
<li>• Use your machine IP address, not localhost</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue