feat: adds gpt image 1.5

2025-12-21 01:14:31 +05:45 · 2025-12-21 01:14:31 +05:45 · 908bea18b5
commit 908bea18b5
parent 738ed1af36
15 changed files with 495 additions and 182 deletions
--- a/servers/fastapi/enums/image_provider.py
+++ b/servers/fastapi/enums/image_provider.py
@ -1,9 +1,11 @@
 from enum import Enum

+
 class ImageProvider(Enum):
    PEXELS = "pexels"
    PIXABAY = "pixabay"
    GEMINI_FLASH = "gemini_flash"
-    NANOBANANA_PRO = "nanobanana_pro"  # Google's gemini-3-pro-image-preview
+    NANOBANANA_PRO = "nanobanana_pro"
    DALLE3 = "dall-e-3"
+    GPT_IMAGE_1_5 = "gpt-image-1.5"
    COMFYUI = "comfyui"
--- a/servers/fastapi/models/user_config.py
+++ b/servers/fastapi/models/user_config.py
@ -36,6 +36,11 @@ class UserConfig(BaseModel):
    COMFYUI_URL: Optional[str] = None
    COMFYUI_WORKFLOW: Optional[str] = None

+    # DALL-E 3 image quality, e.g. "standard" or "hd"
+    DALL_E_3_QUALITY: Optional[str] = None
+    # GPT Image 1.5 image quality, e.g. "low", "medium" or "high"
+    GPT_IMAGE_1_5_QUALITY: Optional[str] = None
+
    # Reasoning
    TOOL_CALLS: Optional[bool] = None
    DISABLE_THINKING: Optional[bool] = None
--- a/servers/fastapi/services/image_generation_service.py
+++ b/servers/fastapi/services/image_generation_service.py
@ -3,17 +3,21 @@ import base64
 import json
 import os
 import aiohttp
+from fastapi import HTTPException
 from google import genai
-from google.genai.types import GenerateContentConfig
-from openai import AsyncOpenAI
+from openai import NOT_GIVEN, AsyncOpenAI
 from models.image_prompt import ImagePrompt
 from models.sql.image_asset import ImageAsset
-from utils.download_helpers import download_file
-from utils.get_env import get_pexels_api_key_env
+from utils.get_env import (
+    get_dall_e_3_quality_env,
+    get_gpt_image_1_5_quality_env,
+    get_pexels_api_key_env,
+)
 from utils.get_env import get_pixabay_api_key_env
 from utils.get_env import get_comfyui_url_env
 from utils.get_env import get_comfyui_workflow_env
 from utils.image_provider import (
+    is_gpt_image_1_5_selected,
    is_image_generation_disabled,
    is_pixels_selected,
    is_pixabay_selected,
@ -44,7 +48,9 @@ class ImageGenerationService:
        elif is_nanobanana_pro_selected():
            return self.generate_image_nanobanana_pro
        elif is_dalle3_selected():
-            return self.generate_image_openai
+            return self.generate_image_openai_dalle3
+        elif is_gpt_image_1_5_selected():
+            return self.generate_image_openai_gpt_image_1_5
        elif is_comfyui_selected():
            return self.generate_image_comfyui
        return None
@ -98,46 +104,83 @@ class ImageGenerationService:
            print(f"Error generating image: {e}")
            return "/static/images/placeholder.jpg"

-    async def generate_image_openai(self, prompt: str, output_directory: str,model: str = "dall-e-3") -> str:
-    
+    async def generate_image_openai(
+        self, prompt: str, output_directory: str, model: str, quality: str
+    ) -> str:
        client = AsyncOpenAI()
        result = await client.images.generate(
            model=model,
            prompt=prompt,
            n=1,
-            quality="standard",
+            quality=quality,
+            response_format="b64_json" if model == "dall-e-3" else NOT_GIVEN,
            size="1024x1024",
        )
-        image_url = result.data[0].url
-        return await download_file(image_url, output_directory)
+        image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png")
+        with open(image_path, "wb") as f:
+            f.write(base64.b64decode(result.data[0].b64_json))
+        return image_path

-    async def _generate_image_google(self, prompt: str, output_directory: str, model: str) -> str:
+    async def generate_image_openai_dalle3(
+        self, prompt: str, output_directory: str
+    ) -> str:
+        return await self.generate_image_openai(
+            prompt,
+            output_directory,
+            "dall-e-3",
+            get_dall_e_3_quality_env() or "standard",
+        )
+
+    async def generate_image_openai_gpt_image_1_5(
+        self, prompt: str, output_directory: str
+    ) -> str:
+        return await self.generate_image_openai(
+            prompt,
+            output_directory,
+            "gpt-image-1.5",
+            get_gpt_image_1_5_quality_env() or "medium",
+        )
+
+    async def _generate_image_google(
+        self, prompt: str, output_directory: str, model: str
+    ) -> str:
        """Base method for Google image generation models."""
        client = genai.Client()
        response = await asyncio.to_thread(
            client.models.generate_content,
            model=model,
            contents=[prompt],
-            config=GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]),
        )

+        image_path = None
        for part in response.candidates[0].content.parts:
-            if part.text is not None:
-                print(part.text)
-            elif part.inline_data is not None:
+            if part.inline_data is not None:
+                image = part.as_image()
                image_path = os.path.join(output_directory, f"{uuid.uuid4()}.jpg")
-                with open(image_path, "wb") as f:
-                    f.write(part.inline_data.data)
+                image.save(image_path)
+
+        if not image_path:
+            raise HTTPException(
+                status_code=500, detail=f"No image generated by google {model}"
+            )

        return image_path

-    async def generate_image_gemini_flash(self, prompt: str, output_directory: str) -> str:
+    async def generate_image_gemini_flash(
+        self, prompt: str, output_directory: str
+    ) -> str:
        """Generate image using Gemini Flash (gemini-2.5-flash-image-preview)."""
-        return await self._generate_image_google(prompt, output_directory, "gemini-2.5-flash-image-preview")
+        return await self._generate_image_google(
+            prompt, output_directory, "gemini-2.5-flash-image-preview"
+        )

-    async def generate_image_nanobanana_pro(self, prompt: str, output_directory: str) -> str:
+    async def generate_image_nanobanana_pro(
+        self, prompt: str, output_directory: str
+    ) -> str:
        """Generate image using NanoBanana Pro (gemini-3-pro-image-preview)."""
-        return await self._generate_image_google(prompt, output_directory, "gemini-3-pro-image-preview")
+        return await self._generate_image_google(
+            prompt, output_directory, "gemini-3-pro-image-preview"
+        )

    async def get_image_from_pexels(self, prompt: str) -> str:
        async with aiohttp.ClientSession(trust_env=True) as session:
@ -161,134 +204,145 @@ class ImageGenerationService:
    async def generate_image_comfyui(self, prompt: str, output_directory: str) -> str:
        """
        Generate image using ComfyUI workflow API.
-        
+
        User provides:
        - COMFYUI_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188)
        - COMFYUI_WORKFLOW: Workflow JSON exported from ComfyUI
-        
+
        The workflow must contain a node titled "Input Prompt" (case-insensitive)
        with a "text" input; the prompt is injected into that input.
-        
+
        Args:
            prompt: The text prompt for image generation
            output_directory: Directory to save the generated image
-            
+
        Returns:
            Path to the generated image file
        """
        comfyui_url = get_comfyui_url_env()
        workflow_json = get_comfyui_workflow_env()
-        
+
        if not comfyui_url:
            raise ValueError("COMFYUI_URL environment variable is not set")
-        
+
        if not workflow_json:
-            raise ValueError("COMFYUI_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.")
-        
+            raise ValueError(
+                "COMFYUI_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON."
+            )
+
        # Ensure URL doesn't have trailing slash
        comfyui_url = comfyui_url.rstrip("/")
-        
+
        # Parse the workflow JSON
        try:
            workflow = json.loads(workflow_json)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid workflow JSON: {str(e)}")
-        
+
        # Find and update the positive prompt node
        workflow = self._inject_prompt_into_workflow(workflow, prompt)
-        
+
        async with aiohttp.ClientSession(trust_env=True) as session:
            # Step 1: Submit workflow
-            prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow)
-            
+            prompt_id = await self._submit_comfyui_workflow(
+                session, comfyui_url, workflow
+            )
+
            # Step 2: Wait for completion
-            status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id)
-            
+            status_data = await self._wait_for_comfyui_completion(
+                session, comfyui_url, prompt_id
+            )
+
            # Step 3: Download the generated image
            image_path = await self._download_comfyui_image(
                session, comfyui_url, status_data, prompt_id, output_directory
            )
-            
+
            return image_path
-    
+
    def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict:
        """
        Find the prompt node in the workflow and inject the prompt text.
        Looks for a node with title 'Input Prompt' (case-insensitive).
-        
+
        User must rename their prompt node to 'Input Prompt' in ComfyUI.
        """
        for node_id, node_data in workflow.items():
            meta = node_data.get("_meta", {})
            title = meta.get("title", "").lower()
-            
+
            if title == "input prompt":
                if "inputs" in node_data and "text" in node_data["inputs"]:
                    node_data["inputs"]["text"] = prompt
-                    print(f"Injected prompt into node {node_id}: {meta.get('title', '')}")
+                    print(
+                        f"Injected prompt into node {node_id}: {meta.get('title', '')}"
+                    )
                    return workflow
-        
-        raise ValueError("Could not find a node with title 'Input Prompt' in the workflow. Please rename your prompt node to 'Input Prompt' in ComfyUI.")
-    
+
+        raise ValueError(
+            "Could not find a node with title 'Input Prompt' in the workflow. Please rename your prompt node to 'Input Prompt' in ComfyUI."
+        )
+
    async def _submit_comfyui_workflow(
        self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict
    ) -> str:
        """Submit workflow to ComfyUI and return the prompt_id."""
        client_id = str(uuid.uuid4())
-        payload = {
-            "prompt": workflow,
-            "client_id": client_id
-        }
-        
+        payload = {"prompt": workflow, "client_id": client_id}
+
        response = await session.post(
            f"{comfyui_url}/prompt",
            json=payload,
-            timeout=aiohttp.ClientTimeout(total=30)
+            timeout=aiohttp.ClientTimeout(total=30),
        )
-        
+
        if response.status != 200:
            error_text = await response.text()
            raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}")
-        
+
        data = await response.json()
        prompt_id = data.get("prompt_id")
-        
+
        if not prompt_id:
            raise Exception("No prompt_id returned from ComfyUI")
-        
+
        print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}")
        return prompt_id
-    
+
    async def _wait_for_comfyui_completion(
-        self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str,
-        timeout: int = 300, poll_interval: int = 4
+        self,
+        session: aiohttp.ClientSession,
+        comfyui_url: str,
+        prompt_id: str,
+        timeout: int = 300,
+        poll_interval: int = 4,
    ) -> dict:
        """Poll ComfyUI history endpoint until workflow completes."""
        start_time = asyncio.get_event_loop().time()
-        
+
        while True:
            elapsed = asyncio.get_event_loop().time() - start_time
            if elapsed > timeout:
                raise Exception(f"ComfyUI workflow timed out after {timeout} seconds")
-            
+
            await asyncio.sleep(poll_interval)
-            
+
            response = await session.get(
                f"{comfyui_url}/history/{prompt_id}",
-                timeout=aiohttp.ClientTimeout(total=30)
+                timeout=aiohttp.ClientTimeout(total=30),
            )
-            
+
            if response.status != 200:
                continue
-            
+
            try:
                status_data = await response.json()
-            except:
+            except Exception as _:
                continue
-            
+
            if prompt_id in status_data:
                execution_data = status_data[prompt_id]
-                
+
                # Check for completion
                if "status" in execution_data:
                    status = execution_data["status"]
@ -297,62 +351,65 @@ class ImageGenerationService:
                        return status_data
                    if "error" in status:
                        raise Exception(f"ComfyUI workflow error: {status['error']}")
-                
+
                # Also check if outputs exist (alternative completion check)
                if "outputs" in execution_data and execution_data["outputs"]:
                    print("ComfyUI workflow completed (outputs found)")
                    return status_data
-            
+
            print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)")
-    
+
    async def _download_comfyui_image(
-        self, session: aiohttp.ClientSession, comfyui_url: str,
-        status_data: dict, prompt_id: str, output_directory: str
+        self,
+        session: aiohttp.ClientSession,
+        comfyui_url: str,
+        status_data: dict,
+        prompt_id: str,
+        output_directory: str,
    ) -> str:
        """Download the generated image from ComfyUI."""
        if prompt_id not in status_data:
            raise Exception("Prompt ID not found in status data")
-        
+
        outputs = status_data[prompt_id].get("outputs", {})
-        
+
        if not outputs:
            raise Exception("No outputs found in ComfyUI response")
-        
+
        # Find the first image in outputs
        for node_id, node_output in outputs.items():
            if "images" in node_output:
                for image_info in node_output["images"]:
                    filename = image_info["filename"]
                    subfolder = image_info.get("subfolder", "")
-                    
+
                    # Build view params
-                    params = {
-                        "filename": filename,
-                        "type": "output"
-                    }
+                    params = {"filename": filename, "type": "output"}
                    if subfolder:
                        params["subfolder"] = subfolder
-                    
+
                    # Download the image
                    response = await session.get(
                        f"{comfyui_url}/view",
                        params=params,
-                        timeout=aiohttp.ClientTimeout(total=60)
+                        timeout=aiohttp.ClientTimeout(total=60),
                    )
-                    
+
                    if response.status == 200:
                        image_data = await response.read()
-                        
+
                        # Determine extension
                        ext = filename.split(".")[-1] if "." in filename else "png"
-                        image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}")
-                        
+                        image_path = os.path.join(
+                            output_directory, f"{uuid.uuid4()}.{ext}"
+                        )
+
                        with open(image_path, "wb") as f:
                            f.write(image_data)
-                        
+
                        print(f"Downloaded image from ComfyUI: {image_path}")
                        return image_path
                    else:
                        raise Exception(f"Failed to download image: {response.status}")
-        
+
        raise Exception("No images found in ComfyUI outputs")
--- a/servers/fastapi/utils/get_env.py
+++ b/servers/fastapi/utils/get_env.py
@ -107,3 +107,13 @@ def get_comfyui_url_env():

 def get_comfyui_workflow_env():
    return os.getenv("COMFYUI_WORKFLOW")
+
+
+# DALL-E 3 image quality (DALL_E_3_QUALITY environment variable)
+def get_dall_e_3_quality_env():
+    return os.getenv("DALL_E_3_QUALITY")
+
+
+# GPT Image 1.5 image quality (GPT_IMAGE_1_5_QUALITY environment variable)
+def get_gpt_image_1_5_quality_env():
+    return os.getenv("GPT_IMAGE_1_5_QUALITY")
--- a/servers/fastapi/utils/image_provider.py
+++ b/servers/fastapi/utils/image_provider.py
@ -1,12 +1,7 @@
 from enums.image_provider import ImageProvider
 from utils.get_env import (
-    get_comfyui_url_env,
    get_disable_image_generation_env,
-    get_google_api_key_env,
    get_image_provider_env,
-    get_openai_api_key_env,
-    get_pexels_api_key_env,
-    get_pixabay_api_key_env,
 )
 from utils.parsers import parse_bool_or_none

@ -35,6 +30,10 @@ def is_dalle3_selected() -> bool:
    return ImageProvider.DALLE3 == get_selected_image_provider()


+def is_gpt_image_1_5_selected() -> bool:
+    return ImageProvider.GPT_IMAGE_1_5 == get_selected_image_provider()
+
+
 def is_comfyui_selected() -> bool:
    return ImageProvider.COMFYUI == get_selected_image_provider()

@ -49,19 +48,3 @@ def get_selected_image_provider() -> ImageProvider | None:
    if image_provider_env:
        return ImageProvider(image_provider_env)
    return None
-
-
-def get_image_provider_api_key() -> str:
-    selected_image_provider = get_selected_image_provider()
-    if selected_image_provider == ImageProvider.PEXELS:
-        return get_pexels_api_key_env()
-    elif selected_image_provider == ImageProvider.PIXABAY:
-        return get_pixabay_api_key_env()
-    elif selected_image_provider == ImageProvider.GEMINI_FLASH:
-        return get_google_api_key_env()
-    elif selected_image_provider == ImageProvider.DALLE3:
-        return get_openai_api_key_env()
-    elif selected_image_provider == ImageProvider.COMFYUI:
-        return get_comfyui_url_env()  # Returns URL instead of API key
-    else:
-        raise ValueError(f"Invalid image provider: {selected_image_provider}")
--- a/servers/fastapi/utils/model_availability.py
+++ b/servers/fastapi/utils/model_availability.py
@ -128,15 +128,22 @@ async def check_llm_and_image_provider_api_or_model_availability():
            if not pixabay_api_key:
                raise Exception("PIXABAY_API_KEY must be provided")

-        elif selected_image_provider == ImageProvider.GEMINI_FLASH:
+        elif (
+            selected_image_provider == ImageProvider.GEMINI_FLASH
+            or selected_image_provider == ImageProvider.NANOBANANA_PRO
+        ):
            google_api_key = get_google_api_key_env()
            if not google_api_key:
                raise Exception("GOOGLE_API_KEY must be provided")

-        elif selected_image_provider == ImageProvider.DALLE3:
+        elif (
+            selected_image_provider == ImageProvider.DALLE3
+            or selected_image_provider == ImageProvider.GPT_IMAGE_1_5
+        ):
            openai_api_key = get_openai_api_key_env()
            if not openai_api_key:
                raise Exception("OPENAI_API_KEY must be provided")
+
        elif selected_image_provider == ImageProvider.COMFYUI:
            comfyui_url = get_comfyui_url_env()
            if not comfyui_url:
--- a/servers/fastapi/utils/set_env.py
+++ b/servers/fastapi/utils/set_env.py
@ -95,3 +95,11 @@ def set_comfyui_url_env(value):

 def set_comfyui_workflow_env(value):
    os.environ["COMFYUI_WORKFLOW"] = value
+
+
+def set_dall_e_3_quality_env(value):
+    os.environ["DALL_E_3_QUALITY"] = value
+
+
+def set_gpt_image_1_5_quality_env(value):
+    os.environ["GPT_IMAGE_1_5_QUALITY"] = value
--- a/servers/fastapi/utils/user_config.py
+++ b/servers/fastapi/utils/user_config.py
@ -10,10 +10,12 @@ from utils.get_env import (
    get_custom_llm_api_key_env,
    get_custom_llm_url_env,
    get_custom_model_env,
+    get_dall_e_3_quality_env,
    get_disable_image_generation_env,
    get_disable_thinking_env,
    get_google_api_key_env,
    get_google_model_env,
+    get_gpt_image_1_5_quality_env,
    get_llm_provider_env,
    get_ollama_model_env,
    get_ollama_url_env,
@ -36,11 +38,13 @@ from utils.set_env import (
    set_custom_llm_api_key_env,
    set_custom_llm_url_env,
    set_custom_model_env,
+    set_dall_e_3_quality_env,
    set_disable_image_generation_env,
    set_disable_thinking_env,
    set_extended_reasoning_env,
    set_google_api_key_env,
    set_google_model_env,
+    set_gpt_image_1_5_quality_env,
    set_llm_provider_env,
    set_ollama_model_env,
    set_ollama_url_env,
@ -91,6 +95,9 @@ def get_user_config():
        PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(),
        COMFYUI_URL=existing_config.COMFYUI_URL or get_comfyui_url_env(),
        COMFYUI_WORKFLOW=existing_config.COMFYUI_WORKFLOW or get_comfyui_workflow_env(),
+        DALL_E_3_QUALITY=existing_config.DALL_E_3_QUALITY or get_dall_e_3_quality_env(),
+        GPT_IMAGE_1_5_QUALITY=existing_config.GPT_IMAGE_1_5_QUALITY
+        or get_gpt_image_1_5_quality_env(),
        TOOL_CALLS=(
            existing_config.TOOL_CALLS
            if existing_config.TOOL_CALLS is not None
@ -152,6 +159,10 @@ def update_env_with_user_config():
        set_comfyui_url_env(user_config.COMFYUI_URL)
    if user_config.COMFYUI_WORKFLOW:
        set_comfyui_workflow_env(user_config.COMFYUI_WORKFLOW)
+    if user_config.DALL_E_3_QUALITY:
+        set_dall_e_3_quality_env(user_config.DALL_E_3_QUALITY)
+    if user_config.GPT_IMAGE_1_5_QUALITY:
+        set_gpt_image_1_5_quality_env(user_config.GPT_IMAGE_1_5_QUALITY)
    if user_config.TOOL_CALLS is not None:
        set_tool_calls_env(str(user_config.TOOL_CALLS))
    if user_config.DISABLE_THINKING is not None:
--- a/servers/nextjs/app/api/user-config/route.ts
+++ b/servers/nextjs/app/api/user-config/route.ts
@ -65,7 +65,12 @@ export async function POST(request: Request) {
    IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER,
    PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY,
    COMFYUI_URL: userConfig.COMFYUI_URL || existingConfig.COMFYUI_URL,
-    COMFYUI_WORKFLOW: userConfig.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW,
+    COMFYUI_WORKFLOW:
+      userConfig.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW,
+    DALL_E_3_QUALITY:
+      userConfig.DALL_E_3_QUALITY || existingConfig.DALL_E_3_QUALITY,
+    GPT_IMAGE_1_5_QUALITY:
+      userConfig.GPT_IMAGE_1_5_QUALITY || existingConfig.GPT_IMAGE_1_5_QUALITY,
    TOOL_CALLS:
      userConfig.TOOL_CALLS === undefined
        ? existingConfig.TOOL_CALLS
--- a/servers/nextjs/components/LLMSelection.tsx
+++ b/servers/nextjs/components/LLMSelection.tsx
@ -26,6 +26,37 @@ import {
 import { IMAGE_PROVIDERS, LLM_PROVIDERS } from "@/utils/providerConstants";
 import { LLMConfig } from "@/types/llm_config";

+const DALLE_3_QUALITY_OPTIONS = [
+  {
+    label: "Standard",
+    value: "standard",
+    description: "Faster generation with lower cost",
+  },
+  {
+    label: "HD",
+    value: "hd",
+    description: "Higher quality images with increased cost",
+  },
+];
+
+const GPT_IMAGE_1_5_QUALITY_OPTIONS = [
+  {
+    label: "Low",
+    value: "low",
+    description: "Fastest and most cost-effective",
+  },
+  {
+    label: "Medium",
+    value: "medium",
+    description: "Balanced quality and speed",
+  },
+  {
+    label: "High",
+    value: "high",
+    description: "Best quality with longer generation time",
+  },
+];
+
 // Button state interface
 interface ButtonState {
  isLoading: boolean;
@ -40,7 +71,9 @@ interface LLMProviderSelectionProps {
  initialLLMConfig: LLMConfig;
  onConfigChange: (config: LLMConfig) => void;
  buttonState: ButtonState;
-  setButtonState: (state: ButtonState | ((prev: ButtonState) => ButtonState)) => void;
+  setButtonState: (
+    state: ButtonState | ((prev: ButtonState) => ButtonState)
+  ) => void;
 }

 export default function LLMProviderSelection({
@ -71,29 +104,43 @@ export default function LLMProviderSelection({

    const needsImageProviderApiKey =
      !llmConfig.DISABLE_IMAGE_GENERATION &&
-      (
-        (llmConfig.IMAGE_PROVIDER === "dall-e-3" && !llmConfig.OPENAI_API_KEY) ||
-        (llmConfig.IMAGE_PROVIDER === "gemini_flash" && !llmConfig.GOOGLE_API_KEY) ||
-        (llmConfig.IMAGE_PROVIDER === "nanobanana_pro" && !llmConfig.GOOGLE_API_KEY) ||
+      ((llmConfig.IMAGE_PROVIDER === "dall-e-3" && !llmConfig.OPENAI_API_KEY) ||
+        (llmConfig.IMAGE_PROVIDER === "gpt-image-1.5" &&
+          !llmConfig.OPENAI_API_KEY) ||
+        (llmConfig.IMAGE_PROVIDER === "gemini_flash" &&
+          !llmConfig.GOOGLE_API_KEY) ||
+        (llmConfig.IMAGE_PROVIDER === "nanobanana_pro" &&
+          !llmConfig.GOOGLE_API_KEY) ||
        (llmConfig.IMAGE_PROVIDER === "pexels" && !llmConfig.PEXELS_API_KEY) ||
-        (llmConfig.IMAGE_PROVIDER === "pixabay" && !llmConfig.PIXABAY_API_KEY)
-      );
+        (llmConfig.IMAGE_PROVIDER === "pixabay" && !llmConfig.PIXABAY_API_KEY));

    const needsApiKey = needsProviderApiKey || needsImageProviderApiKey;

-    const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL);
+    const needsOllamaUrl = llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL;

-    const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION &&
+    const needsComfyUIConfig =
+      !llmConfig.DISABLE_IMAGE_GENERATION &&
      llmConfig.IMAGE_PROVIDER === "comfyui" &&
      (!llmConfig.COMFYUI_URL || !llmConfig.COMFYUI_WORKFLOW);

    setButtonState({
      isLoading: false,
-      isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig,
-      text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration",
-      showProgress: false
+      isDisabled:
+        needsModelSelection ||
+        needsApiKey ||
+        needsOllamaUrl ||
+        needsComfyUIConfig,
+      text: needsModelSelection
+        ? "Please Select a Model"
+        : needsApiKey
+        ? "Please Enter API Key"
+        : needsOllamaUrl
+        ? "Please Enter Ollama URL"
+        : needsComfyUIConfig
+        ? "Please Configure ComfyUI"
+        : "Save Configuration",
+      showProgress: false,
    });
-
  }, [llmConfig]);

  const input_field_changed = (new_value: string | boolean, field: string) => {
@ -101,6 +148,45 @@ export default function LLMProviderSelection({
    setLlmConfig(updatedConfig);
  };

+  const getApiKeyValue = (field?: string) => {
+    switch (field) {
+      case "OPENAI_API_KEY":
+        return llmConfig.OPENAI_API_KEY || "";
+      case "GOOGLE_API_KEY":
+        return llmConfig.GOOGLE_API_KEY || "";
+      case "ANTHROPIC_API_KEY":
+        return llmConfig.ANTHROPIC_API_KEY || "";
+      case "PEXELS_API_KEY":
+        return llmConfig.PEXELS_API_KEY || "";
+      case "PIXABAY_API_KEY":
+        return llmConfig.PIXABAY_API_KEY || "";
+      default:
+        return "";
+    }
+  };
+
+  const handleApiKeyInputChange = (field: string | undefined, value: string) => {
+    switch (field) {
+      case "OPENAI_API_KEY":
+        input_field_changed(value, "openai_api_key");
+        break;
+      case "GOOGLE_API_KEY":
+        input_field_changed(value, "google_api_key");
+        break;
+      case "ANTHROPIC_API_KEY":
+        input_field_changed(value, "anthropic_api_key");
+        break;
+      case "PEXELS_API_KEY":
+        input_field_changed(value, "pexels_api_key");
+        break;
+      case "PIXABAY_API_KEY":
+        input_field_changed(value, "pixabay_api_key");
+        break;
+      default:
+        break;
+    }
+  };
+
  const handleProviderChange = (provider: string) => {
    const newConfig = changeProviderUtil(llmConfig, provider);
    setLlmConfig(newConfig);
@ -122,7 +208,7 @@ export default function LLMProviderSelection({

      if (!prevConfig.DISABLE_IMAGE_GENERATION && !prevConfig.IMAGE_PROVIDER) {
        if (prevConfig.LLM === "openai") {
-          updates.IMAGE_PROVIDER = "dall-e-3";
+          updates.IMAGE_PROVIDER = "gpt-image-1.5";
        } else if (prevConfig.LLM === "google") {
          updates.IMAGE_PROVIDER = "gemini_flash";
        } else {
@ -142,6 +228,104 @@ export default function LLMProviderSelection({
    });
  }, []);

+  useEffect(() => {
+    setLlmConfig((prevConfig) => {
+      const updates: Partial<LLMConfig> = {};
+
+      if (
+        prevConfig.IMAGE_PROVIDER === "dall-e-3" &&
+        !prevConfig.DALL_E_3_QUALITY
+      ) {
+        updates.DALL_E_3_QUALITY = "standard";
+      }
+
+      if (
+        prevConfig.IMAGE_PROVIDER === "gpt-image-1.5" &&
+        !prevConfig.GPT_IMAGE_1_5_QUALITY
+      ) {
+        updates.GPT_IMAGE_1_5_QUALITY = "medium";
+      }
+
+      if (Object.keys(updates).length === 0) {
+        return prevConfig;
+      }
+
+      return { ...prevConfig, ...updates };
+    });
+  }, [llmConfig.IMAGE_PROVIDER]);
+
+  const renderQualitySelector = () => {
+    if (llmConfig.IMAGE_PROVIDER === "dall-e-3") {
+      return (
+        <div className="mb-8">
+          <label className="block text-sm font-medium text-gray-700 mb-2">
+            DALL·E 3 Image Quality
+          </label>
+          <div className="grid grid-cols-2 gap-3">
+            {DALLE_3_QUALITY_OPTIONS.map((option) => (
+              <button
+                key={option.value}
+                type="button"
+                className={cn(
+                  "border rounded-lg p-3 text-left transition-colors",
+                  llmConfig.DALL_E_3_QUALITY === option.value
+                    ? "border-blue-500 bg-blue-50"
+                    : "border-gray-200 hover:border-gray-300"
+                )}
+                onClick={() =>
+                  input_field_changed(option.value, "dall_e_3_quality")
+                }
+              >
+                <div className="text-sm font-medium text-gray-900">
+                  {option.label}
+                </div>
+                <div className="text-xs text-gray-600 mt-1">
+                  {option.description}
+                </div>
+              </button>
+            ))}
+          </div>
+        </div>
+      );
+    }
+
+    if (llmConfig.IMAGE_PROVIDER === "gpt-image-1.5") {
+      return (
+        <div className="mb-8">
+          <label className="block text-sm font-medium text-gray-700 mb-2">
+            GPT Image 1.5 Quality
+          </label>
+          <div className="grid grid-cols-3 gap-3">
+            {GPT_IMAGE_1_5_QUALITY_OPTIONS.map((option) => (
+              <button
+                key={option.value}
+                type="button"
+                className={cn(
+                  "border rounded-lg p-3 text-left transition-colors",
+                  llmConfig.GPT_IMAGE_1_5_QUALITY === option.value
+                    ? "border-blue-500 bg-blue-50"
+                    : "border-gray-200 hover:border-gray-300"
+                )}
+                onClick={() =>
+                  input_field_changed(option.value, "gpt_image_1_5_quality")
+                }
+              >
+                <div className="text-sm font-medium text-gray-900">
+                  {option.label}
+                </div>
+                <div className="text-xs text-gray-600 mt-1">
+                  {option.description}
+                </div>
+              </button>
+            ))}
+          </div>
+        </div>
+      );
+    }
+
+    return null;
+  };
+
  return (
    <div className="h-full flex flex-col mt-10">
      {/* Provider Selection - Fixed Header */}
@ -161,7 +345,6 @@ export default function LLMProviderSelection({
        </Tabs>
      </div>

-
      {/* Scrollable Content */}
      <div className="flex-1 overflow-y-auto p-6 pt-0 custom_scrollbar">
        <Tabs
@ -241,7 +424,8 @@ export default function LLMProviderSelection({
          </div>
          <p className="text-sm text-gray-500 flex items-center gap-2">
            <span className="block w-1 h-1 rounded-full bg-gray-400"></span>
-            When enabled, slides will not include automatically generated images.
+            When enabled, slides will not include automatically generated
+            images.
          </p>
        </div>

@ -267,8 +451,8 @@ export default function LLMProviderSelection({
                      <div className="flex gap-3 items-center">
                        <span className="text-sm font-medium text-gray-900">
                          {llmConfig.IMAGE_PROVIDER
-                            ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]?.label ||
-                            llmConfig.IMAGE_PROVIDER
+                            ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]
+                                ?.label || llmConfig.IMAGE_PROVIDER
                            : "Select image provider"}
                        </span>
                      </div>
@ -326,6 +510,8 @@ export default function LLMProviderSelection({
              </div>
            </div>

+            {renderQualitySelector()}
+
            {/* Dynamic API Key Input for Image Provider */}
            {llmConfig.IMAGE_PROVIDER &&
              IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER] &&
@ -333,15 +519,31 @@ export default function LLMProviderSelection({
                const provider = IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER];

                // Show info message when using same API key as main provider
-                if (provider.value === "dall-e-3" && llmConfig.LLM === "openai") {
+                if (
+                  provider.value === "dall-e-3" &&
+                  llmConfig.LLM === "openai"
+                ) {
                  return <></>;
                }

-                if (provider.value === "gemini_flash" && llmConfig.LLM === "google") {
+                if (
+                  provider.value === "gpt-image-1.5" &&
+                  llmConfig.LLM === "openai"
+                ) {
                  return <></>;
                }

-                if (provider.value === "nanobanana_pro" && llmConfig.LLM === "google") {
+                if (
+                  provider.value === "gemini_flash" &&
+                  llmConfig.LLM === "google"
+                ) {
+                  return <></>;
+                }
+
+                if (
+                  provider.value === "nanobanana_pro" &&
+                  llmConfig.LLM === "google"
+                ) {
                  return <></>;
                }

@ -360,13 +562,17 @@ export default function LLMProviderSelection({
                            className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
                            value={llmConfig.COMFYUI_URL || ""}
                            onChange={(e) => {
-                              input_field_changed(e.target.value, "comfyui_url");
+                              input_field_changed(
+                                e.target.value,
+                                "comfyui_url"
+                              );
                            }}
                          />
                        </div>
                        <p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
                          <span className="block w-1 h-1 rounded-full bg-gray-400"></span>
-                          Use your machine IP address (not localhost) when running in Docker
+                          Use your machine IP address (not localhost) when
+                          running in Docker
                        </p>
                      </div>
                      <div>
@ -380,13 +586,16 @@ export default function LLMProviderSelection({
                            rows={6}
                            value={llmConfig.COMFYUI_WORKFLOW || ""}
                            onChange={(e) => {
-                              input_field_changed(e.target.value, "comfyui_workflow");
+                              input_field_changed(
+                                e.target.value,
+                                "comfyui_workflow"
+                              );
                            }}
                          />
                        </div>
                        <p className="mt-2 text-sm text-gray-500">
-                          Export your workflow from ComfyUI using &quot;Export (API)&quot; and paste the JSON here.
-
+                          Export your workflow from ComfyUI using &quot;Export
+                          (API)&quot; and paste the JSON here.
                        </p>
                      </div>
                    </div>
@ -404,20 +613,13 @@ export default function LLMProviderSelection({
                        type="text"
                        placeholder={`Enter your ${provider.apiKeyFieldLabel}`}
                        className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors"
-                        value={
-                          provider.apiKeyField === "PEXELS_API_KEY"
-                            ? llmConfig.PEXELS_API_KEY || ""
-                            : provider.apiKeyField === "PIXABAY_API_KEY"
-                              ? llmConfig.PIXABAY_API_KEY || ""
-                              : ""
+                        value={getApiKeyValue(provider.apiKeyField)}
+                        onChange={(e) =>
+                          handleApiKeyInputChange(
+                            provider.apiKeyField,
+                            e.target.value
+                          )
                        }
-                        onChange={(e) => {
-                          if (provider.apiKeyField === "PEXELS_API_KEY") {
-                            input_field_changed(e.target.value, "pexels_api_key");
-                          } else if (provider.apiKeyField === "PIXABAY_API_KEY") {
-                            input_field_changed(e.target.value, "pixabay_api_key");
-                          }
-                        }}
                      />
                    </div>
                    <p className="mt-2 text-sm text-gray-500 flex items-center gap-2">
@ -443,14 +645,14 @@ export default function LLMProviderSelection({
                {llmConfig.LLM === "ollama"
                  ? llmConfig.OLLAMA_MODEL ?? "xxxxx"
                  : llmConfig.LLM === "custom"
-                    ? llmConfig.CUSTOM_MODEL ?? "xxxxx"
-                    : llmConfig.LLM === "anthropic"
-                      ? llmConfig.ANTHROPIC_MODEL ?? "xxxxx"
-                      : llmConfig.LLM === "google"
-                        ? llmConfig.GOOGLE_MODEL ?? "xxxxx"
-                        : llmConfig.LLM === "openai"
-                          ? llmConfig.OPENAI_MODEL ?? "xxxxx"
-                          : "xxxxx"}{" "}
+                  ? llmConfig.CUSTOM_MODEL ?? "xxxxx"
+                  : llmConfig.LLM === "anthropic"
+                  ? llmConfig.ANTHROPIC_MODEL ?? "xxxxx"
+                  : llmConfig.LLM === "google"
+                  ? llmConfig.GOOGLE_MODEL ?? "xxxxx"
+                  : llmConfig.LLM === "openai"
+                  ? llmConfig.OPENAI_MODEL ?? "xxxxx"
+                  : "xxxxx"}{" "}
                for text generation{" "}
                {isImageGenerationDisabled ? (
                  "and image generation is disabled."
@ -458,7 +660,7 @@ export default function LLMProviderSelection({
                  <>
                    and{" "}
                    {llmConfig.IMAGE_PROVIDER &&
-                      IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]
+                    IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]
                      ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER].label
                      : "xxxxx"}{" "}
                    for images
@ -468,8 +670,7 @@ export default function LLMProviderSelection({
            </div>
          </div>
        </div>
-
      </div>
    </div>
  );
-} 
+}
--- a/servers/nextjs/types/llm_config.ts
+++ b/servers/nextjs/types/llm_config.ts
@ -32,6 +32,11 @@ export interface LLMConfig {
  COMFYUI_URL?: string;
  COMFYUI_WORKFLOW?: string;

+  // DALL-E 3 Quality
+  DALL_E_3_QUALITY?: string;
+  // GPT Image 1.5 Quality
+  GPT_IMAGE_1_5_QUALITY?: string;
+
  // Other Configs
  TOOL_CALLS?: boolean;
  DISABLE_THINKING?: boolean;
--- a/servers/nextjs/utils/providerConstants.ts
+++ b/servers/nextjs/utils/providerConstants.ts
@ -32,7 +32,7 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
    icon: "/icons/pexels.png",
    requiresApiKey: true,
    apiKeyField: "PEXELS_API_KEY",
-    apiKeyFieldLabel: "Pexels API Key"
+    apiKeyFieldLabel: "Pexels API Key",
  },
  pixabay: {
    value: "pixabay",
@ -41,16 +41,25 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
    icon: "/icons/pixabay.png",
    requiresApiKey: true,
    apiKeyField: "PIXABAY_API_KEY",
-    apiKeyFieldLabel: "Pixabay API Key"
+    apiKeyFieldLabel: "Pixabay API Key",
  },
  "dall-e-3": {
    value: "dall-e-3",
    label: "DALL-E 3",
-    description: "OpenAI's latest image generation model",
+    description: "OpenAI's image generation model",
    icon: "/icons/dall-e.png",
    requiresApiKey: true,
    apiKeyField: "OPENAI_API_KEY",
-    apiKeyFieldLabel: "OpenAI API Key"
+    apiKeyFieldLabel: "OpenAI API Key",
+  },
+  "gpt-image-1.5": {
+    value: "gpt-image-1.5",
+    label: "GPT Image 1.5",
+    description: "OpenAI's image generation model",
+    icon: "/icons/gpt.png",
+    requiresApiKey: true,
+    apiKeyField: "OPENAI_API_KEY",
+    apiKeyFieldLabel: "OpenAI API Key",
  },
  gemini_flash: {
    value: "gemini_flash",
@ -59,16 +68,16 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
    icon: "/icons/google.png",
    requiresApiKey: true,
    apiKeyField: "GOOGLE_API_KEY",
-    apiKeyFieldLabel: "Google API Key"
+    apiKeyFieldLabel: "Google API Key",
  },
  nanobanana_pro: {
    value: "nanobanana_pro",
    label: "NanoBanana Pro",
-    description: "Google's advanced image generation (4K, better quality)",
+    description: "Google's advanced image generation model",
    icon: "/icons/google.png",
    requiresApiKey: true,
    apiKeyField: "GOOGLE_API_KEY",
-    apiKeyFieldLabel: "Google API Key"
+    apiKeyFieldLabel: "Google API Key",
  },
  comfyui: {
    value: "comfyui",
@ -77,7 +86,7 @@ export const IMAGE_PROVIDERS: Record<string, ImageProviderOption> = {
    icon: "/icons/comfyui.png",
    requiresApiKey: false,
    apiKeyField: "COMFYUI_URL",
-    apiKeyFieldLabel: "ComfyUI Server URL"
+    apiKeyFieldLabel: "ComfyUI Server URL",
  },
 };

@ -107,4 +116,4 @@ export const LLM_PROVIDERS: Record<string, LLMProviderOption> = {
    label: "Custom",
    description: "Custom LLM",
  },
-}; 
+};
--- a/servers/nextjs/utils/providerUtils.ts
+++ b/servers/nextjs/utils/providerUtils.ts
@ -50,6 +50,8 @@ export const updateLLMConfig = (
    web_grounding: "WEB_GROUNDING",
    comfyui_url: "COMFYUI_URL",
    comfyui_workflow: "COMFYUI_WORKFLOW",
+    dall_e_3_quality: "DALL_E_3_QUALITY",
+    gpt_image_1_5_quality: "GPT_IMAGE_1_5_QUALITY",
  };

  const configKey = fieldMappings[field];
@ -71,7 +73,7 @@ export const changeProvider = (

  // Auto Select appropriate image provider based on the text models
  if (provider === "openai") {
-    newConfig.IMAGE_PROVIDER = "dall-e-3";
+    newConfig.IMAGE_PROVIDER = "gpt-image-1.5";
  } else if (provider === "google") {
    newConfig.IMAGE_PROVIDER = "gemini_flash";
  } else {
--- a/servers/nextjs/utils/storeHelpers.ts
+++ b/servers/nextjs/utils/storeHelpers.ts
@ -16,7 +16,8 @@ export const handleSaveLLMConfig = async (llmConfig: LLMConfig) => {

 export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
  if (!llmConfig.LLM) return false;
-  if (!llmConfig.DISABLE_IMAGE_GENERATION && !llmConfig.IMAGE_PROVIDER) return false;
+  if (!llmConfig.DISABLE_IMAGE_GENERATION && !llmConfig.IMAGE_PROVIDER)
+    return false;

  const isOpenAIConfigValid =
    llmConfig.OPENAI_MODEL !== "" &&
@ -71,6 +72,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
        return llmConfig.PIXABAY_API_KEY && llmConfig.PIXABAY_API_KEY !== "";
      case "dall-e-3":
        return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
+      case "gpt-image-1.5":
+        return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== "";
      case "gemini_flash":
        return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== "";
      case "nanobanana_pro":
@ -86,14 +89,14 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => {
    llmConfig.LLM === "openai"
      ? isOpenAIConfigValid
      : llmConfig.LLM === "google"
-        ? isGoogleConfigValid
-        : llmConfig.LLM === "anthropic"
-          ? isAnthropicConfigValid
-          : llmConfig.LLM === "ollama"
-            ? isOllamaConfigValid
-            : llmConfig.LLM === "custom"
-              ? isCustomConfigValid
-              : false;
+      ? isGoogleConfigValid
+      : llmConfig.LLM === "anthropic"
+      ? isAnthropicConfigValid
+      : llmConfig.LLM === "ollama"
+      ? isOllamaConfigValid
+      : llmConfig.LLM === "custom"
+      ? isCustomConfigValid
+      : false;

  return isLLMConfigValid && isImageConfigValid();
 };
--- a/start.js
+++ b/start.js
@ -97,7 +97,12 @@ const setupUserConfigFromEnv = () => {
    WEB_GROUNDING: process.env.WEB_GROUNDING || existingConfig.WEB_GROUNDING,
    USE_CUSTOM_URL: process.env.USE_CUSTOM_URL || existingConfig.USE_CUSTOM_URL,
    COMFYUI_URL: process.env.COMFYUI_URL || existingConfig.COMFYUI_URL,
-    COMFYUI_WORKFLOW: process.env.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW,
+    COMFYUI_WORKFLOW:
+      process.env.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW,
+    DALL_E_3_QUALITY:
+      process.env.DALL_E_3_QUALITY || existingConfig.DALL_E_3_QUALITY,
+    GPT_IMAGE_1_5_QUALITY:
+      process.env.GPT_IMAGE_1_5_QUALITY || existingConfig.GPT_IMAGE_1_5_QUALITY,
  };

  writeFileSync(userConfigPath, JSON.stringify(userConfig));