From 908bea18b529e89ca1ab249b30b226edfb2a1494 Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Sun, 21 Dec 2025 01:14:31 +0545 Subject: [PATCH] feat: adds gpt image 1.5 --- servers/fastapi/enums/image_provider.py | 4 +- servers/fastapi/models/user_config.py | 5 + .../services/image_generation_service.py | 231 +++++++++----- servers/fastapi/utils/get_env.py | 10 + servers/fastapi/utils/image_provider.py | 25 +- servers/fastapi/utils/model_availability.py | 11 +- servers/fastapi/utils/set_env.py | 8 + servers/fastapi/utils/user_config.py | 11 + servers/nextjs/app/api/user-config/route.ts | 7 +- servers/nextjs/components/LLMSelection.tsx | 301 +++++++++++++++--- servers/nextjs/types/llm_config.ts | 5 + servers/nextjs/utils/providerConstants.ts | 27 +- servers/nextjs/utils/providerUtils.ts | 4 +- servers/nextjs/utils/storeHelpers.ts | 21 +- start.js | 7 +- 15 files changed, 495 insertions(+), 182 deletions(-) diff --git a/servers/fastapi/enums/image_provider.py b/servers/fastapi/enums/image_provider.py index 36025007..9d773ad5 100644 --- a/servers/fastapi/enums/image_provider.py +++ b/servers/fastapi/enums/image_provider.py @@ -1,9 +1,11 @@ from enum import Enum + class ImageProvider(Enum): PEXELS = "pexels" PIXABAY = "pixabay" GEMINI_FLASH = "gemini_flash" - NANOBANANA_PRO = "nanobanana_pro" # Google's gemini-3-pro-image-preview + NANOBANANA_PRO = "nanobanana_pro" DALLE3 = "dall-e-3" + GPT_IMAGE_1_5 = "gpt-image-1.5" COMFYUI = "comfyui" diff --git a/servers/fastapi/models/user_config.py b/servers/fastapi/models/user_config.py index 3f2e0f19..da424b59 100644 --- a/servers/fastapi/models/user_config.py +++ b/servers/fastapi/models/user_config.py @@ -36,6 +36,11 @@ class UserConfig(BaseModel): COMFYUI_URL: Optional[str] = None COMFYUI_WORKFLOW: Optional[str] = None + # Dalle 3 Quality + DALL_E_3_QUALITY: Optional[str] = None + # Gpt Image 1.5 Quality + GPT_IMAGE_1_5_QUALITY: Optional[str] = None + # Reasoning TOOL_CALLS: Optional[bool] = None DISABLE_THINKING: Optional[bool] = None diff --git a/servers/fastapi/services/image_generation_service.py b/servers/fastapi/services/image_generation_service.py index 802585a5..f9ec1202 100644 --- a/servers/fastapi/services/image_generation_service.py +++ b/servers/fastapi/services/image_generation_service.py @@ -3,17 +3,21 @@ import base64 import json import os import aiohttp +from fastapi import HTTPException from google import genai -from google.genai.types import GenerateContentConfig -from openai import AsyncOpenAI +from openai import NOT_GIVEN, AsyncOpenAI from models.image_prompt import ImagePrompt from models.sql.image_asset import ImageAsset -from utils.download_helpers import download_file -from utils.get_env import get_pexels_api_key_env +from utils.get_env import ( + get_dall_e_3_quality_env, + get_gpt_image_1_5_quality_env, + get_pexels_api_key_env, +) from utils.get_env import get_pixabay_api_key_env from utils.get_env import get_comfyui_url_env from utils.get_env import get_comfyui_workflow_env from utils.image_provider import ( + is_gpt_image_1_5_selected, is_image_generation_disabled, is_pixels_selected, is_pixabay_selected, @@ -44,7 +48,9 @@ class ImageGenerationService: elif is_nanobanana_pro_selected(): return self.generate_image_nanobanana_pro elif is_dalle3_selected(): - return self.generate_image_openai + return self.generate_image_openai_dalle3 + elif is_gpt_image_1_5_selected(): + return self.generate_image_openai_gpt_image_1_5 elif is_comfyui_selected(): return self.generate_image_comfyui return None @@ -98,46 +104,83 @@ class ImageGenerationService: print(f"Error generating image: {e}") return "/static/images/placeholder.jpg" - async def generate_image_openai(self, prompt: str, output_directory: str,model: str = "dall-e-3") -> str: - + async def generate_image_openai( + self, prompt: str, output_directory: str, model: str, quality: str + ) -> str: client = AsyncOpenAI() result = await client.images.generate( model=model, prompt=prompt, n=1, - quality="standard", + quality=quality, + response_format="b64_json" if model == "dall-e-3" else NOT_GIVEN, size="1024x1024", ) - image_url = result.data[0].url - return await download_file(image_url, output_directory) + image_path = os.path.join(output_directory, f"{uuid.uuid4()}.png") + with open(image_path, "wb") as f: + f.write(base64.b64decode(result.data[0].b64_json)) + return image_path - async def _generate_image_google(self, prompt: str, output_directory: str, model: str) -> str: + async def generate_image_openai_dalle3( + self, prompt: str, output_directory: str + ) -> str: + return await self.generate_image_openai( + prompt, + output_directory, + "dall-e-3", + get_dall_e_3_quality_env() or "standard", + ) + + async def generate_image_openai_gpt_image_1_5( + self, prompt: str, output_directory: str + ) -> str: + return await self.generate_image_openai( + prompt, + output_directory, + "gpt-image-1.5", + get_gpt_image_1_5_quality_env() or "medium", + ) + + async def _generate_image_google( + self, prompt: str, output_directory: str, model: str + ) -> str: """Base method for Google image generation models.""" client = genai.Client() response = await asyncio.to_thread( client.models.generate_content, model=model, contents=[prompt], - config=GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]), ) + image_path = None for part in response.candidates[0].content.parts: - if part.text is not None: - print(part.text) - elif part.inline_data is not None: + if part.inline_data is not None: + image = part.as_image() image_path = os.path.join(output_directory, f"{uuid.uuid4()}.jpg") - with open(image_path, "wb") as f: - f.write(part.inline_data.data) + image.save(image_path) + + if not image_path: + raise HTTPException( + status_code=500, detail=f"No image generated by google {model}" + ) return image_path - async def generate_image_gemini_flash(self, prompt: str, output_directory: str) -> str: + async def generate_image_gemini_flash( + self, prompt: str, output_directory: str + ) -> str: """Generate image using Gemini Flash (gemini-2.5-flash-image-preview).""" - return await self._generate_image_google(prompt, output_directory, "gemini-2.5-flash-image-preview") + return await self._generate_image_google( + prompt, output_directory, "gemini-2.5-flash-image-preview" + ) - async def generate_image_nanobanana_pro(self, prompt: str, output_directory: str) -> str: + async def generate_image_nanobanana_pro( + self, prompt: str, output_directory: str + ) -> str: """Generate image using NanoBanana Pro (gemini-3-pro-image-preview).""" - return await self._generate_image_google(prompt, output_directory, "gemini-3-pro-image-preview") + return await self._generate_image_google( + prompt, output_directory, "gemini-3-pro-image-preview" + ) async def get_image_from_pexels(self, prompt: str) -> str: async with aiohttp.ClientSession(trust_env=True) as session: @@ -161,134 +204,145 @@ class ImageGenerationService: async def generate_image_comfyui(self, prompt: str, output_directory: str) -> str: """ Generate image using ComfyUI workflow API. - + User provides: - COMFYUI_URL: ComfyUI server URL (e.g., http://192.168.1.7:8188) - COMFYUI_WORKFLOW: Workflow JSON exported from ComfyUI - + The workflow should have a CLIPTextEncode node with "Positive" in the title where the prompt will be injected. - + Args: prompt: The text prompt for image generation output_directory: Directory to save the generated image - + Returns: Path to the generated image file """ comfyui_url = get_comfyui_url_env() workflow_json = get_comfyui_workflow_env() - + if not comfyui_url: raise ValueError("COMFYUI_URL environment variable is not set") - + if not workflow_json: - raise ValueError("COMFYUI_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON.") - + raise ValueError( + "COMFYUI_WORKFLOW environment variable is not set. Please provide a ComfyUI workflow JSON." + ) + # Ensure URL doesn't have trailing slash comfyui_url = comfyui_url.rstrip("/") - + # Parse the workflow JSON try: workflow = json.loads(workflow_json) except json.JSONDecodeError as e: raise ValueError(f"Invalid workflow JSON: {str(e)}") - + # Find and update the positive prompt node workflow = self._inject_prompt_into_workflow(workflow, prompt) - + async with aiohttp.ClientSession(trust_env=True) as session: # Step 1: Submit workflow - prompt_id = await self._submit_comfyui_workflow(session, comfyui_url, workflow) - + prompt_id = await self._submit_comfyui_workflow( + session, comfyui_url, workflow + ) + # Step 2: Wait for completion - status_data = await self._wait_for_comfyui_completion(session, comfyui_url, prompt_id) - + status_data = await self._wait_for_comfyui_completion( + session, comfyui_url, prompt_id + ) + # Step 3: Download the generated image image_path = await self._download_comfyui_image( session, comfyui_url, status_data, prompt_id, output_directory ) - + return image_path - + def _inject_prompt_into_workflow(self, workflow: dict, prompt: str) -> dict: """ Find the prompt node in the workflow and inject the prompt text. Looks for a node with title 'Input Prompt' (case-insensitive). - + User must rename their prompt node to 'Input Prompt' in ComfyUI. """ for node_id, node_data in workflow.items(): meta = node_data.get("_meta", {}) title = meta.get("title", "").lower() - + if title == "input prompt": if "inputs" in node_data and "text" in node_data["inputs"]: node_data["inputs"]["text"] = prompt - print(f"Injected prompt into node {node_id}: {meta.get('title', '')}") + print( + f"Injected prompt into node {node_id}: {meta.get('title', '')}" + ) return workflow - - raise ValueError("Could not find a node with title 'Input Prompt' in the workflow. Please rename your prompt node to 'Input Prompt' in ComfyUI.") - + + raise ValueError( + "Could not find a node with title 'Input Prompt' in the workflow. Please rename your prompt node to 'Input Prompt' in ComfyUI." + ) + async def _submit_comfyui_workflow( self, session: aiohttp.ClientSession, comfyui_url: str, workflow: dict ) -> str: """Submit workflow to ComfyUI and return the prompt_id.""" client_id = str(uuid.uuid4()) - payload = { - "prompt": workflow, - "client_id": client_id - } - + payload = {"prompt": workflow, "client_id": client_id} + response = await session.post( f"{comfyui_url}/prompt", json=payload, - timeout=aiohttp.ClientTimeout(total=30) + timeout=aiohttp.ClientTimeout(total=30), ) - + if response.status != 200: error_text = await response.text() raise Exception(f"Failed to submit workflow to ComfyUI: {error_text}") - + data = await response.json() prompt_id = data.get("prompt_id") - + if not prompt_id: raise Exception("No prompt_id returned from ComfyUI") - + print(f"ComfyUI workflow submitted. Prompt ID: {prompt_id}") return prompt_id - + async def _wait_for_comfyui_completion( - self, session: aiohttp.ClientSession, comfyui_url: str, prompt_id: str, - timeout: int = 300, poll_interval: int = 4 + self, + session: aiohttp.ClientSession, + comfyui_url: str, + prompt_id: str, + timeout: int = 300, + poll_interval: int = 4, ) -> dict: """Poll ComfyUI history endpoint until workflow completes.""" start_time = asyncio.get_event_loop().time() - + while True: elapsed = asyncio.get_event_loop().time() - start_time if elapsed > timeout: raise Exception(f"ComfyUI workflow timed out after {timeout} seconds") - + await asyncio.sleep(poll_interval) - + response = await session.get( f"{comfyui_url}/history/{prompt_id}", - timeout=aiohttp.ClientTimeout(total=30) + timeout=aiohttp.ClientTimeout(total=30), ) - + if response.status != 200: continue - + try: status_data = await response.json() - except: + except Exception as _: continue - + if prompt_id in status_data: execution_data = status_data[prompt_id] - + # Check for completion if "status" in execution_data: status = execution_data["status"] @@ -297,62 +351,65 @@ class ImageGenerationService: return status_data if "error" in status: raise Exception(f"ComfyUI workflow error: {status['error']}") - + # Also check if outputs exist (alternative completion check) if "outputs" in execution_data and execution_data["outputs"]: print("ComfyUI workflow completed (outputs found)") return status_data - + print(f"Waiting for ComfyUI workflow... ({int(elapsed)}s)") - + async def _download_comfyui_image( - self, session: aiohttp.ClientSession, comfyui_url: str, - status_data: dict, prompt_id: str, output_directory: str + self, + session: aiohttp.ClientSession, + comfyui_url: str, + status_data: dict, + prompt_id: str, + output_directory: str, ) -> str: """Download the generated image from ComfyUI.""" if prompt_id not in status_data: raise Exception("Prompt ID not found in status data") - + outputs = status_data[prompt_id].get("outputs", {}) - + if not outputs: raise Exception("No outputs found in ComfyUI response") - + # Find the first image in outputs for node_id, node_output in outputs.items(): if "images" in node_output: for image_info in node_output["images"]: filename = image_info["filename"] subfolder = image_info.get("subfolder", "") - + # Build view params - params = { - "filename": filename, - "type": "output" - } + params = {"filename": filename, "type": "output"} if subfolder: params["subfolder"] = subfolder - + # Download the image response = await session.get( f"{comfyui_url}/view", params=params, - timeout=aiohttp.ClientTimeout(total=60) + timeout=aiohttp.ClientTimeout(total=60), ) - + if response.status == 200: image_data = await response.read() - + # Determine extension ext = filename.split(".")[-1] if "." in filename else "png" - image_path = os.path.join(output_directory, f"{uuid.uuid4()}.{ext}") - + image_path = os.path.join( + output_directory, f"{uuid.uuid4()}.{ext}" + ) + with open(image_path, "wb") as f: f.write(image_data) - + print(f"Downloaded image from ComfyUI: {image_path}") return image_path else: raise Exception(f"Failed to download image: {response.status}") - + raise Exception("No images found in ComfyUI outputs") diff --git a/servers/fastapi/utils/get_env.py b/servers/fastapi/utils/get_env.py index c36ed2ac..c7dc16d0 100644 --- a/servers/fastapi/utils/get_env.py +++ b/servers/fastapi/utils/get_env.py @@ -107,3 +107,13 @@ def get_comfyui_url_env(): def get_comfyui_workflow_env(): return os.getenv("COMFYUI_WORKFLOW") + + +# Dalle 3 Quality +def get_dall_e_3_quality_env(): + return os.getenv("DALL_E_3_QUALITY") + + +# Gpt Image 1.5 Quality +def get_gpt_image_1_5_quality_env(): + return os.getenv("GPT_IMAGE_1_5_QUALITY") diff --git a/servers/fastapi/utils/image_provider.py b/servers/fastapi/utils/image_provider.py index 1be28bc5..15469709 100644 --- a/servers/fastapi/utils/image_provider.py +++ b/servers/fastapi/utils/image_provider.py @@ -1,12 +1,7 @@ from enums.image_provider import ImageProvider from utils.get_env import ( - get_comfyui_url_env, get_disable_image_generation_env, - get_google_api_key_env, get_image_provider_env, - get_openai_api_key_env, - get_pexels_api_key_env, - get_pixabay_api_key_env, ) from utils.parsers import parse_bool_or_none @@ -35,6 +30,10 @@ def is_dalle3_selected() -> bool: return ImageProvider.DALLE3 == get_selected_image_provider() +def is_gpt_image_1_5_selected() -> bool: + return ImageProvider.GPT_IMAGE_1_5 == get_selected_image_provider() + + def is_comfyui_selected() -> bool: return ImageProvider.COMFYUI == get_selected_image_provider() @@ -49,19 +48,3 @@ def get_selected_image_provider() -> ImageProvider | None: if image_provider_env: return ImageProvider(image_provider_env) return None - - -def get_image_provider_api_key() -> str: - selected_image_provider = get_selected_image_provider() - if selected_image_provider == ImageProvider.PEXELS: - return get_pexels_api_key_env() - elif selected_image_provider == ImageProvider.PIXABAY: - return get_pixabay_api_key_env() - elif selected_image_provider == ImageProvider.GEMINI_FLASH: - return get_google_api_key_env() - elif selected_image_provider == ImageProvider.DALLE3: - return get_openai_api_key_env() - elif selected_image_provider == ImageProvider.COMFYUI: - return get_comfyui_url_env() # Returns URL instead of API key - else: - raise ValueError(f"Invalid image provider: {selected_image_provider}") diff --git a/servers/fastapi/utils/model_availability.py b/servers/fastapi/utils/model_availability.py index ff16e7de..1c40070d 100644 --- a/servers/fastapi/utils/model_availability.py +++ b/servers/fastapi/utils/model_availability.py @@ -128,15 +128,22 @@ async def check_llm_and_image_provider_api_or_model_availability(): if not pixabay_api_key: raise Exception("PIXABAY_API_KEY must be provided") - elif selected_image_provider == ImageProvider.GEMINI_FLASH: + elif ( + selected_image_provider == ImageProvider.GEMINI_FLASH + or selected_image_provider == ImageProvider.NANOBANANA_PRO + ): google_api_key = get_google_api_key_env() if not google_api_key: raise Exception("GOOGLE_API_KEY must be provided") - elif selected_image_provider == ImageProvider.DALLE3: + elif ( + selected_image_provider == ImageProvider.DALLE3 + or selected_image_provider == ImageProvider.GPT_IMAGE_1_5 + ): openai_api_key = get_openai_api_key_env() if not openai_api_key: raise Exception("OPENAI_API_KEY must be provided") + elif selected_image_provider == ImageProvider.COMFYUI: comfyui_url = get_comfyui_url_env() if not comfyui_url: diff --git a/servers/fastapi/utils/set_env.py b/servers/fastapi/utils/set_env.py index 8925c7ae..e388d391 100644 --- a/servers/fastapi/utils/set_env.py +++ b/servers/fastapi/utils/set_env.py @@ -95,3 +95,11 @@ def set_comfyui_url_env(value): def set_comfyui_workflow_env(value): os.environ["COMFYUI_WORKFLOW"] = value + + +def set_dall_e_3_quality_env(value): + os.environ["DALL_E_3_QUALITY"] = value + + +def set_gpt_image_1_5_quality_env(value): + os.environ["GPT_IMAGE_1_5_QUALITY"] = value diff --git a/servers/fastapi/utils/user_config.py b/servers/fastapi/utils/user_config.py index 46bcc9e4..1dd799bb 100644 --- a/servers/fastapi/utils/user_config.py +++ b/servers/fastapi/utils/user_config.py @@ -10,10 +10,12 @@ from utils.get_env import ( get_custom_llm_api_key_env, get_custom_llm_url_env, get_custom_model_env, + get_dall_e_3_quality_env, get_disable_image_generation_env, get_disable_thinking_env, get_google_api_key_env, get_google_model_env, + get_gpt_image_1_5_quality_env, get_llm_provider_env, get_ollama_model_env, get_ollama_url_env, @@ -36,11 +38,13 @@ from utils.set_env import ( set_custom_llm_api_key_env, set_custom_llm_url_env, set_custom_model_env, + set_dall_e_3_quality_env, set_disable_image_generation_env, set_disable_thinking_env, set_extended_reasoning_env, set_google_api_key_env, set_google_model_env, + set_gpt_image_1_5_quality_env, set_llm_provider_env, set_ollama_model_env, set_ollama_url_env, @@ -91,6 +95,9 @@ def get_user_config(): PEXELS_API_KEY=existing_config.PEXELS_API_KEY or get_pexels_api_key_env(), COMFYUI_URL=existing_config.COMFYUI_URL or get_comfyui_url_env(), COMFYUI_WORKFLOW=existing_config.COMFYUI_WORKFLOW or get_comfyui_workflow_env(), + DALL_E_3_QUALITY=existing_config.DALL_E_3_QUALITY or get_dall_e_3_quality_env(), + GPT_IMAGE_1_5_QUALITY=existing_config.GPT_IMAGE_1_5_QUALITY + or get_gpt_image_1_5_quality_env(), TOOL_CALLS=( existing_config.TOOL_CALLS if existing_config.TOOL_CALLS is not None @@ -152,6 +159,10 @@ def update_env_with_user_config(): set_comfyui_url_env(user_config.COMFYUI_URL) if user_config.COMFYUI_WORKFLOW: set_comfyui_workflow_env(user_config.COMFYUI_WORKFLOW) + if user_config.DALL_E_3_QUALITY: + set_dall_e_3_quality_env(user_config.DALL_E_3_QUALITY) + if user_config.GPT_IMAGE_1_5_QUALITY: + set_gpt_image_1_5_quality_env(user_config.GPT_IMAGE_1_5_QUALITY) if user_config.TOOL_CALLS is not None: set_tool_calls_env(str(user_config.TOOL_CALLS)) if user_config.DISABLE_THINKING is not None: diff --git a/servers/nextjs/app/api/user-config/route.ts b/servers/nextjs/app/api/user-config/route.ts index 448d1402..586e9cf1 100644 --- a/servers/nextjs/app/api/user-config/route.ts +++ b/servers/nextjs/app/api/user-config/route.ts @@ -65,7 +65,12 @@ export async function POST(request: Request) { IMAGE_PROVIDER: userConfig.IMAGE_PROVIDER || existingConfig.IMAGE_PROVIDER, PEXELS_API_KEY: userConfig.PEXELS_API_KEY || existingConfig.PEXELS_API_KEY, COMFYUI_URL: userConfig.COMFYUI_URL || existingConfig.COMFYUI_URL, - COMFYUI_WORKFLOW: userConfig.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW, + COMFYUI_WORKFLOW: + userConfig.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW, + DALL_E_3_QUALITY: + userConfig.DALL_E_3_QUALITY || existingConfig.DALL_E_3_QUALITY, + GPT_IMAGE_1_5_QUALITY: + userConfig.GPT_IMAGE_1_5_QUALITY || existingConfig.GPT_IMAGE_1_5_QUALITY, TOOL_CALLS: userConfig.TOOL_CALLS === undefined ? existingConfig.TOOL_CALLS diff --git a/servers/nextjs/components/LLMSelection.tsx b/servers/nextjs/components/LLMSelection.tsx index 28aa3e49..602acec9 100644 --- a/servers/nextjs/components/LLMSelection.tsx +++ b/servers/nextjs/components/LLMSelection.tsx @@ -26,6 +26,37 @@ import { import { IMAGE_PROVIDERS, LLM_PROVIDERS } from "@/utils/providerConstants"; import { LLMConfig } from "@/types/llm_config"; +const DALLE_3_QUALITY_OPTIONS = [ + { + label: "Standard", + value: "standard", + description: "Faster generation with lower cost", + }, + { + label: "HD", + value: "hd", + description: "Higher quality images with increased cost", + }, +]; + +const GPT_IMAGE_1_5_QUALITY_OPTIONS = [ + { + label: "Low", + value: "low", + description: "Fastest and most cost-effective", + }, + { + label: "Medium", + value: "medium", + description: "Balanced quality and speed", + }, + { + label: "High", + value: "high", + description: "Best quality with longer generation time", + }, +]; + // Button state interface interface ButtonState { isLoading: boolean; @@ -40,7 +71,9 @@ interface LLMProviderSelectionProps { initialLLMConfig: LLMConfig; onConfigChange: (config: LLMConfig) => void; buttonState: ButtonState; - setButtonState: (state: ButtonState | ((prev: ButtonState) => ButtonState)) => void; + setButtonState: ( + state: ButtonState | ((prev: ButtonState) => ButtonState) + ) => void; } export default function LLMProviderSelection({ @@ -71,29 +104,43 @@ export default function LLMProviderSelection({ const needsImageProviderApiKey = !llmConfig.DISABLE_IMAGE_GENERATION && - ( - (llmConfig.IMAGE_PROVIDER === "dall-e-3" && !llmConfig.OPENAI_API_KEY) || - (llmConfig.IMAGE_PROVIDER === "gemini_flash" && !llmConfig.GOOGLE_API_KEY) || - (llmConfig.IMAGE_PROVIDER === "nanobanana_pro" && !llmConfig.GOOGLE_API_KEY) || + ((llmConfig.IMAGE_PROVIDER === "dall-e-3" && !llmConfig.OPENAI_API_KEY) || + (llmConfig.IMAGE_PROVIDER === "gpt-image-1.5" && + !llmConfig.OPENAI_API_KEY) || + (llmConfig.IMAGE_PROVIDER === "gemini_flash" && + !llmConfig.GOOGLE_API_KEY) || + (llmConfig.IMAGE_PROVIDER === "nanobanana_pro" && + !llmConfig.GOOGLE_API_KEY) || (llmConfig.IMAGE_PROVIDER === "pexels" && !llmConfig.PEXELS_API_KEY) || - (llmConfig.IMAGE_PROVIDER === "pixabay" && !llmConfig.PIXABAY_API_KEY) - ); + (llmConfig.IMAGE_PROVIDER === "pixabay" && !llmConfig.PIXABAY_API_KEY)); const needsApiKey = needsProviderApiKey || needsImageProviderApiKey; - const needsOllamaUrl = (llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL); + const needsOllamaUrl = llmConfig.LLM === "ollama" && !llmConfig.OLLAMA_URL; - const needsComfyUIConfig = !llmConfig.DISABLE_IMAGE_GENERATION && + const needsComfyUIConfig = + !llmConfig.DISABLE_IMAGE_GENERATION && llmConfig.IMAGE_PROVIDER === "comfyui" && (!llmConfig.COMFYUI_URL || !llmConfig.COMFYUI_WORKFLOW); setButtonState({ isLoading: false, - isDisabled: needsModelSelection || needsApiKey || needsOllamaUrl || needsComfyUIConfig, - text: needsModelSelection ? "Please Select a Model" : needsApiKey ? "Please Enter API Key" : needsOllamaUrl ? "Please Enter Ollama URL" : needsComfyUIConfig ? "Please Configure ComfyUI" : "Save Configuration", - showProgress: false + isDisabled: + needsModelSelection || + needsApiKey || + needsOllamaUrl || + needsComfyUIConfig, + text: needsModelSelection + ? "Please Select a Model" + : needsApiKey + ? "Please Enter API Key" + : needsOllamaUrl + ? "Please Enter Ollama URL" + : needsComfyUIConfig + ? "Please Configure ComfyUI" + : "Save Configuration", + showProgress: false, }); - }, [llmConfig]); const input_field_changed = (new_value: string | boolean, field: string) => { @@ -101,6 +148,45 @@ export default function LLMProviderSelection({ setLlmConfig(updatedConfig); }; + const getApiKeyValue = (field?: string) => { + switch (field) { + case "OPENAI_API_KEY": + return llmConfig.OPENAI_API_KEY || ""; + case "GOOGLE_API_KEY": + return llmConfig.GOOGLE_API_KEY || ""; + case "ANTHROPIC_API_KEY": + return llmConfig.ANTHROPIC_API_KEY || ""; + case "PEXELS_API_KEY": + return llmConfig.PEXELS_API_KEY || ""; + case "PIXABAY_API_KEY": + return llmConfig.PIXABAY_API_KEY || ""; + default: + return ""; + } + }; + + const handleApiKeyInputChange = (field: string | undefined, value: string) => { + switch (field) { + case "OPENAI_API_KEY": + input_field_changed(value, "openai_api_key"); + break; + case "GOOGLE_API_KEY": + input_field_changed(value, "google_api_key"); + break; + case "ANTHROPIC_API_KEY": + input_field_changed(value, "anthropic_api_key"); + break; + case "PEXELS_API_KEY": + input_field_changed(value, "pexels_api_key"); + break; + case "PIXABAY_API_KEY": + input_field_changed(value, "pixabay_api_key"); + break; + default: + break; + } + }; + const handleProviderChange = (provider: string) => { const newConfig = changeProviderUtil(llmConfig, provider); setLlmConfig(newConfig); @@ -122,7 +208,7 @@ export default function LLMProviderSelection({ if (!prevConfig.DISABLE_IMAGE_GENERATION && !prevConfig.IMAGE_PROVIDER) { if (prevConfig.LLM === "openai") { - updates.IMAGE_PROVIDER = "dall-e-3"; + updates.IMAGE_PROVIDER = "gpt-image-1.5"; } else if (prevConfig.LLM === "google") { updates.IMAGE_PROVIDER = "gemini_flash"; } else { @@ -142,6 +228,104 @@ export default function LLMProviderSelection({ }); }, []); + useEffect(() => { + setLlmConfig((prevConfig) => { + const updates: Partial = {}; + + if ( + prevConfig.IMAGE_PROVIDER === "dall-e-3" && + !prevConfig.DALL_E_3_QUALITY + ) { + updates.DALL_E_3_QUALITY = "standard"; + } + + if ( + prevConfig.IMAGE_PROVIDER === "gpt-image-1.5" && + !prevConfig.GPT_IMAGE_1_5_QUALITY + ) { + updates.GPT_IMAGE_1_5_QUALITY = "medium"; + } + + if (Object.keys(updates).length === 0) { + return prevConfig; + } + + return { ...prevConfig, ...updates }; + }); + }, [llmConfig.IMAGE_PROVIDER]); + + const renderQualitySelector = () => { + if (llmConfig.IMAGE_PROVIDER === "dall-e-3") { + return ( +
+ +
+ {DALLE_3_QUALITY_OPTIONS.map((option) => ( + + ))} +
+
+ ); + } + + if (llmConfig.IMAGE_PROVIDER === "gpt-image-1.5") { + return ( +
+ +
+ {GPT_IMAGE_1_5_QUALITY_OPTIONS.map((option) => ( + + ))} +
+
+ ); + } + + return null; + }; + return (
{/* Provider Selection - Fixed Header */} @@ -161,7 +345,6 @@ export default function LLMProviderSelection({
- {/* Scrollable Content */}

- When enabled, slides will not include automatically generated images. + When enabled, slides will not include automatically generated + images.

@@ -267,8 +451,8 @@ export default function LLMProviderSelection({
{llmConfig.IMAGE_PROVIDER - ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]?.label || - llmConfig.IMAGE_PROVIDER + ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER] + ?.label || llmConfig.IMAGE_PROVIDER : "Select image provider"}
@@ -326,6 +510,8 @@ export default function LLMProviderSelection({ + {renderQualitySelector()} + {/* Dynamic API Key Input for Image Provider */} {llmConfig.IMAGE_PROVIDER && IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER] && @@ -333,15 +519,31 @@ export default function LLMProviderSelection({ const provider = IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER]; // Show info message when using same API key as main provider - if (provider.value === "dall-e-3" && llmConfig.LLM === "openai") { + if ( + provider.value === "dall-e-3" && + llmConfig.LLM === "openai" + ) { return <>; } - if (provider.value === "gemini_flash" && llmConfig.LLM === "google") { + if ( + provider.value === "gpt-image-1.5" && + llmConfig.LLM === "openai" + ) { return <>; } - if (provider.value === "nanobanana_pro" && llmConfig.LLM === "google") { + if ( + provider.value === "gemini_flash" && + llmConfig.LLM === "google" + ) { + return <>; + } + + if ( + provider.value === "nanobanana_pro" && + llmConfig.LLM === "google" + ) { return <>; } @@ -360,13 +562,17 @@ export default function LLMProviderSelection({ className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors" value={llmConfig.COMFYUI_URL || ""} onChange={(e) => { - input_field_changed(e.target.value, "comfyui_url"); + input_field_changed( + e.target.value, + "comfyui_url" + ); }} />

- Use your machine IP address (not localhost) when running in Docker + Use your machine IP address (not localhost) when + running in Docker

@@ -380,13 +586,16 @@ export default function LLMProviderSelection({ rows={6} value={llmConfig.COMFYUI_WORKFLOW || ""} onChange={(e) => { - input_field_changed(e.target.value, "comfyui_workflow"); + input_field_changed( + e.target.value, + "comfyui_workflow" + ); }} />

- Export your workflow from ComfyUI using "Export (API)" and paste the JSON here. - + Export your workflow from ComfyUI using "Export + (API)" and paste the JSON here.

@@ -404,20 +613,13 @@ export default function LLMProviderSelection({ type="text" placeholder={`Enter your ${provider.apiKeyFieldLabel}`} className="w-full px-4 py-2.5 outline-none border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 transition-colors" - value={ - provider.apiKeyField === "PEXELS_API_KEY" - ? llmConfig.PEXELS_API_KEY || "" - : provider.apiKeyField === "PIXABAY_API_KEY" - ? llmConfig.PIXABAY_API_KEY || "" - : "" + value={getApiKeyValue(provider.apiKeyField)} + onChange={(e) => + handleApiKeyInputChange( + provider.apiKeyField, + e.target.value + ) } - onChange={(e) => { - if (provider.apiKeyField === "PEXELS_API_KEY") { - input_field_changed(e.target.value, "pexels_api_key"); - } else if (provider.apiKeyField === "PIXABAY_API_KEY") { - input_field_changed(e.target.value, "pixabay_api_key"); - } - }} />

@@ -443,14 +645,14 @@ export default function LLMProviderSelection({ {llmConfig.LLM === "ollama" ? llmConfig.OLLAMA_MODEL ?? "xxxxx" : llmConfig.LLM === "custom" - ? llmConfig.CUSTOM_MODEL ?? "xxxxx" - : llmConfig.LLM === "anthropic" - ? llmConfig.ANTHROPIC_MODEL ?? "xxxxx" - : llmConfig.LLM === "google" - ? llmConfig.GOOGLE_MODEL ?? "xxxxx" - : llmConfig.LLM === "openai" - ? llmConfig.OPENAI_MODEL ?? "xxxxx" - : "xxxxx"}{" "} + ? llmConfig.CUSTOM_MODEL ?? "xxxxx" + : llmConfig.LLM === "anthropic" + ? llmConfig.ANTHROPIC_MODEL ?? "xxxxx" + : llmConfig.LLM === "google" + ? llmConfig.GOOGLE_MODEL ?? "xxxxx" + : llmConfig.LLM === "openai" + ? llmConfig.OPENAI_MODEL ?? "xxxxx" + : "xxxxx"}{" "} for text generation{" "} {isImageGenerationDisabled ? ( "and image generation is disabled." @@ -458,7 +660,7 @@ export default function LLMProviderSelection({ <> and{" "} {llmConfig.IMAGE_PROVIDER && - IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER] + IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER] ? IMAGE_PROVIDERS[llmConfig.IMAGE_PROVIDER].label : "xxxxx"}{" "} for images @@ -468,8 +670,7 @@ export default function LLMProviderSelection({ - ); -} \ No newline at end of file +} diff --git a/servers/nextjs/types/llm_config.ts b/servers/nextjs/types/llm_config.ts index f748b7c4..10ec5dd1 100644 --- a/servers/nextjs/types/llm_config.ts +++ b/servers/nextjs/types/llm_config.ts @@ -32,6 +32,11 @@ export interface LLMConfig { COMFYUI_URL?: string; COMFYUI_WORKFLOW?: string; + // Dalle 3 Quality + DALL_E_3_QUALITY?: string; + // GPT Image 1.5 Quality + GPT_IMAGE_1_5_QUALITY?: string; + // Other Configs TOOL_CALLS?: boolean; DISABLE_THINKING?: boolean; diff --git a/servers/nextjs/utils/providerConstants.ts b/servers/nextjs/utils/providerConstants.ts index 349cacf1..c8ac5cda 100644 --- a/servers/nextjs/utils/providerConstants.ts +++ b/servers/nextjs/utils/providerConstants.ts @@ -32,7 +32,7 @@ export const IMAGE_PROVIDERS: Record = { icon: "/icons/pexels.png", requiresApiKey: true, apiKeyField: "PEXELS_API_KEY", - apiKeyFieldLabel: "Pexels API Key" + apiKeyFieldLabel: "Pexels API Key", }, pixabay: { value: "pixabay", @@ -41,16 +41,25 @@ export const IMAGE_PROVIDERS: Record = { icon: "/icons/pixabay.png", requiresApiKey: true, apiKeyField: "PIXABAY_API_KEY", - apiKeyFieldLabel: "Pixabay API Key" + apiKeyFieldLabel: "Pixabay API Key", }, "dall-e-3": { value: "dall-e-3", label: "DALL-E 3", - description: "OpenAI's latest image generation model", + description: "OpenAI's image generation model", icon: "/icons/dall-e.png", requiresApiKey: true, apiKeyField: "OPENAI_API_KEY", - apiKeyFieldLabel: "OpenAI API Key" + apiKeyFieldLabel: "OpenAI API Key", + }, + "gpt-image-1.5": { + value: "gpt-image-1.5", + label: "GPT Image 1.5", + description: "OpenAI's image generation model", + icon: "/icons/gpt.png", + requiresApiKey: true, + apiKeyField: "OPENAI_API_KEY", + apiKeyFieldLabel: "OpenAI API Key", }, gemini_flash: { value: "gemini_flash", @@ -59,16 +68,16 @@ export const IMAGE_PROVIDERS: Record = { icon: "/icons/google.png", requiresApiKey: true, apiKeyField: "GOOGLE_API_KEY", - apiKeyFieldLabel: "Google API Key" + apiKeyFieldLabel: "Google API Key", }, nanobanana_pro: { value: "nanobanana_pro", label: "NanoBanana Pro", - description: "Google's advanced image generation (4K, better quality)", + description: "Google's advanced image generation model", icon: "/icons/google.png", requiresApiKey: true, apiKeyField: "GOOGLE_API_KEY", - apiKeyFieldLabel: "Google API Key" + apiKeyFieldLabel: "Google API Key", }, comfyui: { value: "comfyui", @@ -77,7 +86,7 @@ export const IMAGE_PROVIDERS: Record = { icon: "/icons/comfyui.png", requiresApiKey: false, apiKeyField: "COMFYUI_URL", - apiKeyFieldLabel: "ComfyUI Server URL" + apiKeyFieldLabel: "ComfyUI Server URL", }, }; @@ -107,4 +116,4 @@ export const LLM_PROVIDERS: Record = { label: "Custom", description: "Custom LLM", }, -}; \ No newline at end of file +}; diff --git a/servers/nextjs/utils/providerUtils.ts b/servers/nextjs/utils/providerUtils.ts index 25266a71..a04d105f 100644 --- a/servers/nextjs/utils/providerUtils.ts +++ b/servers/nextjs/utils/providerUtils.ts @@ -50,6 +50,8 @@ export const updateLLMConfig = ( web_grounding: "WEB_GROUNDING", comfyui_url: "COMFYUI_URL", comfyui_workflow: "COMFYUI_WORKFLOW", + dall_e_3_quality: "DALL_E_3_QUALITY", + gpt_image_1_5_quality: "GPT_IMAGE_1_5_QUALITY", }; const configKey = fieldMappings[field]; @@ -71,7 +73,7 @@ export const changeProvider = ( // Auto Select appropriate image provider based on the text models if (provider === "openai") { - newConfig.IMAGE_PROVIDER = "dall-e-3"; + newConfig.IMAGE_PROVIDER = "gpt-image-1.5"; } else if (provider === "google") { newConfig.IMAGE_PROVIDER = "gemini_flash"; } else { diff --git a/servers/nextjs/utils/storeHelpers.ts b/servers/nextjs/utils/storeHelpers.ts index 9a0a00f9..f7935df3 100644 --- a/servers/nextjs/utils/storeHelpers.ts +++ b/servers/nextjs/utils/storeHelpers.ts @@ -16,7 +16,8 @@ export const handleSaveLLMConfig = async (llmConfig: LLMConfig) => { export const hasValidLLMConfig = (llmConfig: LLMConfig) => { if (!llmConfig.LLM) return false; - if (!llmConfig.DISABLE_IMAGE_GENERATION && !llmConfig.IMAGE_PROVIDER) return false; + if (!llmConfig.DISABLE_IMAGE_GENERATION && !llmConfig.IMAGE_PROVIDER) + return false; const isOpenAIConfigValid = llmConfig.OPENAI_MODEL !== "" && @@ -71,6 +72,8 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => { return llmConfig.PIXABAY_API_KEY && llmConfig.PIXABAY_API_KEY !== ""; case "dall-e-3": return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== ""; + case "gpt-image-1.5": + return llmConfig.OPENAI_API_KEY && llmConfig.OPENAI_API_KEY !== ""; case "gemini_flash": return llmConfig.GOOGLE_API_KEY && llmConfig.GOOGLE_API_KEY !== ""; case "nanobanana_pro": @@ -86,14 +89,14 @@ export const hasValidLLMConfig = (llmConfig: LLMConfig) => { llmConfig.LLM === "openai" ? isOpenAIConfigValid : llmConfig.LLM === "google" - ? isGoogleConfigValid - : llmConfig.LLM === "anthropic" - ? isAnthropicConfigValid - : llmConfig.LLM === "ollama" - ? isOllamaConfigValid - : llmConfig.LLM === "custom" - ? isCustomConfigValid - : false; + ? isGoogleConfigValid + : llmConfig.LLM === "anthropic" + ? isAnthropicConfigValid + : llmConfig.LLM === "ollama" + ? isOllamaConfigValid + : llmConfig.LLM === "custom" + ? isCustomConfigValid + : false; return isLLMConfigValid && isImageConfigValid(); }; diff --git a/start.js b/start.js index f597373d..df58e353 100644 --- a/start.js +++ b/start.js @@ -97,7 +97,12 @@ const setupUserConfigFromEnv = () => { WEB_GROUNDING: process.env.WEB_GROUNDING || existingConfig.WEB_GROUNDING, USE_CUSTOM_URL: process.env.USE_CUSTOM_URL || existingConfig.USE_CUSTOM_URL, COMFYUI_URL: process.env.COMFYUI_URL || existingConfig.COMFYUI_URL, - COMFYUI_WORKFLOW: process.env.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW, + COMFYUI_WORKFLOW: + process.env.COMFYUI_WORKFLOW || existingConfig.COMFYUI_WORKFLOW, + DALL_E_3_QUALITY: + process.env.DALL_E_3_QUALITY || existingConfig.DALL_E_3_QUALITY, + GPT_IMAGE_1_5_QUALITY: + process.env.GPT_IMAGE_1_5_QUALITY || existingConfig.GPT_IMAGE_1_5_QUALITY, }; writeFileSync(userConfigPath, JSON.stringify(userConfig));