update: use gemini for slide to html and edit

This commit is contained in:
Suraj Jha 2025-08-06 13:26:45 +05:45
parent d9a46fdff3
commit cced7b9423
2 changed files with 195 additions and 235 deletions

File diff suppressed because one or more lines are too long

View file

@ -4,7 +4,8 @@ from datetime import datetime
from typing import Optional, List, Dict
from fastapi import APIRouter, HTTPException, File, UploadFile, Form, Depends
from pydantic import BaseModel
import anthropic
from google import genai
from google.genai import types
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete, func
from utils.asset_directory_utils import get_images_directory
@ -94,13 +95,13 @@ class ErrorResponse(BaseModel):
async def generate_html_from_slide(base64_image: str, media_type: str, xml_content: str, api_key: str) -> str:
"""
Generate HTML content from slide image and XML using Anthropic Claude API.
Generate HTML content from slide image and XML using Google Gen AI API.
Args:
base64_image: Base64 encoded image data
media_type: MIME type of the image (e.g., 'image/png')
xml_content: OXML content as text
api_key: Anthropic API key
api_key: Google Gen AI API key
Returns:
Generated HTML content as string
@ -109,100 +110,85 @@ async def generate_html_from_slide(base64_image: str, media_type: str, xml_conte
HTTPException: If API call fails or no content is generated
"""
try:
# Initialize Anthropic client
client = anthropic.Anthropic(api_key=api_key)
# Initialize Google Gen AI client
client = genai.Client(api_key=api_key)
# Convert base64 to bytes
image_bytes = base64.b64decode(base64_image)
# Use streaming to handle long requests
print("Starting streaming request to Claude for HTML generation...")
print("Starting streaming request to Google Gen AI for HTML generation...")
html_content = ""
thinking_content = ""
with client.messages.stream(
model="claude-sonnet-4-20250514",
max_tokens=64000,
temperature=1,
system=GENERATE_HTML_SYSTEM_PROMPT,
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": base64_image
}
},
{
"type": "text",
"text": f"\nOXML: \n\n{xml_content}"
}
]
}
],
thinking={
"type": "enabled",
"budget_tokens": 55000
}
) as stream:
print("Streaming started, collecting HTML response...")
# Collect all streamed content
for event in stream:
if event.type == "content_block_delta":
if event.delta.type == "thinking_delta":
thinking_content += event.delta.thinking
print(f"[HTML THINKING] {event.delta.thinking}", end="", flush=True)
elif event.delta.type == "text_delta":
html_content += event.delta.text
print(f"[HTML] {event.delta.text}", end="", flush=True)
elif event.type == "content_block_start":
if hasattr(event.content_block, 'type'):
print(f"\n[HTML BLOCK START] {event.content_block.type}")
elif event.type == "content_block_stop":
print(f"\n[HTML BLOCK STOP] Index: {event.index}")
elif event.type == "message_start":
print("[HTML MESSAGE START]")
elif event.type == "message_stop":
print("\n[HTML MESSAGE STOP] - Streaming complete")
# Create content with image and text
contents = [
types.Part.from_bytes(
mime_type=media_type,
data=image_bytes,
),
types.Part.from_text(text=f"\nOXML: \n\n{xml_content}"),
]
# Generate content config with thinking enabled
generate_content_config = types.GenerateContentConfig(
system_instruction=GENERATE_HTML_SYSTEM_PROMPT,
max_output_tokens=65536,
temperature=1.0,
thinking_config=types.ThinkingConfig(
thinking_budget=32768,
),
)
print("Streaming started, collecting HTML response...")
# Stream the response
for chunk in client.models.generate_content_stream(
model="gemini-2.5-pro",
contents=contents,
config=generate_content_config,
):
if chunk.text:
html_content += chunk.text
print(f"[HTML] {chunk.text}", end="", flush=True)
print(f"\nCollected HTML content length: {len(html_content)}")
print(f"Collected HTML thinking content length: {len(thinking_content)}")
if not html_content:
raise HTTPException(
status_code=500,
detail="No HTML content generated by Claude API"
detail="No HTML content generated by Google Gen AI API"
)
return html_content
except anthropic.APITimeoutError as e:
raise HTTPException(
status_code=408,
detail=f"Claude API timeout during HTML streaming: {str(e)}"
)
except anthropic.APIConnectionError as e:
raise HTTPException(
status_code=503,
detail=f"Claude API connection error during HTML streaming: {str(e)}"
)
except anthropic.APIError as e:
raise HTTPException(
status_code=500,
detail=f"Anthropic API error during HTML generation: {str(e)}"
)
except Exception as e:
# Handle various API errors
error_msg = str(e)
if "timeout" in error_msg.lower():
raise HTTPException(
status_code=408,
detail=f"Google Gen AI API timeout during HTML streaming: {error_msg}"
)
elif "connection" in error_msg.lower():
raise HTTPException(
status_code=503,
detail=f"Google Gen AI API connection error during HTML streaming: {error_msg}"
)
else:
raise HTTPException(
status_code=500,
detail=f"Google Gen AI API error during HTML generation: {error_msg}"
)
async def generate_react_component_from_html(html_content: str, api_key: str) -> str:
"""
Convert HTML content to TSX React component using Anthropic Claude API.
Convert HTML content to TSX React component using Google Gen AI API.
Args:
html_content: Generated HTML content
api_key: Anthropic API key
api_key: Google Gen AI API key
Returns:
Generated TSX React component code as string
@ -211,63 +197,44 @@ async def generate_react_component_from_html(html_content: str, api_key: str) ->
HTTPException: If API call fails or no content is generated
"""
try:
# Initialize Anthropic client
client = anthropic.Anthropic(api_key=api_key)
# Initialize Google Gen AI client
client = genai.Client(api_key=api_key)
print("Starting streaming request to Claude for React component generation...")
print("Starting streaming request to Google Gen AI for React component generation...")
react_content = ""
thinking_content = ""
with client.messages.stream(
model="claude-sonnet-4-20250514",
max_tokens=64000,
temperature=1,
system=HTML_TO_REACT_SYSTEM_PROMPT,
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": html_content
}
]
}
],
thinking={
"type": "enabled",
"budget_tokens": 25000
}
) as stream:
print("Streaming started, collecting React component response...")
# Collect all streamed content
for event in stream:
if event.type == "content_block_delta":
if event.delta.type == "thinking_delta":
thinking_content += event.delta.thinking
print(f"[REACT THINKING] {event.delta.thinking}", end="", flush=True)
elif event.delta.type == "text_delta":
react_content += event.delta.text
print(f"[REACT] {event.delta.text}", end="", flush=True)
elif event.type == "content_block_start":
if hasattr(event.content_block, 'type'):
print(f"\n[REACT BLOCK START] {event.content_block.type}")
elif event.type == "content_block_stop":
print(f"\n[REACT BLOCK STOP] Index: {event.index}")
elif event.type == "message_start":
print("[REACT MESSAGE START]")
elif event.type == "message_stop":
print("\n[REACT MESSAGE STOP] - Streaming complete")
# Create content with text
contents = types.Part.from_text(text=html_content)
# Generate content config with thinking enabled
generate_content_config = types.GenerateContentConfig(
system_instruction=HTML_TO_REACT_SYSTEM_PROMPT,
max_output_tokens=65536,
temperature=1.0,
thinking_config=types.ThinkingConfig(
thinking_budget=15000,
),
)
print("Streaming started, collecting React component response...")
# Stream the response
for chunk in client.models.generate_content_stream(
model="gemini-2.5-pro",
contents=contents,
config=generate_content_config,
):
if chunk.text:
react_content += chunk.text
print(f"[REACT] {chunk.text}", end="", flush=True)
print(f"\nCollected React content length: {len(react_content)}")
print(f"Collected React thinking content length: {len(thinking_content)}")
if not react_content:
raise HTTPException(
status_code=500,
detail="No React component generated by Claude API"
detail="No React component generated by Google Gen AI API"
)
# Filter out lines that start with import or export
@ -282,26 +249,29 @@ async def generate_react_component_from_html(html_content: str, api_key: str) ->
return filtered_react_content
except anthropic.APITimeoutError as e:
raise HTTPException(
status_code=408,
detail=f"Claude API timeout during React generation: {str(e)}"
)
except anthropic.APIConnectionError as e:
raise HTTPException(
status_code=503,
detail=f"Claude API connection error during React generation: {str(e)}"
)
except anthropic.APIError as e:
raise HTTPException(
status_code=500,
detail=f"Anthropic API error during React generation: {str(e)}"
)
except Exception as e:
# Handle various API errors
error_msg = str(e)
if "timeout" in error_msg.lower():
raise HTTPException(
status_code=408,
detail=f"Google Gen AI API timeout during React generation: {error_msg}"
)
elif "connection" in error_msg.lower():
raise HTTPException(
status_code=503,
detail=f"Google Gen AI API connection error during React generation: {error_msg}"
)
else:
raise HTTPException(
status_code=500,
detail=f"Google Gen AI API error during React generation: {error_msg}"
)
async def edit_html_with_images(current_ui_base64: str, sketch_base64: Optional[str], media_type: str, html_content: str, prompt: str, api_key: str) -> str:
"""
Edit HTML content based on one or two images and a text prompt using Anthropic Claude API.
Edit HTML content based on one or two images and a text prompt using Google Gen AI API.
Args:
current_ui_base64: Base64 encoded current UI image data
@ -309,7 +279,7 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: Optional[
media_type: MIME type of the images (e.g., 'image/png')
html_content: Current HTML content to edit
prompt: Text prompt describing the changes
api_key: Anthropic API key
api_key: Google Gen AI API key
Returns:
Edited HTML content as string
@ -318,104 +288,85 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: Optional[
HTTPException: If API call fails or no content is generated
"""
try:
# Initialize Anthropic client
client = anthropic.Anthropic(api_key=api_key)
# Initialize Google Gen AI client
client = genai.Client(api_key=api_key)
print("Starting streaming request to Claude for HTML editing...")
print("Starting streaming request to Google Gen AI for HTML editing...")
edited_html = ""
thinking_content = ""
# Convert base64 images to bytes
current_ui_bytes = base64.b64decode(current_ui_base64)
# Build content array - always include text and current UI image
content = [
{
"type": "text",
"text": f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}"
},
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": current_ui_base64
}
}
contents = [
types.Part.from_text(text=f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}"),
types.Part.from_bytes(
mime_type=media_type,
data=current_ui_bytes,
)
]
# Only add sketch image if provided
if sketch_base64:
content.append({
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": sketch_base64
}
})
sketch_bytes = base64.b64decode(sketch_base64)
contents.append(
types.Part.from_bytes(
mime_type=media_type,
data=sketch_bytes,
)
)
with client.messages.stream(
model="claude-sonnet-4-20250514",
max_tokens=64000,
temperature=1,
system=HTML_EDIT_SYSTEM_PROMPT,
messages=[
{
"role": "user",
"content": content
}
],
thinking={
"type": "enabled",
"budget_tokens": 16000
}
) as stream:
print("Streaming started, collecting edited HTML response...")
# Collect all streamed content
for event in stream:
if event.type == "content_block_delta":
if event.delta.type == "thinking_delta":
thinking_content += event.delta.thinking
print(f"[HTML EDIT THINKING] {event.delta.thinking}", end="", flush=True)
elif event.delta.type == "text_delta":
edited_html += event.delta.text
print(f"[HTML EDIT] {event.delta.text}", end="", flush=True)
elif event.type == "content_block_start":
if hasattr(event.content_block, 'type'):
print(f"\n[HTML EDIT BLOCK START] {event.content_block.type}")
elif event.type == "content_block_stop":
print(f"\n[HTML EDIT BLOCK STOP] Index: {event.index}")
elif event.type == "message_start":
print("[HTML EDIT MESSAGE START]")
elif event.type == "message_stop":
print("\n[HTML EDIT MESSAGE STOP] - Streaming complete")
# Generate content config with thinking enabled
generate_content_config = types.GenerateContentConfig(
system_instruction=HTML_EDIT_SYSTEM_PROMPT,
max_output_tokens=65536,
temperature=1.0,
thinking_config=types.ThinkingConfig(
thinking_budget=16000,
),
)
print("Streaming started, collecting edited HTML response...")
# Stream the response
for chunk in client.models.generate_content_stream(
model="gemini-2.5-pro",
contents=contents,
config=generate_content_config,
):
if chunk.text:
edited_html += chunk.text
print(f"[HTML EDIT] {chunk.text}", end="", flush=True)
print(f"\nCollected edited HTML content length: {len(edited_html)}")
print(f"Collected HTML edit thinking content length: {len(thinking_content)}")
if not edited_html:
raise HTTPException(
status_code=500,
detail="No edited HTML content generated by Claude API"
detail="No edited HTML content generated by Google Gen AI API"
)
return edited_html
except anthropic.APITimeoutError as e:
raise HTTPException(
status_code=408,
detail=f"Claude API timeout during HTML editing: {str(e)}"
)
except anthropic.APIConnectionError as e:
raise HTTPException(
status_code=503,
detail=f"Claude API connection error during HTML editing: {str(e)}"
)
except anthropic.APIError as e:
raise HTTPException(
status_code=500,
detail=f"Anthropic API error during HTML editing: {str(e)}"
)
except Exception as e:
# Handle various API errors
error_msg = str(e)
if "timeout" in error_msg.lower():
raise HTTPException(
status_code=408,
detail=f"Google Gen AI API timeout during HTML editing: {error_msg}"
)
elif "connection" in error_msg.lower():
raise HTTPException(
status_code=503,
detail=f"Google Gen AI API connection error during HTML editing: {error_msg}"
)
else:
raise HTTPException(
status_code=500,
detail=f"Google Gen AI API error during HTML editing: {error_msg}"
)
# ENDPOINT 1: Slide to HTML conversion
@ -431,12 +382,12 @@ async def convert_slide_to_html(request: SlideToHtmlRequest):
SlideToHtmlResponse with generated HTML
"""
try:
# Get Anthropic API key from environment
api_key = os.getenv("ANTHROPIC_API_KEY")
# Get Google Gen AI API key from environment
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise HTTPException(
status_code=500,
detail="ANTHROPIC_API_KEY environment variable not set"
detail="GOOGLE_API_KEY environment variable not set"
)
# Resolve image path to actual file system path
@ -521,12 +472,12 @@ async def convert_html_to_react(request: HtmlToReactRequest):
HtmlToReactResponse with generated React component
"""
try:
# Get Anthropic API key from environment
api_key = os.getenv("ANTHROPIC_API_KEY")
# Get Google Gen AI API key from environment
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise HTTPException(
status_code=500,
detail="ANTHROPIC_API_KEY environment variable not set"
detail="GOOGLE_API_KEY environment variable not set"
)
# Validate HTML content
@ -583,12 +534,12 @@ async def edit_html_with_images_endpoint(
HtmlEditResponse with edited HTML
"""
try:
# Get Anthropic API key from environment
api_key = os.getenv("ANTHROPIC_API_KEY")
# Get Google Gen AI API key from environment
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise HTTPException(
status_code=500,
detail="ANTHROPIC_API_KEY environment variable not set"
detail="GOOGLE_API_KEY environment variable not set"
)
# Validate inputs