From e8670626bca7be44ebbbd70a622fca0ce914a146 Mon Sep 17 00:00:00 2001 From: Suraj Jha Date: Thu, 31 Jul 2025 23:46:57 +0545 Subject: [PATCH] fix: html edit with single image, add schemas in tsx export --- .../api/v1/ppt/endpoints/slide_to_html.py | 114 ++++++++++++------ 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py b/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py index c8bd8a8a..ce155996 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py +++ b/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py @@ -66,13 +66,36 @@ Convert given static HTML and Tailwind slide to a TSX React component so that it 4) For image and icons icons should be a different schema with two dunder fields for prompt and url separately. 5) Default value for schema fields should be populated with the respective static value in HTML input. 6) In schema max and min value for characters in string and items in array should be specified as per the given image of the slide. You should accurately evaluate the maximum and minimum possible characters respective fields can handle visually through the image. +7) For image and icons schema should be compulsorily declared with two dunder fields for prompt and url separately. +8) Layout Id, layout name and layout description should be declared and should describe the structure of the layout not its purpose. Do not describe numbers of any items in the layout. + -Description should not have any purpose for elements in it, so use 'cards' instead of 'goal cards' and 'bullet points' instead of 'solution bullet points'. + -layoutName constant should be same as the component name in the layout. + -Layout Id examples: header-description-bullet-points-slide, header-description-image-slide + -Layout Name examples: HeaderDescriptionBulletPointsLayout, HeaderDescriptionImageLayout + -Layout Description examples: A slide with a header, description, and bullet points and A slide with a header, description, and image For example: Input:

Effects of Global Warming

global warming effects on earth

Global warming triggers a cascade of effects on our planet. These changes impact everything from our oceans to our ecosystems.

sea level rising icon

Rising Sea Levels

Rising sea levels threaten coastal communities and ecosystems due to melting glaciers and thermal expansion.

heatwave icon

Intense Heatwaves

Heatwaves are becoming more frequent and intense, posing significant risks to human health and agriculture.

precipitation changes icon

Changes in Precipitation

Altered precipitation patterns lead to increased droughts in some regions and severe flooding in others, affecting water resources.

Output: import React from 'react' import * as z from "zod"; -import { ImageSchema, IconSchema } from 'defaultSchemes'; +const ImageSchema = z.object({ + __image_url__: z.url().meta({ + description: "URL to image", + }), + __image_prompt__: z.string().meta({ + description: "Prompt used to generate the image", + }).min(10).max(50), +}) + +const IconSchema = z.object({ + __icon_url__: z.string().meta({ + description: "URL to icon", + }), + __icon_query__: z.string().meta({ + description: "Query used to search the icon", + }).min(5).max(20), +}) export const layoutId = 'bullet-with-icons-slide' export const layoutName = 'Bullet with Icons' export const layoutDescription = 'A bullets style slide with main content, supporting image, and bullet points with icons and descriptions.' @@ -236,7 +259,7 @@ export default BulletWithIconsSlideLayout """ HTML_EDIT_SYSTEM_PROMPT = """ -You need to edit given html with respect to the indication and sketch in the given UI. You'll be given the code for current UI which is in presentation size, along with its visualization in image form. Over that you'll also be given another image which has indications of what might change in form of sketch in the UI. You will have to return the edited html with tailwind with the changes as indicated on the image and through prompt. Make sure you think through the design before making the change and also make sure you don't change the non-indicated part. Try to follow the design style of current content for generated content. Only give out code and nothing else. +You need to edit given html with respect to the indication and sketch in the given UI. You'll be given the code for current UI which is in presentation size, along with its visualization in image form. Over that you'll also be given another image which has indications of what might change in form of sketch in the UI. You will have to return the edited html with tailwind with the changes as indicated on the image and through prompt. Make sure you think through the design before making the change and also make sure you don't change the non-indicated part. Try to follow the design style of current content for generated content. If sketch image is not provided, then you need to edit the html with respect to the prompt. Only give out code and nothing else. """ @@ -292,7 +315,7 @@ async def generate_html_from_slide(base64_image: str, media_type: str, xml_conte ], thinking={ "type": "enabled", - "budget_tokens": 50000 + "budget_tokens": 55000 } ) as stream: print("Streaming started, collecting HTML response...") @@ -437,13 +460,13 @@ async def generate_react_component_from_html(html_content: str, api_key: str) -> ) -async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, media_type: str, html_content: str, prompt: str, api_key: str) -> str: +async def edit_html_with_images(current_ui_base64: str, sketch_base64: Optional[str], media_type: str, html_content: str, prompt: str, api_key: str) -> str: """ - Edit HTML content based on two images and a text prompt using Anthropic Claude API. - + Edit HTML content based on one or two images and a text prompt using Anthropic Claude API. + Args: current_ui_base64: Base64 encoded current UI image data - sketch_base64: Base64 encoded sketch/indication image data + sketch_base64: Base64 encoded sketch/indication image data (optional) media_type: MIME type of the images (e.g., 'image/png') html_content: Current HTML content to edit prompt: Text prompt describing the changes @@ -464,6 +487,33 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, medi edited_html = "" thinking_content = "" + # Build content array - always include text and current UI image + content = [ + { + "type": "text", + "text": f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}" + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": current_ui_base64 + } + } + ] + + # Only add sketch image if provided + if sketch_base64: + content.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": sketch_base64 + } + }) + with client.messages.stream( model="claude-sonnet-4-20250514", max_tokens=64000, @@ -472,28 +522,7 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, medi messages=[ { "role": "user", - "content": [ - { - "type": "text", - "text": f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}" - }, - { - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": current_ui_base64 - } - }, - { - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": sketch_base64 - } - } - ] + "content": content } ], thinking={ @@ -620,6 +649,8 @@ async def convert_slide_to_html(request: SlideToHtmlRequest): xml_content=request.xml, api_key=api_key ) + + html_content = html_content.replace("```html", "").replace("```", "") return SlideToHtmlResponse( success=True, @@ -651,7 +682,6 @@ async def convert_html_to_react(request: HtmlToReactRequest): Returns: HtmlToReactResponse with generated React component """ - print("reachedhere") try: # Get Anthropic API key from environment api_key = os.getenv("ANTHROPIC_API_KEY") @@ -673,6 +703,8 @@ async def convert_html_to_react(request: HtmlToReactRequest): html_content=request.html, api_key=api_key ) + + react_component = react_component.replace("```tsx", "").replace("```", "") return HtmlToReactResponse( success=True, @@ -696,16 +728,16 @@ async def convert_html_to_react(request: HtmlToReactRequest): @HTML_EDIT_ROUTER.post("/", response_model=HtmlEditResponse) async def edit_html_with_images_endpoint( current_ui_image: UploadFile = File(..., description="Current UI image file"), - sketch_image: UploadFile = File(..., description="Sketch/indication image file"), + sketch_image: Optional[UploadFile] = File(None, description="Sketch/indication image file (optional)"), html: str = Form(..., description="Current HTML content to edit"), prompt: str = Form(..., description="Text prompt describing the changes") ): """ - Edit HTML content based on two uploaded images and a text prompt using Anthropic Claude API. + Edit HTML content based on one or two uploaded images and a text prompt using Anthropic Claude API. Args: current_ui_image: Uploaded current UI image file - sketch_image: Uploaded sketch/indication image file + sketch_image: Uploaded sketch/indication image file (optional) html: Current HTML content to edit (form data) prompt: Text prompt describing the changes (form data) @@ -734,25 +766,29 @@ async def edit_html_with_images_endpoint( detail="Text prompt cannot be empty" ) - # Validate image files + # Validate current UI image file if not current_ui_image.content_type or not current_ui_image.content_type.startswith("image/"): raise HTTPException( status_code=400, detail="Current UI file must be an image" ) - if not sketch_image.content_type or not sketch_image.content_type.startswith("image/"): + # Validate sketch image file only if provided + if sketch_image and (not sketch_image.content_type or not sketch_image.content_type.startswith("image/")): raise HTTPException( status_code=400, detail="Sketch file must be an image" ) - # Read and encode both images to base64 + # Read and encode current UI image to base64 current_ui_content = await current_ui_image.read() current_ui_base64 = base64.b64encode(current_ui_content).decode('utf-8') - sketch_content = await sketch_image.read() - sketch_base64 = base64.b64encode(sketch_content).decode('utf-8') + # Read and encode sketch image to base64 only if provided + sketch_base64 = None + if sketch_image: + sketch_content = await sketch_image.read() + sketch_base64 = base64.b64encode(sketch_content).decode('utf-8') # Use the content type from the uploaded files media_type = current_ui_image.content_type @@ -766,6 +802,8 @@ async def edit_html_with_images_endpoint( prompt=prompt, api_key=api_key ) + + edited_html = edited_html.replace("```html", "").replace("```", "") return HtmlEditResponse( success=True,