From e8670626bca7be44ebbbd70a622fca0ce914a146 Mon Sep 17 00:00:00 2001
From: Suraj Jha <getsurajjha@gmail.com>
Date: Thu, 31 Jul 2025 23:46:57 +0545
Subject: [PATCH] fix: html edit with single image, add schemas in tsx export

---
 .../api/v1/ppt/endpoints/slide_to_html.py     | 114 ++++++++++++------
 1 file changed, 76 insertions(+), 38 deletions(-)
diff --git a/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py b/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py
index c8bd8a8a..ce155996 100644
--- a/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py
+++ b/servers/fastapi/api/v1/ppt/endpoints/slide_to_html.py
@@ -66,13 +66,36 @@ Convert given static HTML and Tailwind slide to a TSX React component so that it
 4) For image and icons icons should be a different schema with two dunder fields for prompt and url separately.
 5) Default value for schema fields should be populated with the respective static value in HTML input.
 6) In schema max and min value for characters in string and items in array should be specified as per the given image of the slide. You should accurately evaluate the maximum and minimum possible characters respective fields can handle visually through the image.
+7) For images and icons, a schema must be declared with two separate dunder fields: one for the prompt and one for the url.
+8) Layout Id, layout name and layout description should be declared and should describe the structure of the layout, not its purpose. Do not mention the number of any items in the layout.
+    -Description should not mention the purpose of the elements in it, so use 'cards' instead of 'goal cards' and 'bullet points' instead of 'solution bullet points'.
+    -layoutName constant should be same as the component name in the layout.
+    -Layout Id examples: header-description-bullet-points-slide, header-description-image-slide
+    -Layout Name examples: HeaderDescriptionBulletPointsLayout, HeaderDescriptionImageLayout
+    -Layout Description examples: 'A slide with a header, description, and bullet points', 'A slide with a header, description, and image'
 
 For example: 
 Input: <div class="w-full rounded-sm max-w-[1280px] shadow-lg max-h-[720px] aspect-video bg-gradient-to-br from-gray-50 to-white relative z-20 mx-auto overflow-hidden" style="font-family: Poppins, sans-serif;"><div class="flex flex-col h-full px-8 sm:px-12 lg:px-20 pt-8 pb-8"><div class="mb-8"><div class="text-4xl sm:text-5xl lg:text-6xl font-bold text-gray-900" style="font-size: 60px; font-weight: 700; font-family: Poppins, sans-serif; color: rgb(17, 24, 39); line-height: 60px; text-align: start; margin: 0px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(17, 24, 39); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." 
translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Effects of Global Warming</p></div></div></div></div><div class="flex flex-1"><div class="flex-1 relative"><div class="absolute top-0 left-0 w-full h-full"><svg class="w-full h-full opacity-30" viewBox="0 0 200 200"><defs><pattern id="grid" width="20" height="20" patternUnits="userSpaceOnUse"><path d="M 20 0 L 0 0 0 20" fill="none" stroke="#8b5cf6" stroke-width="0.5"></path></pattern></defs><rect width="100%" height="100%" fill="url(#grid)"></rect></svg></div><div class="relative z-10 h-full flex items-center justify-center p-4"><div class="w-full max-w-md h-80 rounded-2xl overflow-hidden shadow-lg"><img src="/app_data/images/08b1c132-84e0-4d04-8082-6f34330817ef.jpg" alt="global warming effects on earth" class="w-full h-full object-cover" data-editable-processed="true" data-editable-id="2-image-image-0" style="cursor: pointer; transition: opacity 0.2s, transform 0.2s;"></div></div><div class="absolute top-20 right-8 text-purple-600"><svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0l3.09 6.26L22 9l-6.91 2.74L12 18l-3.09-6.26L2 9l6.91-2.74L12 0z"></path></svg></div></div><div class="flex-1 flex flex-col justify-center pl-8 lg:pl-16"><div class="text-lg text-gray-700 leading-relaxed mb-8" style="font-size: 18px; font-weight: 400; font-family: Poppins, sans-serif; color: rgb(55, 65, 81); line-height: 29.25px; text-align: start; margin: 0px 0px 32px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(55, 65, 81); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; 
color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Global warming triggers a cascade of effects on our planet. These changes impact everything from our oceans to our ecosystems.</p></div></div></div><div class="space-y-6"><div class="flex items-start space-x-4"><div class="flex-shrink-0 w-12 h-12 bg-white rounded-lg shadow-md flex items-center justify-center"><img src="/static/icons/bold/dots-three-vertical-bold.png" alt="sea level rising icon" class="w-6 h-6 object-contain text-gray-700" data-editable-processed="true" data-editable-id="2-icon-bulletPoints[0].icon-1" style="cursor: pointer; transition: opacity 0.2s, transform 0.2s;"></div><div class="flex-1"><div class="text-xl font-semibold text-gray-900 mb-2" style="font-size: 20px; font-weight: 600; font-family: Poppins, sans-serif; color: rgb(17, 24, 39); line-height: 28px; text-align: start; margin: 0px 0px 8px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(17, 24, 39); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." 
translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Rising Sea Levels</p></div></div></div><div class="w-12 h-0.5 bg-purple-600 mb-3"></div><div class="text-base text-gray-700 leading-relaxed" style="font-size: 16px; font-weight: 400; font-family: Poppins, sans-serif; color: rgb(55, 65, 81); line-height: 26px; text-align: start; margin: 0px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(55, 65, 81); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Rising sea levels threaten coastal communities and ecosystems due to melting glaciers and thermal expansion.</p></div></div></div></div></div><div class="flex items-start space-x-4"><div class="flex-shrink-0 w-12 h-12 bg-white rounded-lg shadow-md flex items-center justify-center"><img src="/static/icons/bold/discord-logo-bold.png" alt="heatwave icon" class="w-6 h-6 object-contain text-gray-700" data-editable-processed="true" data-editable-id="2-icon-bulletPoints[1].icon-2" style="cursor: pointer; transition: opacity 0.2s, transform 0.2s;"></div><div class="flex-1"><div class="text-xl font-semibold text-gray-900 mb-2" style="font-size: 20px; font-weight: 600; font-family: Poppins, sans-serif; color: rgb(17, 24, 39); line-height: 28px; text-align: start; margin: 0px 0px 8px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 
0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(17, 24, 39); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Intense Heatwaves</p></div></div></div><div class="w-12 h-0.5 bg-purple-600 mb-3"></div><div class="text-base text-gray-700 leading-relaxed" style="font-size: 16px; font-weight: 400; font-family: Poppins, sans-serif; color: rgb(55, 65, 81); line-height: 26px; text-align: start; margin: 0px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(55, 65, 81); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." 
translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Heatwaves are becoming more frequent and intense, posing significant risks to human health and agriculture.</p></div></div></div></div></div><div class="flex items-start space-x-4"><div class="flex-shrink-0 w-12 h-12 bg-white rounded-lg shadow-md flex items-center justify-center"><img src="/static/icons/bold/cloud-rain-bold.png" alt="precipitation changes icon" class="w-6 h-6 object-contain text-gray-700" data-editable-processed="true" data-editable-id="2-icon-bulletPoints[2].icon-3" style="cursor: pointer; transition: opacity 0.2s, transform 0.2s;"></div><div class="flex-1"><div class="text-xl font-semibold text-gray-900 mb-2" style="font-size: 20px; font-weight: 600; font-family: Poppins, sans-serif; color: rgb(17, 24, 39); line-height: 28px; text-align: start; margin: 0px 0px 8px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(17, 24, 39); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." 
translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Changes in Precipitation</p></div></div></div><div class="w-12 h-0.5 bg-purple-600 mb-3"></div><div class="text-base text-gray-700 leading-relaxed" style="font-size: 16px; font-weight: 400; font-family: Poppins, sans-serif; color: rgb(55, 65, 81); line-height: 26px; text-align: start; margin: 0px; padding: 0px; border-radius: 0px; border: 0px solid rgb(229, 231, 235); background-color: rgba(0, 0, 0, 0); opacity: 1; box-shadow: none; text-shadow: none; text-decoration: none solid rgb(55, 65, 81); text-transform: none; letter-spacing: normal; word-spacing: 0px; text-overflow: clip; white-space: normal; word-break: normal; overflow: visible;"><div class="tiptap-text-editor w-full" style="line-height: inherit; font-size: inherit; font-weight: inherit; font-family: inherit; color: inherit; text-align: inherit;"><div contenteditable="true" data-placeholder="Enter text..." translate="no" class="tiptap ProseMirror outline-none focus:outline-none transition-all duration-200" tabindex="0"><p>Altered precipitation patterns lead to increased droughts in some regions and severe flooding in others, affecting water resources.</p></div></div></div></div></div></div></div></div></div></div>
 Output: import React from 'react'
 import * as z from "zod";
-import { ImageSchema, IconSchema } from 'defaultSchemes';
 
+const ImageSchema = z.object({
+    __image_url__: z.url().meta({
+        description: "URL to image",
+    }),
+    __image_prompt__: z.string().meta({
+        description: "Prompt used to generate the image",
+    }).min(10).max(50),
+})
+
+const IconSchema = z.object({
+    __icon_url__: z.string().meta({
+        description: "URL to icon",
+    }),
+    __icon_query__: z.string().meta({
+        description: "Query used to search the icon",
+    }).min(5).max(20),
+})
 export const layoutId = 'bullet-with-icons-slide'
 export const layoutName = 'Bullet with Icons'
 export const layoutDescription = 'A bullets style slide with main content, supporting image, and bullet points with icons and descriptions.'
@@ -236,7 +259,7 @@ export default BulletWithIconsSlideLayout
 """
 
 HTML_EDIT_SYSTEM_PROMPT = """
-You need to edit given html with respect to the indication and sketch in the given UI. You'll be given the code for current UI which is in presentation size, along with its visualization in image form. Over that you'll also be given another image which has indications of what might change in form of sketch in the UI. You will have to return the edited html with tailwind with the changes as indicated on the image and through prompt. Make sure you think through the design before making the change and also make sure you don't change the non-indicated part. Try to follow the design style of current content for generated content. Only give out code and nothing else.
+You need to edit given html with respect to the indication and sketch in the given UI. You'll be given the code for current UI which is in presentation size, along with its visualization in image form. Over that you'll also be given another image which has indications of what might change in form of sketch in the UI. You will have to return the edited html with tailwind with the changes as indicated on the image and through prompt. Make sure you think through the design before making the change and also make sure you don't change the non-indicated part. Try to follow the design style of current content for generated content. If sketch image is not provided, then you need to edit the html with respect to the prompt. Only give out code and nothing else.
 """
 
 
@@ -292,7 +315,7 @@ async def generate_html_from_slide(base64_image: str, media_type: str, xml_conte
             ],
             thinking={
                 "type": "enabled",
-                "budget_tokens": 50000
+                "budget_tokens": 55000
             }
         ) as stream:
             print("Streaming started, collecting HTML response...")
@@ -437,13 +460,13 @@ async def generate_react_component_from_html(html_content: str, api_key: str) ->
         )
 
 
-async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, media_type: str, html_content: str, prompt: str, api_key: str) -> str:
+async def edit_html_with_images(current_ui_base64: str, sketch_base64: Optional[str], media_type: str, html_content: str, prompt: str, api_key: str) -> str:
     """
-    Edit HTML content based on two images and a text prompt using Anthropic Claude API.
-    
+    Edit HTML content based on one or two images and a text prompt using Anthropic Claude API.
+
     Args:
         current_ui_base64: Base64 encoded current UI image data
-        sketch_base64: Base64 encoded sketch/indication image data
+        sketch_base64: Base64 encoded sketch/indication image data (optional)
         media_type: MIME type of the images (e.g., 'image/png')
         html_content: Current HTML content to edit
         prompt: Text prompt describing the changes
@@ -464,6 +487,33 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, medi
         edited_html = ""
         thinking_content = ""
         
+        # Build content array - always include text and current UI image
+        content = [
+            {
+                "type": "text",
+                "text": f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}"
+            },
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": current_ui_base64
+                }
+            }
+        ]
+        
+        # Only add sketch image if provided
+        if sketch_base64:
+            content.append({
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": sketch_base64
+                }
+            })
+        
         with client.messages.stream(
             model="claude-sonnet-4-20250514",
             max_tokens=64000,
@@ -472,28 +522,7 @@ async def edit_html_with_images(current_ui_base64: str, sketch_base64: str, medi
             messages=[
                 {
                     "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": f"Current HTML to edit:\n\n{html_content}\n\nText prompt for changes: {prompt}"
-                        },
-                        {
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": media_type,
-                                "data": current_ui_base64
-                            }
-                        },
-                        {
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": media_type,
-                                "data": sketch_base64
-                            }
-                        }
-                    ]
+                    "content": content
                 }
             ],
             thinking={
@@ -620,6 +649,8 @@ async def convert_slide_to_html(request: SlideToHtmlRequest):
             xml_content=request.xml,
             api_key=api_key
             )
+
+        html_content = html_content.replace("```html", "").replace("```", "")
         
         return SlideToHtmlResponse(
             success=True,
@@ -651,7 +682,6 @@ async def convert_html_to_react(request: HtmlToReactRequest):
     Returns:
         HtmlToReactResponse with generated React component
     """
-    print("reachedhere")
     try:
         # Get Anthropic API key from environment
         api_key = os.getenv("ANTHROPIC_API_KEY")
@@ -673,6 +703,8 @@ async def convert_html_to_react(request: HtmlToReactRequest):
             html_content=request.html,
             api_key=api_key
         )
+
+        react_component = react_component.replace("```tsx", "").replace("```", "")
         
         return HtmlToReactResponse(
             success=True,
@@ -696,16 +728,16 @@ async def convert_html_to_react(request: HtmlToReactRequest):
 @HTML_EDIT_ROUTER.post("/", response_model=HtmlEditResponse)
 async def edit_html_with_images_endpoint(
     current_ui_image: UploadFile = File(..., description="Current UI image file"),
-    sketch_image: UploadFile = File(..., description="Sketch/indication image file"),
+    sketch_image: Optional[UploadFile] = File(None, description="Sketch/indication image file (optional)"),
     html: str = Form(..., description="Current HTML content to edit"),
     prompt: str = Form(..., description="Text prompt describing the changes")
 ):
     """
-    Edit HTML content based on two uploaded images and a text prompt using Anthropic Claude API.
+    Edit HTML content based on one or two uploaded images and a text prompt using Anthropic Claude API.
     
     Args:
         current_ui_image: Uploaded current UI image file
-        sketch_image: Uploaded sketch/indication image file
+        sketch_image: Uploaded sketch/indication image file (optional)
         html: Current HTML content to edit (form data)
         prompt: Text prompt describing the changes (form data)
     
@@ -734,25 +766,29 @@ async def edit_html_with_images_endpoint(
                 detail="Text prompt cannot be empty"
             )
         
-        # Validate image files
+        # Validate current UI image file
         if not current_ui_image.content_type or not current_ui_image.content_type.startswith("image/"):
             raise HTTPException(
                 status_code=400,
                 detail="Current UI file must be an image"
             )
         
-        if not sketch_image.content_type or not sketch_image.content_type.startswith("image/"):
+        # Validate sketch image file only if provided
+        if sketch_image and (not sketch_image.content_type or not sketch_image.content_type.startswith("image/")):
             raise HTTPException(
                 status_code=400,
                 detail="Sketch file must be an image"
             )
         
-        # Read and encode both images to base64
+        # Read and encode current UI image to base64
         current_ui_content = await current_ui_image.read()
         current_ui_base64 = base64.b64encode(current_ui_content).decode('utf-8')
         
-        sketch_content = await sketch_image.read()
-        sketch_base64 = base64.b64encode(sketch_content).decode('utf-8')
+        # Read and encode sketch image to base64 only if provided
+        sketch_base64 = None
+        if sketch_image:
+            sketch_content = await sketch_image.read()
+            sketch_base64 = base64.b64encode(sketch_content).decode('utf-8')
         
         # Use the content type from the uploaded files
         media_type = current_ui_image.content_type
@@ -766,6 +802,8 @@ async def edit_html_with_images_endpoint(
             prompt=prompt,
             api_key=api_key
         )
+
+        edited_html = edited_html.replace("```html", "").replace("```", "")
         
         return HtmlEditResponse(
             success=True,