From d96ac341e1dc906ff999aca7a96b5728679b70bb Mon Sep 17 00:00:00 2001 From: Suraj Jha Date: Fri, 8 Aug 2025 20:41:39 +0545 Subject: [PATCH] update: gpt5 for template generation --- .../api/v1/ppt/endpoints/pptx_slides.py | 176 +++++++++++++++++- .../fastapi/api/v1/ppt/endpoints/prompts.py | 3 +- servers/fastapi/api/v1/ppt/router.py | 2 + 3 files changed, 171 insertions(+), 10 deletions(-) diff --git a/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py b/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py index f5b6add4..5044cd52 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py +++ b/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py @@ -24,6 +24,7 @@ class SlideData(BaseModel): slide_number: int screenshot_url: str xml_content: str + normalized_fonts: List[str] class FontAnalysisResult(BaseModel): @@ -37,6 +38,75 @@ class PptxSlidesResponse(BaseModel): total_slides: int fonts: Optional[FontAnalysisResult] = None +# NEW: Fonts-only router and response for PPTX +class PptxFontsResponse(BaseModel): + success: bool + fonts: FontAnalysisResult + +PPTX_FONTS_ROUTER = APIRouter(prefix="/pptx-fonts", tags=["PPTX Fonts"]) + +# NEW: Normalize font family names by removing style/weight/stretch descriptors and splitting camel case +_STYLE_TOKENS = { + # styles + "italic", "italics", "ital", "oblique", "roman", + # combined style shortcuts + "bolditalic", "bolditalics", + # weights + "thin", "hairline", "extralight", "ultralight", "light", "demilight", "semilight", "book", + "regular", "normal", "medium", "semibold", "demibold", "bold", "extrabold", "ultrabold", + "black", "extrablack", "ultrablack", "heavy", + # width/stretch + "narrow", "condensed", "semicondensed", "extracondensed", "ultracondensed", + "expanded", "semiexpanded", "extraexpanded", "ultraexpanded", +} +# Modifiers commonly used with style tokens +_STYLE_MODIFIERS = {"semi", "demi", "extra", "ultra"} + +def _insert_spaces_in_camel_case(value: str) -> str: + # Insert space before capital letters preceded by lowercase or digits (e.g., MontserratBold -> Montserrat Bold) + value = re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", value) + # Handle sequences like BoldItalic -> Bold Italic + value = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", value) + return value + +def normalize_font_family_name(raw_name: str) -> str: + if not raw_name: + return raw_name + # Replace separators with spaces + name = raw_name.replace("_", " ").replace("-", " ") + # Insert spaces in camel case + name = _insert_spaces_in_camel_case(name) + # Collapse multiple spaces + name = re.sub(r"\s+", " ", name).strip() + # Lowercase helper for matching but keep original casing for output + lower_name = name.lower() + # Quick cut: if the full string ends with a pure style suffix, trim it + for style in sorted(_STYLE_TOKENS, key=len, reverse=True): + if lower_name.endswith(" " + style): + name = name[: -(len(style) + 1)] + lower_name = lower_name[: -(len(style) + 1)] + break + # Tokenize + tokens_original = name.split(" ") + tokens_filtered: List[str] = [] + for index, tok in enumerate(tokens_original): + lower_tok = tok.lower() + # Always keep the first token to avoid stripping families like "Black Ops One" + if index == 0: + tokens_filtered.append(tok) + continue + # Drop style tokens and standalone modifiers + if lower_tok in _STYLE_TOKENS or lower_tok in _STYLE_MODIFIERS: + continue + tokens_filtered.append(tok) + # If everything except first token was dropped and first token is a style token (unlikely), fallback to original + if not tokens_filtered: + tokens_filtered = tokens_original + normalized = " ".join(tokens_filtered).strip() + # Final cleanup of leftover multiple spaces + normalized = re.sub(r"\s+", " ", normalized) + return normalized + def extract_fonts_from_oxml(xml_content: str) -> List[str]: """ @@ -143,25 +213,30 @@ async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResu FontAnalysisResult with supported and unsupported fonts """ # Extract fonts from all slides - all_fonts = set() + raw_fonts = set() for xml_content in slide_xmls: slide_fonts = extract_fonts_from_oxml(xml_content) - all_fonts.update(slide_fonts) + raw_fonts.update(slide_fonts) + + # Normalize to root families (e.g., "Montserrat Italic" -> "Montserrat") + normalized_fonts = {normalize_font_family_name(f) for f in raw_fonts} + # Remove empties if any + normalized_fonts = {f for f in normalized_fonts if f} - if not all_fonts: + if not normalized_fonts: return FontAnalysisResult( internally_supported_fonts=[], not_supported_fonts=[] ) - # Check each font's availability in Google Fonts concurrently - tasks = [check_google_font_availability(font) for font in all_fonts] + # Check each normalized font's availability in Google Fonts concurrently + tasks = [check_google_font_availability(font) for font in normalized_fonts] results = await asyncio.gather(*tasks) internally_supported_fonts = [] not_supported_fonts = [] - for font, is_available in zip(all_fonts, results): + for font, is_available in zip(normalized_fonts, results): if is_available: formatted_name = font.replace(' ', '+') google_fonts_url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap" @@ -246,10 +321,15 @@ async def process_pptx_slides( # Fallback if screenshot generation failed or file is empty placeholder screenshot_url = "/static/images/placeholder.jpg" + # Compute normalized fonts for this slide + raw_slide_fonts = extract_fonts_from_oxml(xml_content) + normalized_fonts = sorted({normalize_font_family_name(f) for f in raw_slide_fonts if f}) + slides_data.append(SlideData( slide_number=i, screenshot_url=screenshot_url, - xml_content=xml_content + xml_content=xml_content, + normalized_fonts=normalized_fonts )) return PptxSlidesResponse( @@ -259,6 +339,75 @@ async def process_pptx_slides( fonts=font_analysis ) +# NEW: Fonts-only endpoint leveraging the same font extraction/analysis +@PPTX_FONTS_ROUTER.post("/process", response_model=PptxFontsResponse) +async def process_pptx_fonts( + pptx_file: UploadFile = File(..., description="PPTX file to analyze fonts from") +): + """ + Analyze a PPTX file and return only the fonts used in the document. + + Uses the exact same font extraction and analysis utilities as the /pptx-slides endpoint. + """ + # Validate PPTX file + if pptx_file.content_type not in POWERPOINT_TYPES: + raise HTTPException( + status_code=400, + detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}" + ) + + # Create temporary directory for processing + with tempfile.TemporaryDirectory() as temp_dir: + # Save uploaded PPTX file + pptx_path = os.path.join(temp_dir, "presentation.pptx") + with open(pptx_path, "wb") as f: + pptx_content = await pptx_file.read() + f.write(pptx_content) + + # Extract slide XMLs from PPTX + slide_xmls = _extract_slide_xmls(pptx_path, temp_dir) + + # Analyze fonts across all slides (same logic as in /pptx-slides) + font_analysis = await analyze_fonts_in_all_slides(slide_xmls) + + return PptxFontsResponse( + success=True, + fonts=font_analysis, + ) + +def _create_font_alias_config(raw_fonts: List[str]) -> str: + """Create a temporary fontconfig configuration that aliases variant family names to normalized root families. + Returns the path to the config file. + """ + # Build mapping from raw -> normalized where different + mappings: Dict[str, str] = {} + for f in raw_fonts: + normalized = normalize_font_family_name(f) + if normalized and normalized != f: + mappings[f] = normalized + # Create config only if we have mappings + fd, fonts_conf_path = tempfile.mkstemp(prefix="fonts_alias_", suffix=".conf") + os.close(fd) + with open(fonts_conf_path, "w", encoding="utf-8") as cfg: + cfg.write(""" + + + /etc/fonts/fonts.conf +""") + for src, dst in mappings.items(): + cfg.write(f""" + + + {src} + + + {dst} + + +""") + cfg.write("\n\n") + return fonts_conf_path + async def _install_fonts(fonts: List[UploadFile], temp_dir: str) -> None: """Install provided font files to the system.""" fonts_dir = os.path.join(temp_dir, "fonts") @@ -328,6 +477,15 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: slide_xmls = _extract_slide_xmls(pptx_path, temp_dir) slide_count = len(slide_xmls) + # Build font alias config to force variant families to resolve to normalized root families + raw_fonts: List[str] = [] + for xml in slide_xmls: + raw_fonts.extend(extract_fonts_from_oxml(xml)) + raw_fonts = list({f for f in raw_fonts if f}) + fonts_conf_path = _create_font_alias_config(raw_fonts) + env = os.environ.copy() + env["FONTCONFIG_FILE"] = fonts_conf_path + print(f"Found {slide_count} slides in presentation") # Step 1: Convert PPTX to PDF using LibreOffice @@ -342,7 +500,7 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: "--convert-to", "pdf", "--outdir", screenshots_dir, pptx_path - ], check=True, capture_output=True, text=True, timeout=500) + ], check=True, capture_output=True, text=True, timeout=500, env=env) print(f"LibreOffice PDF conversion output: {result.stdout}") if result.stderr: @@ -369,7 +527,7 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: "-density", "150", actual_pdf_path, os.path.join(screenshots_dir, "slide_%03d.png") - ], check=True, capture_output=True, text=True, timeout=500) + ], check=True, capture_output=True, text=True, timeout=500, env=env) print(f"ImageMagick conversion output: {result.stdout}") if result.stderr: diff --git a/servers/fastapi/api/v1/ppt/endpoints/prompts.py b/servers/fastapi/api/v1/ppt/endpoints/prompts.py index 6f2985f0..e1c0c0ba 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/prompts.py +++ b/servers/fastapi/api/v1/ppt/endpoints/prompts.py @@ -16,13 +16,14 @@ Follow these rules strictly: - Make sure that no elements overflow or exceed slide bounding in any way. - Properly export shapes as exact SVG. - Add relevant font in tailwind to all texts. -- Wrap the output code inside these classes: \\\"relative w-full rounded-sm max-w-[1280px] shadow-lg max-h-[720px] aspect-video bg-white relative z-20 mx-auto overflow-hidden\\\". +- Wrap the output code inside these classes: \"relative w-full rounded-sm max-w-[1280px] shadow-lg max-h-[720px] aspect-video bg-white relative z-20 mx-auto overflow-hidden\". - For image everywhere use https://images.pexels.com/photos/31527637/pexels-photo-31527637.jpeg - Image should never be inside of a SVG. - Replace brand icons with a circle of same size with "i" between. Generic icons like "email", "call", etc should remain same. - If there is a box/card enclosing a text, make it grow as well when the text grows, so that the text does not overflow the box/card. - Give out only HTML and Tailwind code. No other texts or explanations. - Do not give entire HTML structure with head, body, etc. Just give the respective HTML and Tailwind code inside div with above classes. +- If a list of fonts is provided, you must use the provided fonts (normalized root families) in font-family declarations, prioritizing them over inferred fonts. Use the first matching family wherever applicable. """ HTML_TO_REACT_SYSTEM_PROMPT = """ diff --git a/servers/fastapi/api/v1/ppt/router.py b/servers/fastapi/api/v1/ppt/router.py index e493db6f..1f89a2f1 100644 --- a/servers/fastapi/api/v1/ppt/router.py +++ b/servers/fastapi/api/v1/ppt/router.py @@ -14,6 +14,7 @@ from api.v1.ppt.endpoints.images import IMAGES_ROUTER from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER from api.v1.ppt.endpoints.slide import SLIDE_ROUTER +from api.v1.ppt.endpoints.pptx_slides import PPTX_FONTS_ROUTER API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt") @@ -35,3 +36,4 @@ API_V1_PPT_ROUTER.include_router(PDF_SLIDES_ROUTER) API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER) API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER) API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER) +API_V1_PPT_ROUTER.include_router(PPTX_FONTS_ROUTER)