update: gpt5 for template generation

This commit is contained in:
Suraj Jha 2025-08-08 20:41:39 +05:45
parent 464d27624f
commit d96ac341e1
3 changed files with 171 additions and 10 deletions

View file

@ -24,6 +24,7 @@ class SlideData(BaseModel):
slide_number: int
screenshot_url: str
xml_content: str
normalized_fonts: List[str]
class FontAnalysisResult(BaseModel):
@ -37,6 +38,75 @@ class PptxSlidesResponse(BaseModel):
total_slides: int
fonts: Optional[FontAnalysisResult] = None
# NEW: Fonts-only router and response for PPTX
# Response model declared as `response_model` of the fonts-only "/process" route.
class PptxFontsResponse(BaseModel):
    # The fonts-only handler in this file sets this to True on its return path.
    success: bool
    # Font analysis produced for the uploaded presentation.
    fonts: FontAnalysisResult


# Router carrying the fonts-only routes; mounted under the "/pptx-fonts" prefix.
PPTX_FONTS_ROUTER = APIRouter(prefix="/pptx-fonts", tags=["PPTX Fonts"])
# Font-family normalization: split camel-cased names and drop style/weight/stretch
# descriptors so that variant names collapse to their root family.
_STYLE_TOKENS = {
    # styles
    "italic", "italics", "ital", "oblique", "roman",
    # combined style shortcuts
    "bolditalic", "bolditalics",
    # weights
    "thin", "hairline", "extralight", "ultralight", "light", "demilight", "semilight", "book",
    "regular", "normal", "medium", "semibold", "demibold", "bold", "extrabold", "ultrabold",
    "black", "extrablack", "ultrablack", "heavy",
    # width/stretch
    "narrow", "condensed", "semicondensed", "extracondensed", "ultracondensed",
    "expanded", "semiexpanded", "extraexpanded", "ultraexpanded",
}

# Modifiers commonly used with style tokens
_STYLE_MODIFIERS = {"semi", "demi", "extra", "ultra"}

# Families whose own name contains a word that doubles as a style token. The
# listed leading tokens are always kept, otherwise "Times New Roman" would be
# truncated to the non-existent family "Times New".
_PROTECTED_FAMILY_PREFIXES = (
    ("times", "new", "roman"),
)

# Compiled once at import time instead of on every call.
_LOWER_TO_UPPER_RE = re.compile(r"(?<=[a-z0-9])([A-Z])")
_ACRONYM_BOUNDARY_RE = re.compile(r"([A-Z]+)([A-Z][a-z])")
_WHITESPACE_RE = re.compile(r"\s+")


def _insert_spaces_in_camel_case(value: str) -> str:
    """Return *value* with spaces inserted at camel-case word boundaries.

    Two passes are applied in order:

    1. A space goes before every capital letter that follows a lowercase
       letter or a digit (``MontserratBold`` -> ``Montserrat Bold``).
    2. A run of capitals followed by a capitalised word is split before that
       word (``PTSans`` -> ``PT Sans``).
    """
    value = _LOWER_TO_UPPER_RE.sub(r" \1", value)
    value = _ACRONYM_BOUNDARY_RE.sub(r"\1 \2", value)
    return value


def normalize_font_family_name(raw_name: str) -> str:
    """Reduce a font name to its root family name.

    ``_`` and ``-`` are treated as word separators, camel-case words are split,
    and every word after the first that is a style token or a standalone
    modifier is dropped. Original casing of the kept words is preserved.

    Args:
        raw_name: Font name as found in the document, e.g. ``"Montserrat-BoldItalic"``.

    Returns:
        The normalized family name (``"Montserrat"`` for the example above).
        A falsy ``raw_name`` (``""`` or ``None``) is returned unchanged.
    """
    if not raw_name:
        return raw_name

    spaced = _insert_spaces_in_camel_case(raw_name.replace("_", " ").replace("-", " "))
    tokens = _WHITESPACE_RE.sub(" ", spaced).strip().split(" ")
    lowered = [tok.lower() for tok in tokens]

    # The first token is always kept so families such as "Black Ops One" survive;
    # protected families keep their whole (style-looking) name.
    keep = 1
    for prefix in _PROTECTED_FAMILY_PREFIXES:
        if tuple(lowered[: len(prefix)]) == prefix:
            keep = max(keep, len(prefix))

    kept = tokens[:keep]
    for tok, low in zip(tokens[keep:], lowered[keep:]):
        # Drop style tokens and standalone modifiers ("Semi", "Extra", ...).
        if low in _STYLE_TOKENS or low in _STYLE_MODIFIERS:
            continue
        kept.append(tok)

    return " ".join(kept)
def extract_fonts_from_oxml(xml_content: str) -> List[str]:
"""
@ -143,25 +213,30 @@ async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResu
FontAnalysisResult with supported and unsupported fonts
"""
# Extract fonts from all slides
all_fonts = set()
raw_fonts = set()
for xml_content in slide_xmls:
slide_fonts = extract_fonts_from_oxml(xml_content)
all_fonts.update(slide_fonts)
raw_fonts.update(slide_fonts)
# Normalize to root families (e.g., "Montserrat Italic" -> "Montserrat")
normalized_fonts = {normalize_font_family_name(f) for f in raw_fonts}
# Remove empties if any
normalized_fonts = {f for f in normalized_fonts if f}
if not all_fonts:
if not normalized_fonts:
return FontAnalysisResult(
internally_supported_fonts=[],
not_supported_fonts=[]
)
# Check each font's availability in Google Fonts concurrently
tasks = [check_google_font_availability(font) for font in all_fonts]
# Check each normalized font's availability in Google Fonts concurrently
tasks = [check_google_font_availability(font) for font in normalized_fonts]
results = await asyncio.gather(*tasks)
internally_supported_fonts = []
not_supported_fonts = []
for font, is_available in zip(all_fonts, results):
for font, is_available in zip(normalized_fonts, results):
if is_available:
formatted_name = font.replace(' ', '+')
google_fonts_url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap"
@ -246,10 +321,15 @@ async def process_pptx_slides(
# Fallback if screenshot generation failed or file is empty placeholder
screenshot_url = "/static/images/placeholder.jpg"
# Compute normalized fonts for this slide
raw_slide_fonts = extract_fonts_from_oxml(xml_content)
normalized_fonts = sorted({normalize_font_family_name(f) for f in raw_slide_fonts if f})
slides_data.append(SlideData(
slide_number=i,
screenshot_url=screenshot_url,
xml_content=xml_content
xml_content=xml_content,
normalized_fonts=normalized_fonts
))
return PptxSlidesResponse(
@ -259,6 +339,75 @@ async def process_pptx_slides(
fonts=font_analysis
)
# NEW: Fonts-only endpoint leveraging the same font extraction/analysis
@PPTX_FONTS_ROUTER.post("/process", response_model=PptxFontsResponse)
async def process_pptx_fonts(
    pptx_file: UploadFile = File(..., description="PPTX file to analyze fonts from")
):
    """
    Analyze a PPTX file and return only the fonts used in the document.
    Uses the exact same font extraction and analysis utilities as the /pptx-slides endpoint.
    """
    # Guard clause: only uploads declaring a PowerPoint content type are accepted.
    if pptx_file.content_type not in POWERPOINT_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}"
        )

    # All intermediate files live in a scratch directory that is removed on exit.
    with tempfile.TemporaryDirectory() as work_dir:
        saved_path = os.path.join(work_dir, "presentation.pptx")

        # Persist the upload so the slide extractor can work from a file path.
        with open(saved_path, "wb") as out:
            out.write(await pptx_file.read())

        # Pull the per-slide XML out of the saved file, then run the shared
        # font analysis over all slides at once.
        xml_per_slide = _extract_slide_xmls(saved_path, work_dir)
        analysis = await analyze_fonts_in_all_slides(xml_per_slide)

        return PptxFontsResponse(
            success=True,
            fonts=analysis,
        )
def _create_font_alias_config(raw_fonts: List[str]) -> str:
    """Create a temporary fontconfig file aliasing variant family names to their normalized root families.

    For every name in ``raw_fonts`` whose normalized form is non-empty and
    differs from the raw name, a ``<match>`` rule is written that rewrites the
    requested family to the normalized one. The file always includes
    ``/etc/fonts/fonts.conf`` and is written even when there are no rules.

    Args:
        raw_fonts: Font family names as extracted from the presentation.

    Returns:
        Path of the generated config file. The file is not deleted here;
        removing it is left to the caller.

    Raises:
        OSError: If the temporary file cannot be created or written. A
            partially written file is removed before the error propagates.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from xml.sax.saxutils import escape

    # Build mapping from raw -> normalized where different
    mappings: Dict[str, str] = {}
    for family in raw_fonts:
        normalized = normalize_font_family_name(family)
        if normalized and normalized != family:
            mappings[family] = normalized

    fd, fonts_conf_path = tempfile.mkstemp(prefix="fonts_alias_", suffix=".conf")
    try:
        # Wrap the descriptor mkstemp returned rather than closing and reopening the path.
        with os.fdopen(fd, "w", encoding="utf-8") as cfg:
            cfg.write("""<?xml version='1.0'?>
<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
<fontconfig>
  <include>/etc/fonts/fonts.conf</include>
""")
            for src, dst in mappings.items():
                # Font names come from the uploaded document: escape XML
                # metacharacters so a name like "A&B" cannot break the config.
                cfg.write(f"""
  <match target="pattern">
    <test name="family" compare="eq">
      <string>{escape(src)}</string>
    </test>
    <edit name="family" mode="assign" binding="strong">
      <string>{escape(dst)}</string>
    </edit>
  </match>
""")
            cfg.write("\n</fontconfig>\n")
    except Exception:
        # Do not leave a truncated config file behind.
        try:
            os.unlink(fonts_conf_path)
        except OSError:
            pass
        raise

    return fonts_conf_path
async def _install_fonts(fonts: List[UploadFile], temp_dir: str) -> None:
"""Install provided font files to the system."""
fonts_dir = os.path.join(temp_dir, "fonts")
@ -328,6 +477,15 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
slide_count = len(slide_xmls)
# Build font alias config to force variant families to resolve to normalized root families
raw_fonts: List[str] = []
for xml in slide_xmls:
raw_fonts.extend(extract_fonts_from_oxml(xml))
raw_fonts = list({f for f in raw_fonts if f})
fonts_conf_path = _create_font_alias_config(raw_fonts)
env = os.environ.copy()
env["FONTCONFIG_FILE"] = fonts_conf_path
print(f"Found {slide_count} slides in presentation")
# Step 1: Convert PPTX to PDF using LibreOffice
@ -342,7 +500,7 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
"--convert-to", "pdf",
"--outdir", screenshots_dir,
pptx_path
], check=True, capture_output=True, text=True, timeout=500)
], check=True, capture_output=True, text=True, timeout=500, env=env)
print(f"LibreOffice PDF conversion output: {result.stdout}")
if result.stderr:
@ -369,7 +527,7 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
"-density", "150",
actual_pdf_path,
os.path.join(screenshots_dir, "slide_%03d.png")
], check=True, capture_output=True, text=True, timeout=500)
], check=True, capture_output=True, text=True, timeout=500, env=env)
print(f"ImageMagick conversion output: {result.stdout}")
if result.stderr:

View file

@ -16,13 +16,14 @@ Follow these rules strictly:
- Make sure that no elements overflow or exceed slide bounding in any way.
- Properly export shapes as exact SVG.
- Add relevant font in tailwind to all texts.
- Wrap the output code inside these classes: \\\"relative w-full rounded-sm max-w-[1280px] shadow-lg max-h-[720px] aspect-video bg-white relative z-20 mx-auto overflow-hidden\\\".
- Wrap the output code inside these classes: \"relative w-full rounded-sm max-w-[1280px] shadow-lg max-h-[720px] aspect-video bg-white relative z-20 mx-auto overflow-hidden\".
- For image everywhere use https://images.pexels.com/photos/31527637/pexels-photo-31527637.jpeg
- Image should never be inside of a SVG.
- Replace brand icons with a circle of same size with "i" between. Generic icons like "email", "call", etc should remain same.
- If there is a box/card enclosing a text, make it grow as well when the text grows, so that the text does not overflow the box/card.
- Give out only HTML and Tailwind code. No other texts or explanations.
- Do not give entire HTML structure with head, body, etc. Just give the respective HTML and Tailwind code inside div with above classes.
- If a list of fonts is provided, you must use the provided fonts (normalized root families) in font-family declarations, prioritizing them over inferred fonts. Use the first matching family wherever applicable.
"""
HTML_TO_REACT_SYSTEM_PROMPT = """

View file

@ -14,6 +14,7 @@ from api.v1.ppt.endpoints.images import IMAGES_ROUTER
from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER
from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER
from api.v1.ppt.endpoints.slide import SLIDE_ROUTER
from api.v1.ppt.endpoints.pptx_slides import PPTX_FONTS_ROUTER
API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt")
@ -35,3 +36,4 @@ API_V1_PPT_ROUTER.include_router(PDF_SLIDES_ROUTER)
API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER)
API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER)
API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER)
API_V1_PPT_ROUTER.include_router(PPTX_FONTS_ROUTER)