ppt-tool/backend/services/template_codegen_service.py
Vadym Samoilenko 587f5ef6e1 Add 3 sandbox features: diagrams, mermaid, and template code-gen
Feature 1 — PPTX from Template (Code-Gen):
- backend/services/template_codegen_service.py: analyze PPTX, strip slides,
  Gemini code-gen + subprocess exec (60s timeout, auto-retry on error)
- backend/api/v1/ppt/endpoints/template_codegen.py: POST /template-codegen/generate
  (multipart: presentation_id + template_file + custom_prompt, rate-limited 3/min)
- frontend/components/TemplateCodegenExport.tsx: drag-drop modal
- Header.tsx: "Export from Template" option in export dropdown

Feature 2 — Diagrams in Slides:
- backend/models/diagram_data.py: DiagramData / FlowStep / BarChartItem models
- generate_slide_content.py: optional __diagram__ + __mermaid__ fields in LLM schema
- DiagramRenderer.tsx: pure React flowchart / bar chart / pie chart (no deps)
- SlideRenderer.tsx: chart elements render DiagramRenderer/MermaidRenderer;
  floating overlay fallback when no chart element exists in JSON layout
- V1ContentRender.tsx: diagram/mermaid overlay on built-in template slides
- generate-pptx/route.ts: addDiagramToSlide() — bar/pie via pptxgenjs addChart(),
  flowchart via addShape()+addText(), mermaid via /api/mermaid-to-image

Feature 3 — Mermaid Diagrams:
- MermaidRenderer.tsx: dynamic import mermaid@11, useEffect render, error fallback
- frontend/app/api/mermaid-to-image/route.ts: Puppeteer renders Mermaid to PNG → base64

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 18:47:31 +00:00

350 lines
12 KiB
Python

import asyncio
import json
import os
import subprocess
import sys
import tempfile
import uuid
import zipfile
from typing import Optional
from google import genai
from google.genai.types import GenerateContentConfig
from services.database import async_session_maker
from models.sql.slide import SlideModel
from sqlmodel import select
from utils.get_env import get_google_api_key_env, get_google_model_env
def _get_gemini_client():
    """
    Create the Gemini client used for template code generation.

    Returns:
        A ``genai.Client`` configured with the Google API key.

    Raises:
        ValueError: if ``get_google_api_key_env()`` returns no key.
    """
    key = get_google_api_key_env()
    if not key:
        raise ValueError("GOOGLE_API_KEY is required for template codegen")
    # Hand the validated key to the SDK explicitly; previously it was checked
    # and then discarded, leaving the client to rediscover it on its own.
    return genai.Client(api_key=key)
def _get_model() -> str:
    """
    Return the Gemini model name to request.

    Uses the value from ``get_google_model_env()`` when it is set, otherwise
    "gemini-2.0-flash". A leading "models/" segment is dropped.
    """
    prefix = "models/"
    configured = get_google_model_env()
    name = configured if configured else "gemini-2.0-flash"
    if not name.startswith(prefix):
        return name
    return name[len(prefix):]
def analyze_pptx_template(pptx_path: str) -> dict:
    """
    Extract slide size, layout names, and placeholder metadata from a PPTX template.

    Args:
        pptx_path: path of the PPTX file to inspect.

    Returns:
        A dict with the keys ``slide_width_emu``, ``slide_height_emu``,
        ``slide_layouts`` (one entry per layout with its ``name`` and a list of
        ``placeholders``), ``existing_slides`` (number of slides in the file),
        and ``theme_colors`` / ``theme_fonts``. The two theme lists are kept
        for callers that read them but are currently always empty.

    Raises:
        RuntimeError: if python-pptx is not installed, or if the file cannot
            be opened or analyzed. The original exception is attached as the
            cause.
    """
    # Keep the import failure separate from analysis failures so each gets
    # the right error message.
    try:
        from pptx import Presentation
    except ImportError as exc:
        raise RuntimeError(
            "python-pptx is required for template analysis. Install with: pip install python-pptx"
        ) from exc

    try:
        prs = Presentation(pptx_path)
        result = {
            "slide_width_emu": prs.slide_width,
            "slide_height_emu": prs.slide_height,
            "slide_layouts": [],
            "theme_colors": [],
            "theme_fonts": [],
        }

        for layout in prs.slide_layouts:
            layout_info = {
                "name": layout.name,
                "placeholders": [],
            }
            for ph in layout.placeholders:
                ph_info = {
                    "idx": ph.placeholder_format.idx,
                    "type": str(ph.placeholder_format.type),
                    "name": ph.name,
                }
                # Geometry is best-effort: a placeholder whose position or
                # size cannot be read is still reported, just without the
                # remaining *_emu keys.
                try:
                    for attr in ("left", "top", "width", "height"):
                        ph_info[f"{attr}_emu"] = getattr(ph, attr)
                except Exception:
                    pass
                layout_info["placeholders"].append(ph_info)
            result["slide_layouts"].append(layout_info)

        # Number of slides already present in the template file.
        result["existing_slides"] = len(prs.slides)
        return result
    except Exception as exc:
        raise RuntimeError(f"Failed to analyze PPTX template: {exc}") from exc
def clear_template_slides(pptx_path: str, output_path: str) -> None:
    """
    Strip all slides from PPTX zip while preserving masters, layouts, and theme.

    The archive at ``pptx_path`` is rewritten into ``output_path`` without the
    ``ppt/slides/slide*.xml`` parts and their ``.rels`` files. References to
    those slides are removed from ``[Content_Types].xml``,
    ``ppt/_rels/presentation.xml.rels`` and ``ppt/presentation.xml``; every
    other entry is copied unchanged.

    Args:
        pptx_path: source PPTX file (not modified unless it is also the output).
        output_path: where the slide-free copy is written.

    Raises:
        zipfile.BadZipFile: if ``pptx_path`` is not a valid zip archive.
        OSError: if a file cannot be read or written.
    """
    import re
    import shutil

    # "[^>]*" (not "[^/]*") is used to skip over the other attributes:
    # attribute values such as ContentType="application/..." and
    # Type="http://..." contain "/" and would otherwise stop the match.
    override_re = re.compile(
        r'<Override\b[^>]*\bPartName="/ppt/slides/slide[^"/]*\.xml"[^>]*>'
    )
    slide_rel_re = re.compile(
        r'<Relationship\b[^>]*\bTarget="(?:/ppt/)?slides/slide[^"/]*\.xml"[^>]*>'
    )
    # "\b" keeps this from matching the enclosing <p:sldIdLst> element.
    slide_id_re = re.compile(r'<p:sldId\b[^>]*/>')

    rewrites = {
        "[Content_Types].xml": override_re,
        "ppt/_rels/presentation.xml.rels": slide_rel_re,
        "ppt/presentation.xml": slide_id_re,
    }

    # Build the result next to the destination and move it into place only
    # once it is complete, so a failure never leaves a half-written output.
    tmp_path = output_path + ".tmp"
    try:
        with zipfile.ZipFile(pptx_path, "r") as zin, \
                zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                name = item.filename
                is_slide = name.startswith("ppt/slides/slide") and name.endswith(".xml")
                is_slide_rels = (
                    name.startswith("ppt/slides/_rels/slide") and name.endswith(".rels")
                )
                if is_slide or is_slide_rels:
                    continue

                data = zin.read(item)
                pattern = rewrites.get(name)
                if pattern is not None:
                    data = pattern.sub("", data.decode("utf-8")).encode("utf-8")
                zout.writestr(item, data)
        shutil.move(tmp_path, output_path)
    except BaseException:
        # Do not leave the partial temp archive behind.
        if os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        raise
def _build_slide_context(slides: list) -> str:
    """
    Build a text summary of slide content for the LLM.

    Each slide contributes a "## Slide N" heading followed by one line per
    content field. Fields whose key starts with "__" and fields with a falsy
    value are skipped. Lists are limited to their first 6 items (200
    characters each); dict and scalar fields are limited to 300 characters.

    Args:
        slides: objects exposing a ``content`` attribute that is a dict or None.

    Returns:
        The summary lines joined with newlines.
    """
    lines = []
    for number, slide in enumerate(slides, start=1):
        lines.append(f"\n## Slide {number}")
        content = slide.content or {}
        for key, val in content.items():
            if key.startswith("__") or not val:
                continue
            if isinstance(val, list):
                lines.append(f" {key}:")
                for item in val[:6]:
                    if isinstance(item, dict):
                        text = (
                            item.get("text")
                            or item.get("title")
                            or item.get("description")
                            or str(item)
                        )
                    else:
                        text = item
                    # str() first: the picked value may be a number or a
                    # nested structure, which cannot be sliced like a string.
                    lines.append(f" - {str(text)[:200]}")
            elif isinstance(val, dict):
                text = val.get("text") or val.get("title") or str(val)
                lines.append(f" {key}: {str(text)[:300]}")
            else:
                lines.append(f" {key}: {str(val)[:300]}")
    return "\n".join(lines)
def _build_codegen_prompt(template_info: dict, slide_context: str, custom_prompt: str = "") -> str:
    """
    Assemble the code-generation prompt sent to the LLM.

    Args:
        template_info: template analysis dict; only ``slide_layouts`` is read.
            The layout count covers all layouts, while the JSON details are
            limited to the first 8.
        slide_context: text summary of the presentation content.
        custom_prompt: optional extra instructions; when non-empty they are
            added under an "Additional Instructions" heading.

    Returns:
        The full prompt text, ending with a newline.
    """
    layouts = template_info.get("slide_layouts", [])
    layout_details = json.dumps(layouts[:8], indent=2)

    # An empty entry keeps the line slot that the optional section occupies.
    if custom_prompt:
        extra_section = f"## Additional Instructions\n{custom_prompt}"
    else:
        extra_section = ""

    prompt_lines = [
        "You are a Python expert specializing in python-pptx.",
        "Generate Python code that populates a PowerPoint template with the provided content.",
        "## Template Information",
        f"- Slide layouts available: {len(layouts)}",
        "- Layout details:",
        layout_details,
        "## Presentation Content",
        f"{slide_context}",
        "## Requirements",
        "1. Use python-pptx to open the template file at `template_path` variable (already defined)",
        "2. Save the output to `output_path` variable (already defined)",
        "3. Add one slide per content slide shown above",
        "4. Match layout names from the template to content type (title slide, content slide, etc.)",
        "5. Set text for all placeholders appropriately",
        "6. Handle font sizes and colors from template — do NOT override them unless necessary",
        "7. If a layout has no matching content, use the first available layout",
        "8. Keep bullet points as separate text runs",
        extra_section,
        "## Output Format",
        "Output ONLY valid Python code. No explanations, no markdown code blocks.",
        "The code should start with `from pptx import Presentation` and end with `prs.save(output_path)`.",
        "Variables already defined before your code runs:",
        "- `template_path: str` — path to the cleared template PPTX",
        "- `output_path: str` — path where the result should be saved",
        "",  # trailing newline
    ]
    return "\n".join(prompt_lines)
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence from an LLM reply, if present."""
    text = text.strip()
    if text.startswith("```python"):
        text = text[9:]
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


async def _request_generated_code(client, model: str, prompt: str) -> str:
    """Ask the model for code in a worker thread and return it with fences stripped."""
    # generate_content is a blocking call; run it off the event loop.
    response = await asyncio.to_thread(
        client.models.generate_content,
        model=model,
        contents=prompt,
        config=GenerateContentConfig(
            max_output_tokens=8192,
            temperature=0.1,
        ),
    )
    return _strip_code_fences(response.text or "")


async def generate_pptx_from_template(
    template_path: str,
    presentation_id: str,
    custom_prompt: str = "",
    output_path: Optional[str] = None,
) -> dict:
    """
    Full pipeline: analyze template → load slides → LLM code-gen → execute → return path.

    Args:
        template_path: path of the uploaded PPTX template.
        presentation_id: UUID string of the presentation whose slides are used.
        custom_prompt: optional extra instructions appended to the LLM prompt.
        output_path: where to save the result. When omitted, a unique
            ``codegen_<hex>.pptx`` path is created under ``TEMP_DIRECTORY``
            (or the system temp directory).

    Returns:
        ``{"output_path": <path>, "slide_count": <number of slides loaded>}``.

    Raises:
        RuntimeError: template analysis, slide loading, LLM generation, or
            execution of the generated code (after one retry) failed.
        ValueError: the presentation has no slides, or no API key is configured.

    NOTE(security): the code returned by the LLM is executed in a subprocess
    with this service's privileges, and ``custom_prompt`` flows into the prompt
    that produces it. Only a timeout limits that process — review before
    exposing this to untrusted users.
    """
    if output_path is None:
        tmp_dir = os.environ.get("TEMP_DIRECTORY", tempfile.gettempdir())
        output_path = os.path.join(tmp_dir, f"codegen_{uuid.uuid4().hex}.pptx")

    # 1. Analyze template
    template_info = analyze_pptx_template(template_path)

    cleared_path = template_path + ".cleared.pptx"
    try:
        # 2. Clear slides from a copy of the template
        try:
            clear_template_slides(template_path, cleared_path)
        except Exception:
            # Best effort: if stripping fails, generate on top of the
            # unmodified template instead of aborting.
            import shutil
            shutil.copy2(template_path, cleared_path)

        # 3. Load slides from DB
        try:
            async with async_session_maker() as session:
                result = await session.execute(
                    select(SlideModel)
                    .where(SlideModel.presentation == uuid.UUID(presentation_id))
                    .where(SlideModel.deleted_at == None)  # noqa: E711 - SQL "IS NULL" expression
                    .order_by(SlideModel.index)
                )
                slides = result.scalars().all()
        except Exception as e:
            raise RuntimeError(f"Failed to load slides from database: {e}") from e

        if not slides:
            raise ValueError("Presentation has no slides")

        slide_context = _build_slide_context(list(slides))
        prompt = _build_codegen_prompt(template_info, slide_context, custom_prompt)

        # 4. Call LLM
        client = _get_gemini_client()
        model = _get_model()
        try:
            code = await _request_generated_code(client, model, prompt)
        except Exception as e:
            raise RuntimeError(f"LLM code generation failed: {e}") from e

        # 5. Execute generated code (blocking for up to the subprocess
        # timeout, so it also runs in a worker thread).
        exec_result = await asyncio.to_thread(
            _execute_generated_code, code, cleared_path, output_path
        )
        if not exec_result["success"]:
            # Retry once with error feedback
            retry_prompt = prompt + f"\n\n## Error from previous attempt\nYour code failed with this error:\n{exec_result['error']}\nFix the issue and output corrected code only."
            try:
                code2 = await _request_generated_code(client, model, retry_prompt)
            except Exception as e:
                raise RuntimeError(f"Code generation failed: {exec_result['error']}") from e

            exec_result2 = await asyncio.to_thread(
                _execute_generated_code, code2, cleared_path, output_path
            )
            if not exec_result2["success"]:
                raise RuntimeError(f"Code execution failed after retry: {exec_result2['error']}")

        return {"output_path": output_path, "slide_count": len(slides)}
    finally:
        # Remove the intermediate template copy on success and on every
        # error path alike.
        try:
            os.unlink(cleared_path)
        except OSError:
            pass
def _execute_generated_code(code: str, template_path: str, output_path: str) -> dict:
    """
    Execute the LLM-generated python-pptx code in a subprocess.

    The code is run with ``template_path`` and ``output_path`` predefined as
    string variables, using the current interpreter and a 60 second timeout.

    Args:
        code: Python source to run.
        template_path: value bound to ``template_path`` in the child process.
        output_path: value bound to ``output_path``; the run only counts as
            successful if a file exists there afterwards.

    Returns:
        ``{"success": True, "error": None}`` on success, otherwise
        ``{"success": False, "error": <message>}``. The function does not raise.

    NOTE(security): this runs model-produced code with the privileges of the
    calling process; the timeout is the only restriction applied here.
    """
    if not code.strip():
        return {"success": False, "error": "No code was generated"}

    # A file left over from an earlier attempt must not make a run that wrote
    # nothing look successful.
    try:
        os.remove(output_path)
    except FileNotFoundError:
        pass
    except OSError as e:
        return {"success": False, "error": f"Cannot replace existing output file: {e}"}

    # Prepend the variable definitions the generated code relies on.
    full_code = "\n".join([
        "",
        f"template_path = {template_path!r}",
        f"output_path = {output_path!r}",
        code,
        "",
    ])
    try:
        result = subprocess.run(
            [sys.executable, "-c", full_code],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return {"success": False, "error": "Code execution timed out (60s)"}
    except Exception as e:
        return {"success": False, "error": str(e)}

    if result.returncode != 0:
        # Keep the tail: a Python traceback ends with the exception itself,
        # which is the part worth feeding back to the model.
        return {"success": False, "error": result.stderr[-2000:]}
    if not os.path.exists(output_path):
        return {"success": False, "error": "Code ran but did not produce output file"}
    return {"success": True, "error": None}