forge/backend/app/services/markdown_tools.py
DJP 3f88af3258 feat: Add Forge Document mode to Markdown Converter
- Backend:
  - Added 'forge' output format support (maps to HTML with Forge theme).
  - Implemented 'forge' theme with Montserrat font and white-paper styling.
  - Fixed 'Plain Text' mode not returning output.
  - Added fallback 'output' return when markdown library is missing.
- Frontend:
  - Added 'Forge Document' as the default output format.
  - Implemented 'Copy Formatted' button for rich text clipboard support (Word/Excel compatible).
  - Switched to single-column layout for better document visibility.
  - Used iframe for document preview to isolate styles and prevent layout issues.
2025-12-11 16:08:24 -05:00

670 lines
20 KiB
Python

"""Markdown & Mermaid Tools Service
Text processing utilities for Markdown and Mermaid diagram generation.
Features:
- Markdown to HTML conversion
- Markdown to PDF export
- Mermaid diagram generation (flowcharts, sequence diagrams, etc.)
- AI-powered content generation
- Template support
Mermaid Diagram Types:
- flowchart: Process flows and decision trees
- sequence: Interaction sequences between actors
- class: UML class diagrams
- state: State machine diagrams
- er: Entity relationship diagrams
- journey: User journey mapping
- gantt: Project timelines
- pie: Pie charts
- mindmap: Mind maps and concept trees
- timeline: Historical timelines
- quadrant: Quadrant charts
- gitgraph: Git branch visualization
"""
import httpx
import os
from uuid import uuid4
from datetime import datetime
from typing import Optional, Dict, Any, List
from app.database import SessionLocal
from app.models.job import Job
from app.models.asset import Asset
from app.config import settings
# Mermaid diagram templates
#
# Starter snippets keyed by diagram-type id. Every entry carries:
#   "name"        - human-readable label
#   "description" - one-line summary of what the diagram is for
#   "template"    - example Mermaid source for that diagram type
# The "flowchart" entry additionally lists the layout "directions" it accepts.
#
# NOTE(review): the module docstring also lists a "quadrant" diagram type,
# but there is no "quadrant" key in this mapping.
# NOTE(review): Mermaid mindmap syntax is indentation-sensitive; the
# "mindmap" template below shows no leading indentation on its child lines
# - confirm that this matches the intended template text.
MERMAID_TEMPLATES = {
# Process flow with a single decision node.
"flowchart": {
"name": "Flowchart",
"description": "Process flows and decision trees",
"template": """flowchart TD
A[Start] --> B{Decision}
B -->|Yes| C[Process 1]
B -->|No| D[Process 2]
C --> E[End]
D --> E""",
"directions": ["TD", "TB", "BT", "LR", "RL"]
},
# Request/response exchange between two participants.
"sequence": {
"name": "Sequence Diagram",
"description": "Interaction sequences between actors",
"template": """sequenceDiagram
participant A as Actor
participant B as System
A->>B: Request
B-->>A: Response
A->>B: Action
B-->>A: Result"""
},
# Two classes linked by inheritance.
"class": {
"name": "Class Diagram",
"description": "UML class diagrams",
"template": """classDiagram
class Animal {
+String name
+int age
+makeSound()
}
class Dog {
+String breed
+bark()
}
Animal <|-- Dog"""
},
# Simple job lifecycle with a retry transition.
"state": {
"name": "State Diagram",
"description": "State machine diagrams",
"template": """stateDiagram-v2
[*] --> Idle
Idle --> Processing : start
Processing --> Completed : success
Processing --> Failed : error
Completed --> [*]
Failed --> Idle : retry"""
},
# Customer / order / product relationships.
"er": {
"name": "ER Diagram",
"description": "Entity relationship diagrams",
"template": """erDiagram
CUSTOMER ||--o{ ORDER : places
ORDER ||--|{ LINE-ITEM : contains
PRODUCT ||--o{ LINE-ITEM : includes"""
},
# Two-section user journey with per-step scores.
"journey": {
"name": "User Journey",
"description": "User journey mapping",
"template": """journey
title User Journey
section Sign Up
Visit site: 5: User
Create account: 3: User
Verify email: 4: User
section First Use
Login: 5: User
Explore features: 4: User
Complete task: 5: User"""
},
# Two-phase project plan using YYYY-MM-DD dates.
"gantt": {
"name": "Gantt Chart",
"description": "Project timelines",
"template": """gantt
title Project Timeline
dateFormat YYYY-MM-DD
section Phase 1
Research: 2024-01-01, 30d
Design: 2024-02-01, 20d
section Phase 2
Development: 2024-02-21, 60d
Testing: 2024-04-22, 30d"""
},
# Four-slice distribution.
"pie": {
"name": "Pie Chart",
"description": "Pie charts for data visualization",
"template": """pie title Distribution
"Category A" : 40
"Category B" : 30
"Category C" : 20
"Category D" : 10"""
},
# Central idea with topics and subtopics.
"mindmap": {
"name": "Mind Map",
"description": "Mind maps and concept trees",
"template": """mindmap
root((Central Idea))
Topic 1
Subtopic 1.1
Subtopic 1.2
Topic 2
Subtopic 2.1
Subtopic 2.2
Topic 3"""
},
# Year-by-year list of events.
"timeline": {
"name": "Timeline",
"description": "Historical timelines",
"template": """timeline
title History of Events
2020 : Event 1
: Description
2021 : Event 2
: Description
2022 : Event 3"""
},
# Branch, commit, and merge back into main.
"gitgraph": {
"name": "Git Graph",
"description": "Git branch visualization",
"template": """gitGraph
commit
branch develop
checkout develop
commit
commit
checkout main
merge develop
commit"""
}
}
def _build_mermaid_ink_request(
    code: str,
    output_format: str,
    theme: str,
    background: str
) -> tuple:
    """Build the mermaid.ink request URL for a diagram.

    Args:
        code: Mermaid diagram code
        output_format: 'svg', 'png', 'pdf' (anything else is treated as 'png')
        theme: 'default', 'dark', 'forest', 'neutral', or 'forge'
        background: 'transparent', a named color, or a hex color

    Returns:
        Tuple ``(url, code, mime_type)``. ``code`` is the diagram source that
        was actually encoded into the URL (the Forge init directive is
        prepended when ``theme == 'forge'`` and the code has no directive).
    """
    import base64
    import string

    params = []

    if theme == "forge":
        # The Forge palette is applied through an init directive embedded in
        # the diagram itself, so no ?theme= parameter is sent for it.
        forge_theme_config = """%%{
init: {
'theme': 'base',
'themeVariables': {
'primaryColor': '#FFC407',
'primaryTextColor': '#000000',
'primaryBorderColor': '#FFC407',
'lineColor': '#FFC407',
'secondaryColor': '#ffffff',
'tertiaryColor': '#ffffff'
}
}
}%%
"""
        # Respect a directive the caller already supplied.
        if not code.strip().startswith("%%{"):
            code = forge_theme_config + code
    elif theme != "default":
        params.append(f"theme={theme}")

    # Pick the endpoint that matches the requested format so the reported
    # MIME type describes the bytes that come back.
    if output_format == "svg":
        endpoint, mime_type = "svg", "image/svg+xml"
    elif output_format == "pdf":
        endpoint, mime_type = "pdf", "application/pdf"
    else:
        # /img/ serves JPEG unless a type is requested explicitly.
        endpoint, mime_type = "img", "image/png"
        params.append("type=png")

    if background != "transparent":
        color = background.replace("#", "")
        # mermaid.ink reads bgColor as a hex code; named colors such as
        # "white" must be prefixed with "!".
        is_hex = all(ch in string.hexdigits for ch in color)
        if color and not is_hex and not color.startswith("!"):
            color = "!" + color
        params.append(f"bgColor={color}")

    # Encode once, after any theme directive has been prepended.
    encoded = base64.urlsafe_b64encode(code.encode("utf-8")).decode()
    url = f"https://mermaid.ink/{endpoint}/{encoded}"
    if params:
        url += "?" + "&".join(params)
    return url, code, mime_type


async def render_mermaid(
    code: str,
    output_format: str = "svg",
    theme: str = "default",
    background: str = "transparent"
) -> Dict[str, Any]:
    """Render Mermaid diagram to image

    The diagram is rendered by the public mermaid.ink service; the diagram
    source is sent to that service inside the request URL.

    Args:
        code: Mermaid diagram code
        output_format: 'svg', 'png', 'pdf'
        theme: 'default', 'dark', 'forest', 'neutral', or 'forge'
        background: 'transparent', 'white', or hex color

    Returns:
        On success: ``success`` True, ``data`` (base64 of the rendered bytes),
        ``mime_type``, ``url`` and ``image_url`` (same value).
        On failure: ``success`` False, ``error`` and the ``code`` that was
        (or would have been) submitted. Errors are reported in the result
        rather than raised.
    """
    # Nothing to render: answer locally instead of making a doomed request.
    if not code or not code.strip():
        return {
            "success": False,
            "error": "No Mermaid code provided",
            "code": code
        }

    try:
        import base64

        url, code, mime_type = _build_mermaid_ink_request(
            code, output_format, theme, background
        )

        async with httpx.AsyncClient(timeout=30) as client:
            response = await client.get(url)
            response.raise_for_status()

        return {
            "success": True,
            "data": base64.b64encode(response.content).decode(),
            "mime_type": mime_type,
            "url": url,
            "image_url": url  # Frontend expects image_url
        }
    except Exception as e:
        # Service boundary: callers expect an error payload, not an exception.
        return {
            "success": False,
            "error": str(e),
            "code": code
        }
async def generate_mermaid_with_ai(
    description: str,
    diagram_type: str = "flowchart",
    style: str = "detailed"
) -> Dict[str, Any]:
    """Generate Mermaid diagram code using AI

    The provider is chosen by which API key is configured: Gemini when a
    Google key is set, otherwise OpenAI when an OpenAI key is set. With no
    key at all the stock template for the diagram type is returned.

    Args:
        description: Natural language description of the diagram
        diagram_type: Type of diagram (flowchart, sequence, class, etc.)
        style: 'simple', 'detailed', 'complex'

    Returns:
        Dictionary with generated Mermaid code
    """
    # Unknown diagram types borrow the flowchart template.
    template_key = diagram_type if diagram_type in MERMAID_TEMPLATES else "flowchart"
    template = MERMAID_TEMPLATES[template_key]

    if settings.google_api_key:
        return await _generate_mermaid_gemini(description, diagram_type, template, style)

    if settings.openai_api_key:
        return await _generate_mermaid_openai(description, diagram_type, template, style)

    # No provider configured: hand back the template itself.
    fallback = {
        "success": True,
        "code": template["template"],
        "diagram_type": diagram_type,
        "note": "API keys not configured - returning template"
    }
    return fallback
async def _generate_mermaid_gemini(
    description: str,
    diagram_type: str,
    template: dict,
    style: str
) -> Dict[str, Any]:
    """Generate Mermaid using Gemini

    Args:
        description: Natural language description of the diagram
        diagram_type: Diagram type name inserted into the prompt
        template: Template entry; its 'name' and 'template' are used in the
            prompt and 'template' is returned as the fallback code on failure
        style: 'simple', 'detailed', 'complex'

    Returns:
        On success: ``success`` True, ``code``, ``diagram_type``,
        ``description``. On any failure: ``success`` False, ``error`` and the
        template code as ``code``.
    """
    try:
        import google.generativeai as genai
        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")
        prompt = f"""Generate a Mermaid {template['name']} diagram based on this description:
"{description}"
Requirements:
- Use valid Mermaid syntax for {diagram_type}
- Style: {style} (simple=few nodes, detailed=moderate, complex=comprehensive)
- Return ONLY the Mermaid code, no explanations
- Start with the diagram type declaration
Example format:
{template['template']}
Generate the diagram code:"""
        # Await the SDK's async variant: the synchronous generate_content()
        # would block the event loop for the whole duration of the API call.
        response = await model.generate_content_async(prompt)
        code = response.text.strip()
        # Clean up response: keep only the body of the first fenced block.
        if "```mermaid" in code:
            code = code.split("```mermaid")[1].split("```")[0].strip()
        elif "```" in code:
            code = code.split("```")[1].split("```")[0].strip()
        return {
            "success": True,
            "code": code,
            "diagram_type": diagram_type,
            "description": description
        }
    except Exception as e:
        # Covers a missing SDK, API errors and blocked/empty responses.
        return {
            "success": False,
            "error": str(e),
            "code": template["template"]
        }
async def _generate_mermaid_openai(
    description: str,
    diagram_type: str,
    template: dict,
    style: str
) -> Dict[str, Any]:
    """Generate Mermaid using OpenAI

    Posts a chat-completion request and extracts the Mermaid code from the
    first choice. Any failure is reported in the result, with the template
    code returned as ``code``.
    """
    try:
        request_headers = {
            "Authorization": f"Bearer {settings.openai_api_key}",
            "Content-Type": "application/json"
        }
        system_message = {
            "role": "system",
            "content": f"You are a Mermaid diagram expert. Generate valid Mermaid {diagram_type} diagrams. Return ONLY the code, no explanations."
        }
        user_message = {
            "role": "user",
            "content": f"Create a {style} {template['name']} diagram for: {description}"
        }
        payload = {
            "model": "gpt-4o-mini",
            "messages": [system_message, user_message],
            "temperature": 0.7,
            "max_tokens": 1000
        }

        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=payload
            )
            response.raise_for_status()
            body = response.json()

        code = body["choices"][0]["message"]["content"].strip()

        # Keep only the body of the first fenced block, if there is one.
        if "```mermaid" in code:
            fenced = code.partition("```mermaid")[2]
            code = fenced.partition("```")[0].strip()
        elif "```" in code:
            fenced = code.partition("```")[2]
            code = fenced.partition("```")[0].strip()

        return {
            "success": True,
            "code": code,
            "diagram_type": diagram_type,
            "description": description
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "code": template["template"]
        }
def _markdown_to_plain(content: str) -> str:
    """Strip Markdown markup from *content* and return plain text.

    Markers are removed only where they act as markup (line-leading heading,
    quote and bullet markers; paired emphasis/code delimiters), so hyphens,
    underscores and '>' inside ordinary text are preserved.
    """
    import re

    # Remove images
    text = re.sub(r'!\[.*?\]\(.*?\)', '', content)
    # Remove links but keep text
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
    # Drop code-fence lines (``` or ~~~, with optional language tag)
    text = re.sub(r'^[ \t]*(?:`{3,}|~{3,}).*$', '', text, flags=re.MULTILINE)
    # Drop horizontal rules / setext underlines made of -, * or _
    text = re.sub(r'^[ \t]*(?:[-*_][ \t]*){3,}$', '', text, flags=re.MULTILINE)
    # Strip ATX heading markers
    text = re.sub(r'^[ \t]*#{1,6}[ \t]+', '', text, flags=re.MULTILINE)
    # Strip blockquote markers (possibly nested)
    text = re.sub(r'^[ \t]*(?:>[ \t]?)+', '', text, flags=re.MULTILINE)
    # Strip list bullets, keeping the indentation
    text = re.sub(r'^([ \t]*)[-*+][ \t]+', r'\1', text, flags=re.MULTILINE)
    # Unwrap paired delimiters: **bold**, ~~strike~~, `code`
    text = re.sub(r'(\*\*|~~|`+)(.+?)\1', r'\2', text)
    # Unwrap *italic* (delimiters must hug the text)
    text = re.sub(r'\*(?=\S)(.+?)(?<=\S)\*', r'\1', text)
    # Unwrap _italic_ / __bold__ only at word boundaries so that
    # identifiers such as snake_case are left intact
    text = re.sub(r'(?<!\w)(__|_)(?=\S)(.+?)(?<=\S)\1(?!\w)', r'\2', text)
    # Clean up whitespace
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()


async def convert_markdown(
    content: str,
    output_format: str = "html",
    theme: str = "github"
) -> Dict[str, Any]:
    """Convert Markdown to various formats

    Args:
        content: Markdown content
        output_format: 'html', 'plain', or 'forge' (HTML with the Forge theme)
        theme: CSS theme for HTML output; only 'forge' changes the styling
            (Montserrat font), every other value uses the default font stack

    Returns:
        Dictionary with converted content. Successful results carry the
        converted text under both ``output`` and ``content`` plus ``format``;
        HTML results also carry ``toc``. Unsupported formats and unexpected
        errors return ``success`` False with ``error``. If the ``markdown``
        package is not installed, HTML requests return the raw input with a
        ``note``.
    """
    try:
        if output_format == "forge":
            output_format = "html"
            theme = "forge"

        if output_format == "plain":
            # Plain text needs no third-party library, so it is handled
            # before the markdown package is imported.
            plain_text = _markdown_to_plain(content)
            return {
                "success": True,
                "output": plain_text,
                "content": plain_text,
                "format": "plain"
            }

        if output_format != "html":
            return {
                "success": False,
                "error": f"Unsupported format: {output_format}"
            }

        # Only the HTML path depends on the markdown package.
        import markdown

        # Convert to HTML with extensions
        md = markdown.Markdown(extensions=[
            'tables',
            'fenced_code',
            'toc',
            'nl2br',
            'sane_lists'
        ])
        # NOTE: raw HTML embedded in the Markdown is passed through
        # unsanitized; callers must render the result in an isolated context.
        html = md.convert(content)

        # Define styles based on theme
        extra_head = ""
        font_family = "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif"
        if theme == "forge":
            extra_head = '<link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;700&display=swap" rel="stylesheet">'
            font_family = "'Montserrat', sans-serif"

        styled_html = f"""<!DOCTYPE html>
<html>
<head>
{extra_head}
<style>
body {{ font-family: {font_family}; line-height: 1.6; max-width: 800px; margin: 0 auto; padding: 20px; }}
code {{ background: #f4f4f4; padding: 2px 6px; border-radius: 3px; }}
pre {{ background: #f4f4f4; padding: 16px; border-radius: 6px; overflow-x: auto; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background: #f4f4f4; }}
blockquote {{ border-left: 4px solid #ddd; margin: 0; padding-left: 16px; color: #666; }}
</style>
</head>
<body>
{html}
</body>
</html>"""
        return {
            "success": True,
            "content": styled_html,
            "output": styled_html,  # Frontend expects output
            "format": "html",
            "toc": getattr(md, "toc", None)
        }
    except ImportError:
        # Fallback without markdown library
        return {
            "success": True,
            "output": content,
            "content": content,
            "format": output_format,
            "note": "markdown library not installed"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
async def generate_markdown_with_ai(
    topic: str,
    content_type: str = "article",
    length: str = "medium",
    include_toc: bool = True
) -> Dict[str, Any]:
    """Generate Markdown content using AI

    Gemini is used when a Google API key is configured, otherwise OpenAI
    when an OpenAI key is configured. Without either key an error result
    with placeholder content is returned.

    Args:
        topic: Topic or subject to write about
        content_type: 'article', 'documentation', 'readme', 'tutorial', 'report'
        length: 'short', 'medium', 'long'
        include_toc: Include table of contents

    Returns:
        Dictionary with generated markdown content
    """
    type_guide = {
        "article": "engaging article with introduction, body, and conclusion",
        "documentation": "technical documentation with clear sections and code examples",
        "readme": "GitHub README with badges, installation, usage, and contributing sections",
        "tutorial": "step-by-step tutorial with numbered instructions and examples",
        "report": "professional report with executive summary, findings, and recommendations"
    }
    length_guide = {
        "short": "2-3 paragraphs, ~200 words",
        "medium": "5-7 paragraphs, ~500 words",
        "long": "10+ paragraphs, ~1000 words"
    }
    # Unrecognised lengths fall back to the "medium" guidance.
    length_hint = length_guide.get(length, length_guide["medium"])

    if settings.google_api_key:
        return await _generate_markdown_gemini(topic, content_type, type_guide, length_hint, include_toc)

    if settings.openai_api_key:
        return await _generate_markdown_openai(topic, content_type, type_guide, length_hint, include_toc)

    return {
        "success": False,
        "error": "No API keys configured",
        "content": f"# {topic}\n\nContent generation requires API keys."
    }
async def _generate_markdown_gemini(
    topic: str,
    content_type: str,
    type_guide: dict,
    length_guide: str,
    include_toc: bool
) -> Dict[str, Any]:
    """Generate markdown using Gemini

    Args:
        topic: Topic or subject to write about
        content_type: Key looked up in ``type_guide``; the literal word
            'article' is used in the prompt when the key is missing
        type_guide: Mapping of content type to a description of the piece
        length_guide: Length guidance text inserted into the prompt
        include_toc: Whether the prompt asks for a table of contents

    Returns:
        On success: ``success`` True, ``content``, ``content_type``,
        ``topic``. On any failure: ``success`` False and ``error``.
    """
    try:
        import google.generativeai as genai
        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")
        toc_requirement = (
            "Include a table of contents at the start"
            if include_toc
            else "No table of contents needed"
        )
        prompt = f"""Write a {type_guide.get(content_type, 'article')} about:
"{topic}"
Requirements:
- Format: Proper Markdown with headers, lists, code blocks where appropriate
- Length: {length_guide}
- {toc_requirement}
- Use appropriate markdown features (bold, italic, links, code, blockquotes)
- Make it informative and well-structured
Generate the markdown content:"""
        # Await the SDK's async variant: the synchronous generate_content()
        # would block the event loop for the whole duration of the API call.
        response = await model.generate_content_async(prompt)
        content = response.text.strip()
        return {
            "success": True,
            "content": content,
            "content_type": content_type,
            "topic": topic
        }
    except Exception as e:
        # Covers a missing SDK, API errors and blocked/empty responses.
        return {
            "success": False,
            "error": str(e)
        }
async def _generate_markdown_openai(
    topic: str,
    content_type: str,
    type_guide: dict,
    length_guide: str,
    include_toc: bool
) -> Dict[str, Any]:
    """Generate markdown using OpenAI

    Posts a chat-completion request and returns the first choice's text as
    the Markdown content. Any failure is reported in the result instead of
    being raised.
    """
    try:
        request_headers = {
            "Authorization": f"Bearer {settings.openai_api_key}",
            "Content-Type": "application/json"
        }
        system_message = {
            "role": "system",
            "content": f"You are a technical writer. Generate well-formatted Markdown content. {type_guide.get(content_type, '')}"
        }
        user_message = {
            "role": "user",
            "content": f"Write about '{topic}'. Length: {length_guide}. {'Include TOC.' if include_toc else ''}"
        }
        payload = {
            "model": "gpt-4o-mini",
            "messages": [system_message, user_message],
            "temperature": 0.7,
            "max_tokens": 2000
        }

        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=payload
            )
            response.raise_for_status()
            body = response.json()

        content = body["choices"][0]["message"]["content"].strip()
        return {
            "success": True,
            "content": content,
            "content_type": content_type,
            "topic": topic
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
def get_mermaid_templates() -> List[Dict[str, str]]:
    """List every Mermaid template as id/name/description/template records."""
    catalog = []
    for template_id, entry in MERMAID_TEMPLATES.items():
        # Only the four common fields are exposed; extras are left out.
        record = {
            "id": template_id,
            "name": entry["name"],
            "description": entry["description"],
            "template": entry["template"]
        }
        catalog.append(record)
    return catalog
def get_mermaid_template(diagram_type: str) -> Optional[Dict[str, Any]]:
    """Look up one Mermaid template by id.

    Returns the stored entry with an added "id" field, or None when no
    template is registered under ``diagram_type``.
    """
    entry = MERMAID_TEMPLATES.get(diagram_type)
    if not entry:
        return None

    # Start from the id, then layer every stored field on top of it.
    result = {"id": diagram_type}
    result.update(entry)
    return result