# forge/backend/app/services/markdown_tools.py

"""Markdown & Mermaid Tools Service

Text processing utilities for Markdown and Mermaid diagram generation.

Features:
- Markdown to HTML conversion
- Markdown to PDF export
- Mermaid diagram generation (flowcharts, sequence diagrams, etc.)
- AI-powered content generation
- Template support

Mermaid Diagram Types:
- flowchart: Process flows and decision trees
- sequence: Interaction sequences between actors
- class: UML class diagrams
- state: State machine diagrams
- er: Entity relationship diagrams
- journey: User journey mapping
- gantt: Project timelines
- pie: Pie charts
- mindmap: Mind maps and concept trees
- timeline: Historical timelines
- quadrant: Quadrant charts
- gitgraph: Git branch visualization
"""
import httpx
import os
from uuid import uuid4
from datetime import datetime
from typing import Optional, Dict, Any, List

from app.database import SessionLocal
from app.models.job import Job
from app.models.asset import Asset
from app.config import settings


# Mermaid diagram templates.
#
# Maps a diagram-type id (the value callers pass as ``diagram_type``) to:
#   - "name":        human-readable label
#   - "description": one-line summary of what the diagram type is for
#   - "template":    a small, complete sample diagram of that type
#   - "directions":  (flowchart only) list of orientation keywords
#
# The "template" strings are handed out verbatim: generate_mermaid_with_ai()
# returns them as a fallback and passes them to the Gemini helper as the
# example in its prompt, and get_mermaid_templates() / get_mermaid_template()
# expose them as-is.
# Keep their inner indentation intact when editing.
MERMAID_TEMPLATES = {
    "flowchart": {
        "name": "Flowchart",
        "description": "Process flows and decision trees",
        "template": """flowchart TD
    A[Start] --> B{Decision}
    B -->|Yes| C[Process 1]
    B -->|No| D[Process 2]
    C --> E[End]
    D --> E""",
        # Not read anywhere in this module; only surfaced through
        # get_mermaid_template(), which returns the whole entry.
        "directions": ["TD", "TB", "BT", "LR", "RL"]
    },
    "sequence": {
        "name": "Sequence Diagram",
        "description": "Interaction sequences between actors",
        "template": """sequenceDiagram
    participant A as Actor
    participant B as System
    A->>B: Request
    B-->>A: Response
    A->>B: Action
    B-->>A: Result"""
    },
    "class": {
        "name": "Class Diagram",
        "description": "UML class diagrams",
        "template": """classDiagram
    class Animal {
        +String name
        +int age
        +makeSound()
    }
    class Dog {
        +String breed
        +bark()
    }
    Animal <|-- Dog"""
    },
    "state": {
        "name": "State Diagram",
        "description": "State machine diagrams",
        "template": """stateDiagram-v2
    [*] --> Idle
    Idle --> Processing : start
    Processing --> Completed : success
    Processing --> Failed : error
    Completed --> [*]
    Failed --> Idle : retry"""
    },
    "er": {
        "name": "ER Diagram",
        "description": "Entity relationship diagrams",
        "template": """erDiagram
    CUSTOMER ||--o{ ORDER : places
    ORDER ||--|{ LINE-ITEM : contains
    PRODUCT ||--o{ LINE-ITEM : includes"""
    },
    "journey": {
        "name": "User Journey",
        "description": "User journey mapping",
        "template": """journey
    title User Journey
    section Sign Up
        Visit site: 5: User
        Create account: 3: User
        Verify email: 4: User
    section First Use
        Login: 5: User
        Explore features: 4: User
        Complete task: 5: User"""
    },
    "gantt": {
        "name": "Gantt Chart",
        "description": "Project timelines",
        "template": """gantt
    title Project Timeline
    dateFormat YYYY-MM-DD
    section Phase 1
        Research: 2024-01-01, 30d
        Design: 2024-02-01, 20d
    section Phase 2
        Development: 2024-02-21, 60d
        Testing: 2024-04-22, 30d"""
    },
    "pie": {
        "name": "Pie Chart",
        "description": "Pie charts for data visualization",
        "template": """pie title Distribution
    "Category A" : 40
    "Category B" : 30
    "Category C" : 20
    "Category D" : 10"""
    },
    "mindmap": {
        "name": "Mind Map",
        "description": "Mind maps and concept trees",
        "template": """mindmap
    root((Central Idea))
        Topic 1
            Subtopic 1.1
            Subtopic 1.2
        Topic 2
            Subtopic 2.1
            Subtopic 2.2
        Topic 3"""
    },
    "timeline": {
        "name": "Timeline",
        "description": "Historical timelines",
        "template": """timeline
    title History of Events
    2020 : Event 1
         : Description
    2021 : Event 2
         : Description
    2022 : Event 3"""
    },
    "gitgraph": {
        "name": "Git Graph",
        "description": "Git branch visualization",
        "template": """gitGraph
    commit
    branch develop
    checkout develop
    commit
    commit
    checkout main
    merge develop
    commit"""
    }
}


# Mermaid init directive that layers the Forge brand palette on top of the
# built-in 'base' theme. Prepended to diagrams rendered with theme="forge".
_FORGE_THEME_DIRECTIVE = """%%{
  init: {
    'theme': 'base',
    'themeVariables': {
      'primaryColor': '#FFC407',
      'primaryTextColor': '#000000',
      'primaryBorderColor': '#FFC407',
      'lineColor': '#FFC407',
      'secondaryColor': '#ffffff',
      'tertiaryColor': '#ffffff'
    }
  }
}%%
"""

_MERMAID_INK_BASE_URL = "https://mermaid.ink"


def _mermaid_ink_bg_color(background: str) -> str:
    """Return *background* in the form mermaid.ink's ``bgColor`` expects.

    Hex colours are sent without the leading ``#``. Anything else is treated
    as a named colour, which mermaid.ink distinguishes from hex by a leading
    ``!`` (e.g. ``!white``).
    """
    if background.startswith("!"):
        # Caller already used mermaid.ink's named-colour syntax.
        return background
    if background.startswith("#"):
        return background.replace("#", "")
    is_bare_hex = len(background) in (3, 4, 6, 8) and all(
        ch in "0123456789abcdefABCDEF" for ch in background
    )
    return background if is_bare_hex else f"!{background}"


async def render_mermaid(
    code: str,
    output_format: str = "svg",
    theme: str = "default",
    background: str = "transparent"
) -> Dict[str, Any]:
    """Render a Mermaid diagram through the public mermaid.ink service.

    Args:
        code: Mermaid diagram code.
        output_format: 'svg', 'png' or 'pdf'. Any other value is rendered
            as PNG (the historical fallback).
        theme: 'default', 'dark', 'forest', 'neutral', or 'forge'. 'forge'
            is not a mermaid.ink theme: it prepends an init directive with
            the Forge palette unless the code already starts with its own
            ``%%{`` directive.
        background: 'transparent', a colour name such as 'white', or a hex
            colour (with or without the leading '#').

    Returns:
        On success: ``success`` True, ``data`` (base64 of the rendered
        bytes), ``mime_type``, and the request URL under both ``url`` and
        ``image_url``. On failure: ``success`` False, ``error`` and the
        diagram ``code`` that was being rendered.
    """
    import base64
    from urllib.parse import urlencode

    if not code or not code.strip():
        # Nothing to render: fail fast instead of spending a network
        # round-trip on a request that can only come back as an error.
        return {
            "success": False,
            "error": "Mermaid code is empty",
            "code": code
        }

    try:
        # Pick the endpoint and the MIME type that endpoint really returns.
        if output_format == "svg":
            endpoint, mime_type = "svg", "image/svg+xml"
        elif output_format == "pdf":
            endpoint, mime_type = "pdf", "application/pdf"
        else:
            endpoint, mime_type = "img", "image/png"

        params = {}
        if endpoint == "img":
            # /img/ serves JPEG unless a type is requested; ask for PNG so
            # the bytes match the reported MIME type (and keep transparency).
            params["type"] = "png"

        if theme == "forge":
            # Rely on the injected directive instead of a theme parameter.
            if not code.strip().startswith("%%{"):
                code = _FORGE_THEME_DIRECTIVE + code
        elif theme and theme != "default":
            params["theme"] = theme

        if background and background != "transparent":
            params["bgColor"] = _mermaid_ink_bg_color(background)

        # Encode once, after any directive has been prepended.
        encoded = base64.urlsafe_b64encode(code.encode()).decode()
        url = f"{_MERMAID_INK_BASE_URL}/{endpoint}/{encoded}"
        if params:
            # Keep '!' literal so named colours stay in the documented form.
            url += "?" + urlencode(params, safe="!")

        async with httpx.AsyncClient(timeout=30) as client:
            response = await client.get(url)
            response.raise_for_status()

        return {
            "success": True,
            "data": base64.b64encode(response.content).decode(),
            "mime_type": mime_type,
            "url": url,
            "image_url": url  # Frontend expects image_url
        }

    except Exception as e:
        # Service boundary: report the failure to the caller as data.
        return {
            "success": False,
            "error": str(e),
            "code": code
        }


async def generate_mermaid_with_ai(
    description: str,
    diagram_type: str = "flowchart",
    style: str = "detailed"
) -> Dict[str, Any]:
    """Produce Mermaid code for a natural-language description.

    The first configured provider is used: Gemini when a Google API key is
    set, otherwise OpenAI when an OpenAI key is set. With no key at all the
    stock template for the diagram type is returned instead.

    Args:
        description: Natural language description of the diagram
        diagram_type: Type of diagram (flowchart, sequence, class, etc.)
        style: 'simple', 'detailed', 'complex'

    Returns:
        Dictionary with generated Mermaid code
    """
    # Unknown diagram types borrow the flowchart entry as their example.
    flowchart_entry = MERMAID_TEMPLATES["flowchart"]
    template = MERMAID_TEMPLATES.get(diagram_type, flowchart_entry)

    if settings.google_api_key:
        provider = _generate_mermaid_gemini
    elif settings.openai_api_key:
        provider = _generate_mermaid_openai
    else:
        provider = None

    if provider is None:
        # No provider available: hand back the static sample diagram.
        return {
            "success": True,
            "code": template["template"],
            "diagram_type": diagram_type,
            "note": "API keys not configured - returning template"
        }

    return await provider(description, diagram_type, template, style)


async def _generate_mermaid_gemini(
    description: str,
    diagram_type: str,
    template: dict,
    style: str
) -> Dict[str, Any]:
    """Generate Mermaid code with Gemini.

    Args:
        description: Natural language description of the diagram.
        diagram_type: Diagram type id, interpolated into the prompt.
        template: Template entry; its 'name' and 'template' values are used
            in the prompt, and 'template' is returned as ``code`` on failure.
        style: Level of detail requested in the prompt.

    Returns:
        On success: ``success`` True, the extracted ``code``, plus
        ``diagram_type`` and ``description``. On any exception: ``success``
        False, ``error`` and the sample template as ``code``.
    """
    try:
        import google.generativeai as genai
        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")

        prompt = f"""Generate a Mermaid {template['name']} diagram based on this description:

"{description}"

Requirements:
- Use valid Mermaid syntax for {diagram_type}
- Style: {style} (simple=few nodes, detailed=moderate, complex=comprehensive)
- Return ONLY the Mermaid code, no explanations
- Start with the diagram type declaration

Example format:
{template['template']}

Generate the diagram code:"""

        # Await the SDK's async variant: the synchronous generate_content()
        # would block the event loop for the whole API round-trip.
        response = await model.generate_content_async(prompt)
        code = response.text.strip()

        # The model may wrap its answer in a Markdown code fence despite the
        # instructions; keep only what is inside the first fence.
        if "```mermaid" in code:
            code = code.split("```mermaid")[1].split("```")[0].strip()
        elif "```" in code:
            code = code.split("```")[1].split("```")[0].strip()

        return {
            "success": True,
            "code": code,
            "diagram_type": diagram_type,
            "description": description
        }

    except Exception as e:
        # Covers a missing SDK, API errors and responses without text.
        return {
            "success": False,
            "error": str(e),
            "code": template["template"]
        }


async def _generate_mermaid_openai(
    description: str,
    diagram_type: str,
    template: dict,
    style: str
) -> Dict[str, Any]:
    """Generate Mermaid code through the OpenAI chat completions endpoint.

    Returns a success dictionary with the extracted code, or a failure
    dictionary carrying the error text and the sample template as ``code``.
    """
    try:
        system_message = {
            "role": "system",
            "content": f"You are a Mermaid diagram expert. Generate valid Mermaid {diagram_type} diagrams. Return ONLY the code, no explanations."
        }
        user_message = {
            "role": "user",
            "content": f"Create a {style} {template['name']} diagram for: {description}"
        }
        payload = {
            "model": "gpt-4o-mini",
            "messages": [system_message, user_message],
            "temperature": 0.7,
            "max_tokens": 1000
        }
        request_headers = {
            "Authorization": f"Bearer {settings.openai_api_key}",
            "Content-Type": "application/json"
        }

        async with httpx.AsyncClient(timeout=60) as client:
            reply = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=payload
            )
            reply.raise_for_status()
            first_choice = reply.json()["choices"][0]
            diagram = first_choice["message"]["content"].strip()

            # Keep only the contents of the first Markdown code fence, if any.
            fence = "```"
            for opener in (fence + "mermaid", fence):
                if opener in diagram:
                    after_opener = diagram.partition(opener)[2]
                    diagram = after_opener.partition(fence)[0].strip()
                    break

            return {
                "success": True,
                "code": diagram,
                "diagram_type": diagram_type,
                "description": description
            }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "code": template["template"]
        }


def _markdown_to_plain(content: str) -> str:
    """Strip Markdown syntax from *content* while keeping the readable text."""
    import re

    per_line = re.MULTILINE

    # Images carry no readable text: drop them entirely.
    text = re.sub(r'!\[.*?\]\(.*?\)', '', content)
    # Links: keep the label, drop the target.
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)

    # Block-level markers only count as markup at the start of a line, so
    # hyphens, '#', '>' and underscores inside ordinary text are preserved.
    # Code-fence delimiter lines (the fenced code itself is kept).
    text = re.sub(r'^[ \t]*(?:`{3,}|~{3,}).*$', '', text, flags=per_line)
    # Horizontal rules such as '---', '***' or '_ _ _'.
    text = re.sub(r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$', '', text, flags=per_line)
    # Heading markers.
    text = re.sub(r'^[ \t]*#{1,6}[ \t]+', '', text, flags=per_line)
    # Blockquote markers (possibly nested).
    text = re.sub(r'^[ \t]*(?:>[ \t]?)+', '', text, flags=per_line)
    # Bullet markers; the item's indentation is kept.
    text = re.sub(r'^([ \t]*)[-*+][ \t]+', r'\1', text, flags=per_line)

    # Inline markers: unwrap paired delimiters only.
    text = re.sub(r'(\*\*|__)(?=\S)(.+?)(?<=\S)\1', r'\2', text)
    # Single '*' / '_' emphasis; the word-boundary guards leave snake_case
    # identifiers and spaced arithmetic like '2 * 3' untouched.
    text = re.sub(r'(?<![\w*])([*_])(?=\S)(.+?)(?<=\S)\1(?![\w*])', r'\2', text)
    text = re.sub(r'~~(?=\S)(.+?)(?<=\S)~~', r'\1', text)
    text = re.sub(r'`+([^`\n]+)`+', r'\1', text)

    # Collapse the blank-line runs left behind by removed elements.
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()


async def convert_markdown(
    content: str,
    output_format: str = "html",
    theme: str = "github"
) -> Dict[str, Any]:
    """Convert Markdown to HTML or plain text.

    Args:
        content: Markdown content
        output_format: 'html' or 'plain'. 'forge' is accepted as shorthand
            for HTML output with the 'forge' theme.
        theme: Theme for HTML output. Only 'forge' changes the result (it
            switches the font to Montserrat); other values use the default
            system font stack.

    Returns:
        Dictionary with the converted text under both ``content`` and
        ``output``, plus ``format`` (and ``toc`` for HTML). Unsupported
        formats and conversion errors return ``success`` False with an
        ``error`` message. If the ``markdown`` package is missing, HTML
        requests fall back to returning the raw Markdown with a ``note``.
    """
    try:
        if output_format == "forge":
            output_format = "html"
            theme = "forge"

        if output_format == "plain":
            # Needs no third-party package, so it is handled before the
            # markdown import and works even when that package is absent.
            plain_text = _markdown_to_plain(content)
            return {
                "success": True,
                "output": plain_text,
                "content": plain_text,
                "format": "plain"
            }

        if output_format != "html":
            return {
                "success": False,
                "error": f"Unsupported format: {output_format}"
            }

        # Imported lazily so only the HTML path depends on the package.
        import markdown

        # Extensions are referenced by name; no extension modules needed.
        md = markdown.Markdown(extensions=[
            'tables',
            'fenced_code',
            'toc',
            'nl2br',
            'sane_lists'
        ])
        html = md.convert(content)

        # Define styles based on theme
        extra_head = ""
        font_family = "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif"

        if theme == "forge":
            extra_head = '<link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;700&display=swap" rel="stylesheet">'
            font_family = "'Montserrat', sans-serif"

        # The charset declaration keeps non-ASCII text intact when the
        # document is saved or served without an encoding header.
        styled_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
{extra_head}
<style>
body {{ font-family: {font_family}; line-height: 1.6; max-width: 800px; margin: 0 auto; padding: 20px; }}
code {{ background: #f4f4f4; padding: 2px 6px; border-radius: 3px; }}
pre {{ background: #f4f4f4; padding: 16px; border-radius: 6px; overflow-x: auto; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background: #f4f4f4; }}
blockquote {{ border-left: 4px solid #ddd; margin: 0; padding-left: 16px; color: #666; }}
</style>
</head>
<body>
{html}
</body>
</html>"""

        return {
            "success": True,
            "content": styled_html,
            "output": styled_html,  # Frontend expects output
            "format": "html",
            "toc": getattr(md, "toc", None)
        }

    except ImportError:
        # Best-effort fallback without the markdown library: return the
        # source unchanged and say so.
        return {
            "success": True,
            "output": content,
            "content": content,
            "format": output_format,
            "note": "markdown library not installed"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }


async def generate_markdown_with_ai(
    topic: str,
    content_type: str = "article",
    length: str = "medium",
    include_toc: bool = True
) -> Dict[str, Any]:
    """Write Markdown content about a topic with the first configured AI provider.

    Gemini is used when a Google API key is set, otherwise OpenAI when an
    OpenAI key is set. Without any key a failure dictionary containing a
    placeholder document is returned.

    Args:
        topic: Topic or subject to write about
        content_type: 'article', 'documentation', 'readme', 'tutorial', 'report'
        length: 'short', 'medium', 'long' (unknown values are treated as 'medium')
        include_toc: Include table of contents

    Returns:
        Dictionary with generated markdown content
    """
    length_guide = {
        "short": "2-3 paragraphs, ~200 words",
        "medium": "5-7 paragraphs, ~500 words",
        "long": "10+ paragraphs, ~1000 words"
    }

    type_guide = {
        "article": "engaging article with introduction, body, and conclusion",
        "documentation": "technical documentation with clear sections and code examples",
        "readme": "GitHub README with badges, installation, usage, and contributing sections",
        "tutorial": "step-by-step tutorial with numbered instructions and examples",
        "report": "professional report with executive summary, findings, and recommendations"
    }

    if settings.google_api_key:
        writer = _generate_markdown_gemini
    elif settings.openai_api_key:
        writer = _generate_markdown_openai
    else:
        return {
            "success": False,
            "error": "No API keys configured",
            "content": f"# {topic}\n\nContent generation requires API keys."
        }

    # The provider receives the whole type table but only the resolved
    # length description.
    length_description = length_guide.get(length, length_guide["medium"])
    return await writer(topic, content_type, type_guide, length_description, include_toc)


async def _generate_markdown_gemini(
    topic: str,
    content_type: str,
    type_guide: dict,
    length_guide: str,
    include_toc: bool
) -> Dict[str, Any]:
    """Generate Markdown content with Gemini.

    Args:
        topic: Subject to write about.
        content_type: Key into *type_guide*; unknown keys fall back to the
            literal word 'article' in the prompt.
        type_guide: Mapping of content type to a description of the
            expected document.
        length_guide: Already-resolved length description for the prompt.
        include_toc: Whether the prompt asks for a table of contents.

    Returns:
        On success: ``success`` True with ``content``, ``content_type`` and
        ``topic``. On any exception: ``success`` False with ``error``.
    """
    try:
        import google.generativeai as genai
        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")

        if include_toc:
            toc_requirement = "Include a table of contents at the start"
        else:
            toc_requirement = "No table of contents needed"

        prompt = f"""Write a {type_guide.get(content_type, 'article')} about:

"{topic}"

Requirements:
- Format: Proper Markdown with headers, lists, code blocks where appropriate
- Length: {length_guide}
- {toc_requirement}
- Use appropriate markdown features (bold, italic, links, code, blockquotes)
- Make it informative and well-structured

Generate the markdown content:"""

        # Await the SDK's async variant: the synchronous generate_content()
        # would block the event loop for the whole API round-trip.
        response = await model.generate_content_async(prompt)
        content = response.text.strip()

        return {
            "success": True,
            "content": content,
            "content_type": content_type,
            "topic": topic
        }

    except Exception as e:
        # Covers a missing SDK, API errors and responses without text.
        return {
            "success": False,
            "error": str(e)
        }


async def _generate_markdown_openai(
    topic: str,
    content_type: str,
    type_guide: dict,
    length_guide: str,
    include_toc: bool
) -> Dict[str, Any]:
    """Generate Markdown content through the OpenAI chat completions endpoint.

    Returns a success dictionary with the generated text, or a failure
    dictionary carrying the error text when anything goes wrong.
    """
    try:
        type_hint = type_guide.get(content_type, '')
        toc_request = 'Include TOC.' if include_toc else ''
        conversation = [
            {
                "role": "system",
                "content": f"You are a technical writer. Generate well-formatted Markdown content. {type_hint}"
            },
            {
                "role": "user",
                "content": f"Write about '{topic}'. Length: {length_guide}. {toc_request}"
            }
        ]
        request_headers = {
            "Authorization": f"Bearer {settings.openai_api_key}",
            "Content-Type": "application/json"
        }
        request_body = {
            "model": "gpt-4o-mini",
            "messages": conversation,
            "temperature": 0.7,
            "max_tokens": 2000
        }

        async with httpx.AsyncClient(timeout=60) as client:
            reply = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=request_body
            )
            reply.raise_for_status()
            first_choice = reply.json()["choices"][0]
            generated = first_choice["message"]["content"].strip()

            return {
                "success": True,
                "content": generated,
                "content_type": content_type,
                "topic": topic
            }

    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }


def get_mermaid_templates() -> List[Dict[str, str]]:
    """List every Mermaid template as an id/name/description/template record."""
    catalogue = []
    for template_id, entry in MERMAID_TEMPLATES.items():
        # Only the four public fields are exposed; extras such as the
        # flowchart "directions" list are left out.
        record = {"id": template_id}
        for field in ("name", "description", "template"):
            record[field] = entry[field]
        catalogue.append(record)
    return catalogue


def get_mermaid_template(diagram_type: str) -> Optional[Dict[str, Any]]:
    """Look up one Mermaid template entry, or return None if there is none.

    The result is a new dictionary: the diagram type under "id" followed by
    every field of the stored entry.
    """
    entry = MERMAID_TEMPLATES.get(diagram_type)
    if not entry:
        return None

    described = {"id": diagram_type}
    described.update(entry)
    return described