cohorta/backend/scripts/generate_architecture_doc.py

#!/usr/bin/env python3
"""
Semblance - Technical Architecture Document Generator
Generates a professionally formatted PDF with Mermaid diagrams rendered as PNGs.

Usage:
    cd backend && source venv/bin/activate
    python scripts/generate_architecture_doc.py [output_path]
"""

import sys
import os
import subprocess
import json
import tempfile
import shutil

from reportlab.platypus import (
    BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, PageBreak,
    NextPageTemplate, Table, TableStyle, Flowable, KeepTogether, Image,
)
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
from reportlab.lib.colors import HexColor, white, black
from reportlab.lib.units import inch, mm
from reportlab.lib import colors

# ============================================================================
# DESIGN SYSTEM (matches user manual)
# ============================================================================

# Named colour palette for the whole document. Every style, flowable and
# page-decoration callback in this file looks colours up by key here rather
# than hard-coding hex values.
COLORS = {
    # Brand pinks / wines, lightest to darkest.
    "primary": HexColor("#E8A0B4"),
    "primary_dark": HexColor("#9B4D63"),
    "primary_deeper": HexColor("#6B2D3F"),
    "secondary": HexColor("#F5EEF0"),
    "accent": HexColor("#3B82F6"),
    # Text colours.
    "text_primary": HexColor("#2D1F24"),
    "text_secondary": HexColor("#7A6068"),
    "text_light": HexColor("#FFFFFF"),
    # Surfaces and rules.
    "background": HexColor("#FAFAFA"),
    "border": HexColor("#E8D9DE"),
    "code_bg": HexColor("#F8F4F5"),
    # Callout backgrounds and their accent ("border") colours. The tip, note
    # and warning pairs are consumed by CalloutBox.CONFIGS; the success pair
    # is not referenced by CalloutBox.
    "tip_bg": HexColor("#EFF6FF"),
    "tip_border": HexColor("#3B82F6"),
    "note_bg": HexColor("#FFF7ED"),
    "note_border": HexColor("#F59E0B"),
    "warning_bg": HexColor("#FEF2F2"),
    "warning_border": HexColor("#EF4444"),
    "success_bg": HexColor("#F0FDF4"),
    "success_border": HexColor("#22C55E"),
    # Chapter banner and table header share the hex value of "primary_deeper".
    "chapter_bg": HexColor("#6B2D3F"),
    "table_header_bg": HexColor("#6B2D3F"),
    "table_alt_row": HexColor("#FAF5F7"),
}

# Logical font roles -> font names. The values are passed straight to
# canvas.setFont() and ParagraphStyle(fontName=...) elsewhere in this file.
FONTS = {
    "heading": "Helvetica-Bold",
    "body": "Helvetica",
    "body_italic": "Helvetica-Oblique",
    "mono": "Courier",
    "mono_bold": "Courier-Bold",
}

# Page geometry. A4 is reportlab's (width, height) page-size tuple; the margins
# are in the same unit as A4.
PAGE_WIDTH, PAGE_HEIGHT = A4
MARGIN_LEFT = 60
MARGIN_RIGHT = 60
MARGIN_TOP = 72
MARGIN_BOTTOM = 72
# Size of the single content frame: the page minus its margins on each axis.
CONTENT_WIDTH = PAGE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
FRAME_HEIGHT = PAGE_HEIGHT - MARGIN_TOP - MARGIN_BOTTOM


# ============================================================================
# PARAGRAPH STYLES
# ============================================================================

def get_styles():
    """Build the named paragraph styles used throughout the document.

    Returns:
        dict mapping a short style key ("h1", "body", "toc0", ...) to a newly
        constructed ParagraphStyle. Fonts come from FONTS, colours from COLORS.
    """
    heading_font = FONTS["heading"]
    body_font = FONTS["body"]
    mono_font = FONTS["mono"]

    ink = COLORS["text_primary"]
    muted = COLORS["text_secondary"]
    wine = COLORS["primary_dark"]
    deep_wine = COLORS["primary_deeper"]

    def make(name, font, size, leading, color, **extra):
        # Every style sets these four attributes; the rest vary per style.
        return ParagraphStyle(
            name, fontName=font, fontSize=size, leading=leading,
            textColor=color, **extra,
        )

    styles = {}

    # Headings are flagged keepWithNext.
    styles["h1"] = make("Heading1", heading_font, 24, 30, deep_wine,
                        spaceBefore=28, spaceAfter=14, keepWithNext=True)
    styles["h2"] = make("Heading2", heading_font, 18, 24, wine,
                        spaceBefore=22, spaceAfter=10, keepWithNext=True)
    styles["h3"] = make("Heading3", heading_font, 14, 18, wine,
                        spaceBefore=16, spaceAfter=8, keepWithNext=True)

    # Running text.
    styles["body"] = make("Body", body_font, 10.5, 15, ink,
                          spaceBefore=3, spaceAfter=7, alignment=TA_JUSTIFY)
    styles["body_bold"] = make("BodyBold", heading_font, 10.5, 15, ink,
                               spaceBefore=3, spaceAfter=4)

    # Two bullet levels; the second is smaller, muted and indented further.
    styles["bullet"] = make("Bullet", body_font, 10.5, 15, ink,
                            leftIndent=20, bulletIndent=8,
                            spaceBefore=2, spaceAfter=2)
    styles["bullet2"] = make("Bullet2", body_font, 10, 14, muted,
                             leftIndent=40, bulletIndent=28,
                             spaceBefore=1, spaceAfter=1)

    styles["caption"] = make("Caption", FONTS["body_italic"], 9, 12, muted,
                             alignment=TA_CENTER, spaceBefore=4, spaceAfter=14)
    styles["code_block"] = make("CodeBlock", mono_font, 8, 11, ink,
                                spaceBefore=4, spaceAfter=4, leftIndent=12,
                                backColor=COLORS["code_bg"])

    # Table-of-contents levels 0-2, each indented 20 more than the last.
    styles["toc0"] = make("TOC0", heading_font, 13, 22, deep_wine,
                          leftIndent=0, spaceBefore=10)
    styles["toc1"] = make("TOC1", body_font, 11, 17, ink,
                          leftIndent=20, spaceBefore=3)
    styles["toc2"] = make("TOC2", body_font, 10, 15, muted,
                          leftIndent=40, spaceBefore=2)

    # Table cells and callout text.
    styles["table_header"] = make("TableHeader", heading_font, 9.5, 13,
                                  COLORS["text_light"])
    styles["table_cell"] = make("TableCell", body_font, 9.5, 13, ink)
    styles["table_cell_mono"] = make("TableCellMono", mono_font, 8.5, 12, ink)
    styles["callout_body"] = make("CalloutBody", body_font, 10, 14, ink)

    return styles


# Module-wide style registry, built once at import time.
STYLES = get_styles()


# ============================================================================
# CUSTOM FLOWABLES
# ============================================================================

class ChapterTitlePage(Flowable):
    """Chapter opener: a coloured banner with the chapter number and title.

    The flowable always reports FRAME_HEIGHT as its height. The top 40% of
    that height is the banner; a short accent rule is drawn just below it.
    """

    def __init__(self, number, title, subtitle=""):
        super().__init__()
        self.number, self.title, self.subtitle = number, title, subtitle

    def wrap(self, availWidth, availHeight):
        # The offered height is ignored: the opener claims a whole frame.
        self.width, self.height = availWidth, FRAME_HEIGHT
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        width, height = self.width, self.height
        centre_x = width / 2

        banner_h = height * 0.40
        banner_base = height - banner_h

        def in_banner(fraction):
            # y coordinate at the given fraction of the banner, from its base.
            return banner_base + banner_h * fraction

        # Banner rectangle, widened by both side margins and raised by the
        # top margin relative to the flowable's own box.
        canvas.setFillColor(COLORS["chapter_bg"])
        canvas.rect(
            -MARGIN_LEFT, banner_base,
            width + MARGIN_LEFT + MARGIN_RIGHT, banner_h + MARGIN_TOP,
            fill=1, stroke=0,
        )

        # Small "CHAPTER n" label near the top of the banner.
        canvas.setFillColor(COLORS["primary"])
        canvas.setFont(FONTS["body"], 14)
        canvas.drawCentredString(centre_x, in_banner(0.72),
                                 f"CHAPTER {self.number}")

        # Oversized, mostly transparent chapter number behind the title.
        canvas.setFillColorRGB(1, 1, 1, 0.12)
        canvas.setFont(FONTS["heading"], 160)
        canvas.drawCentredString(centre_x, in_banner(0.18), str(self.number))

        # Title, drawn after the ghost number so it sits on top of it.
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 30)
        canvas.drawCentredString(centre_x, in_banner(0.38), self.title)

        if self.subtitle:
            canvas.setFillColorRGB(1, 1, 1, 0.75)
            canvas.setFont(FONTS["body"], 13)
            canvas.drawCentredString(centre_x, in_banner(0.22), self.subtitle)

        # Short centred accent rule 20 units below the banner.
        rule_len = 60
        rule_y = banner_base - 20
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(3)
        canvas.line(centre_x - rule_len / 2, rule_y,
                    centre_x + rule_len / 2, rule_y)


class CalloutBox(Flowable):
    """Tinted box with a coloured left stripe, a small label and body text.

    Supported callout types are the keys of CONFIGS ("tip", "note",
    "warning"); any other value raises KeyError in __init__.
    """

    # For each type: background colour, stripe/label colour, label text.
    CONFIGS = {
        kind: {
            "bg": COLORS[f"{kind}_bg"],
            "border": COLORS[f"{kind}_border"],
            "label": kind.upper(),
        }
        for kind in ("tip", "note", "warning")
    }

    def __init__(self, text, callout_type="tip"):
        super().__init__()
        self.text = text
        self.config = self.CONFIGS[callout_type]
        self._para = Paragraph(self.text, STYLES["callout_body"])

    def wrap(self, availWidth, availHeight):
        # The body text is laid out 26 units narrower than the box; the box is
        # 32 units taller than the text to leave room for the label.
        _, text_height = self._para.wrap(availWidth - 26, availHeight)
        self.width = availWidth
        self.height = text_height + 32
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        background = self.config["bg"]
        accent = self.config["border"]

        # Rounded background panel.
        canvas.setFillColor(background)
        canvas.roundRect(0, 0, self.width, self.height, 4, fill=1, stroke=0)

        # 4-unit-wide stripe down the left edge.
        canvas.setFillColor(accent)
        canvas.rect(0, 0, 4, self.height, fill=1, stroke=0)

        # Label in the accent colour near the top-left corner.
        canvas.setFont(FONTS["heading"], 8.5)
        canvas.setFillColor(accent)
        canvas.drawString(14, self.height - 15, self.config["label"])

        # Body paragraph, as measured in wrap().
        self._para.drawOn(canvas, 14, 6)


class HorizontalRule(Flowable):
    """Thin full-width divider line centred in a 12-unit-high band."""

    def __init__(self):
        super().__init__()

    def wrap(self, availWidth, availHeight):
        self.width, self.height = availWidth, 12
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        mid_y = 6  # half of the 12-unit band reserved in wrap()
        canvas.setStrokeColor(COLORS["border"])
        canvas.setLineWidth(0.5)
        canvas.line(0, mid_y, self.width, mid_y)


class DiagramImage(Flowable):
    """Embed a PNG diagram, centred and scaled to fit, with an optional caption.

    The image is never enlarged. It is scaled down, keeping its aspect ratio,
    so that it is no wider than ``max_width`` (or the available width) and no
    taller than 65% of a full content frame. If the file does not exist when
    the flowable is measured, a small "[Diagram not found: ...]" placeholder
    is drawn instead.

    Args:
        image_path: Path to the PNG file.
        caption: Optional caption text drawn beneath the image.
        max_width: Optional width limit; defaults to CONTENT_WIDTH.
    """

    # Share of a full frame's height that the picture itself may occupy.
    _MAX_HEIGHT_FRACTION = 0.65
    # Vertical space reserved when the image file is missing.
    _PLACEHOLDER_HEIGHT = 40

    def __init__(self, image_path, caption="", max_width=None):
        super().__init__()
        self.image_path = image_path
        self.caption_text = caption
        self.max_width = max_width or CONTENT_WIDTH
        self._img = None
        self._cap = None
        if caption:
            self._cap = Paragraph(caption, STYLES["caption"])

    def wrap(self, availWidth, availHeight):
        max_w = min(self.max_width, availWidth)
        # The cap is relative to a full frame, not to the space left on the
        # current page. Capping against availHeight shrank a diagram that
        # happened to land near the bottom of a page to fit the leftover space
        # instead of letting the layout move it to the next page.
        max_h = FRAME_HEIGHT * self._MAX_HEIGHT_FRACTION

        # Reset so draw() reflects this measurement, not an earlier one.
        self._img = None
        if os.path.exists(self.image_path):
            img = Image(self.image_path)
            iw, ih = img.imageWidth, img.imageHeight
            # One uniform scale that satisfies both limits, never above 1.
            scale = min(1.0, max_w / iw, max_h / ih)
            img.drawWidth = iw * scale
            img.drawHeight = ih * scale
            img.wrap(availWidth, availHeight)
            self._img = img
            img_h = img.drawHeight
        else:
            img_h = self._PLACEHOLDER_HEIGHT

        cap_h = 0
        if self._cap is not None:
            _, cap_h = self._cap.wrap(availWidth, availHeight)
            cap_h += 6

        self.width = availWidth
        self.height = img_h + cap_h + 12
        return self.width, self.height

    def draw(self):
        c = self.canv
        cap_h = 0
        if self._cap is not None:
            # Re-measure at the final width to know how far to lift the image.
            _, cap_h = self._cap.wrap(self.width, 9999)
            cap_h += 6
            self._cap.drawOn(c, 0, 0)

        # Trust what wrap() measured rather than re-checking the filesystem,
        # so the drawing always matches the height that was reserved.
        if self._img is not None:
            x_offset = (self.width - self._img.drawWidth) / 2
            self._img.drawOn(c, x_offset, cap_h + 4)
        else:
            c.setFillColor(COLORS["code_bg"])
            c.roundRect(0, cap_h + 4, self.width, 30, 4, fill=1, stroke=0)
            c.setFont(FONTS["body_italic"], 9)
            c.setFillColor(COLORS["text_secondary"])
            c.drawCentredString(self.width / 2, cap_h + 16,
                                f"[Diagram not found: {os.path.basename(self.image_path)}]")


# ============================================================================
# TABLE HELPER
# ============================================================================

def _build_table(header, rows, col_widths, first_col_style):
    """Shared builder behind styled_table() and mono_table().

    Args:
        header: Iterable of header cell texts.
        rows: Iterable of rows; each cell is converted with str().
        col_widths: Passed through as Table(colWidths=...).
        first_col_style: ParagraphStyle for the first column of body rows;
            every other body cell uses STYLES["table_cell"].

    Returns:
        A Table with the document's standard styling applied.
    """
    body_style = STYLES["table_cell"]
    data = [[Paragraph(h, STYLES["table_header"]) for h in header]]
    for row in rows:
        data.append([
            Paragraph(str(cell), first_col_style if j == 0 else body_style)
            for j, cell in enumerate(row)
        ])

    table = Table(data, colWidths=col_widths, repeatRows=1)
    cmds = [
        # Header row.
        ("BACKGROUND", (0, 0), (-1, 0), COLORS["table_header_bg"]),
        ("TEXTCOLOR", (0, 0), (-1, 0), white),
        ("FONTNAME", (0, 0), (-1, 0), FONTS["heading"]),
        ("FONTSIZE", (0, 0), (-1, 0), 9.5),
        ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
        ("TOPPADDING", (0, 0), (-1, 0), 8),
        # Body rows.
        ("FONTNAME", (0, 1), (-1, -1), FONTS["body"]),
        ("FONTSIZE", (0, 1), (-1, -1), 9.5),
        ("TOPPADDING", (0, 1), (-1, -1), 6),
        ("BOTTOMPADDING", (0, 1), (-1, -1), 6),
        # Whole table.
        ("LEFTPADDING", (0, 0), (-1, -1), 8),
        ("RIGHTPADDING", (0, 0), (-1, -1), 8),
        ("GRID", (0, 0), (-1, -1), 0.5, COLORS["border"]),
        ("LINEBELOW", (0, 0), (-1, 0), 1.5, COLORS["primary_dark"]),
        ("VALIGN", (0, 0), (-1, -1), "TOP"),
        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
    ]
    # Tint every even-indexed row below the header (rows 2, 4, ...).
    cmds.extend(
        ("BACKGROUND", (0, i), (-1, i), COLORS["table_alt_row"])
        for i in range(2, len(data), 2)
    )
    table.setStyle(TableStyle(cmds))
    return table


def styled_table(header, rows, col_widths=None):
    """Return a styled Table with a repeating header row and striped body."""
    return _build_table(header, rows, col_widths, STYLES["table_cell"])


def mono_table(header, rows, col_widths=None):
    """Table with monospace font in the first column (for paths/code)."""
    return _build_table(header, rows, col_widths, STYLES["table_cell_mono"])


# ============================================================================
# BOOKMARKED HEADING
# ============================================================================

class BookmarkedHeading(Paragraph):
    """Heading paragraph that adds a PDF bookmark and outline entry when drawn.

    Attributes set here (bm_name, toc_level, plain_text) are also read by
    ArchDocTemplate.afterFlowable to emit the table-of-contents entry.
    """

    def __init__(self, text, style, level=0, bookmark_name=None):
        if bookmark_name:
            self.bm_name = bookmark_name
        else:
            # Derive a key from the heading text: spaces and slashes become
            # underscores, truncated to 60 characters.
            # NOTE(review): headings with identical text derive identical
            # names; confirm callers pass bookmark_name where that matters.
            self.bm_name = text.translate(str.maketrans(" /", "__"))[:60]
        self.toc_level = level
        self.plain_text = text
        super().__init__(text, style)

    def draw(self):
        canvas = self.canv
        # Register the destination first, then the outline entry that points
        # at it, then render the heading text itself.
        canvas.bookmarkPage(self.bm_name)
        canvas.addOutlineEntry(self.plain_text, self.bm_name, self.toc_level, 0)
        super().draw()


# ============================================================================
# DOCUMENT TEMPLATE
# ============================================================================

class ArchDocTemplate(BaseDocTemplate):
    """A4 document template with cover, toc, chapter and content page styles.

    All four page templates share one margin-to-margin frame; they differ
    only in the onPage callback that decorates the page.
    """

    def __init__(self, filename):
        super().__init__(
            filename,
            pagesize=A4,
            leftMargin=MARGIN_LEFT,
            rightMargin=MARGIN_RIGHT,
            topMargin=MARGIN_TOP,
            bottomMargin=MARGIN_BOTTOM,
            title="Semblance Technical Architecture",
            author="Semblance",
        )
        main_frame = Frame(
            MARGIN_LEFT, MARGIN_BOTTOM, CONTENT_WIDTH, FRAME_HEIGHT,
            id="main",
            leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0,
        )
        # (template id, onPage callback); chapter pages get no decoration.
        page_decorators = (
            ("cover", self._draw_cover),
            ("toc", self._on_toc),
            ("chapter", lambda c, d: None),
            ("content", self._on_content),
        )
        self.addPageTemplates([
            PageTemplate(id=template_id, frames=[main_frame], onPage=decorate)
            for template_id, decorate in page_decorators
        ])

    def afterFlowable(self, flowable):
        """Emit a TOCEntry notification for each BookmarkedHeading placed."""
        if not isinstance(flowable, BookmarkedHeading):
            return
        entry = (
            flowable.toc_level, flowable.plain_text,
            self.page, flowable.bm_name,
        )
        self.notify("TOCEntry", entry)

    @staticmethod
    def _draw_cover(canvas, doc):
        """Paint the cover: gradient, node-network motif and title text."""
        canvas.saveState()
        page_w, page_h = PAGE_WIDTH, PAGE_HEIGHT
        mid_x = page_w / 2

        # Gradient background (deep wine to rose), drawn as horizontal bands
        # from the top of the page down. Each band is one unit taller than
        # its slot so neighbouring bands overlap.
        top_rgb = (107, 45, 63)
        bottom_rgb = (245, 238, 240)
        bands = 120
        band_h = page_h / bands
        for band in range(bands):
            t = band / bands
            red, green, blue = (
                (start + (end - start) * t) / 255
                for start, end in zip(top_rgb, bottom_rgb)
            )
            canvas.setFillColorRGB(red, green, blue)
            canvas.rect(0, page_h - (band + 1) * band_h, page_w, band_h + 1,
                        fill=1, stroke=0)

        # Decorative network of circles (representing architecture nodes),
        # each given as (centre x, centre y, radius).
        canvas.setStrokeColorRGB(1, 1, 1, 0.15)
        canvas.setLineWidth(1.5)
        nodes = [
            (120, 640, 35), (420, 680, 30), (370, 560, 50),
            (100, 470, 22), (490, 510, 28), (250, 720, 20),
            (340, 440, 25), (200, 550, 40), (480, 620, 18),
        ]
        for node_x, node_y, radius in nodes:
            canvas.circle(node_x, node_y, radius, stroke=1, fill=0)
        # Pairs of indices into `nodes` joined centre to centre.
        links = [(0, 2), (1, 2), (0, 3), (2, 4), (5, 0),
                 (6, 4), (7, 2), (8, 1), (7, 0)]
        for src, dst in links:
            x1, y1, _ = nodes[src]
            x2, y2, _ = nodes[dst]
            canvas.line(x1, y1, x2, y2)

        # Title
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 44)
        canvas.drawCentredString(mid_x, 440, "Semblance")

        # Subtitle
        canvas.setFont(FONTS["body"], 20)
        canvas.drawCentredString(mid_x, 400, "Technical Architecture Document")

        # Rule under the subtitle
        canvas.setStrokeColorRGB(1, 1, 1, 0.6)
        canvas.setLineWidth(2)
        canvas.line(mid_x - 80, 380, mid_x + 80, 380)

        # Description
        canvas.setFont(FONTS["body"], 12)
        canvas.setFillColorRGB(1, 1, 1, 0.85)
        canvas.drawCentredString(
            mid_x, 355, "AI-Powered Synthetic Focus Group Research Platform")

        # Version and date near the bottom of the page
        canvas.setFont(FONTS["body"], 11)
        canvas.setFillColorRGB(1, 1, 1, 0.7)
        canvas.drawCentredString(mid_x, 140, "Version 1.0")
        canvas.drawCentredString(mid_x, 122, "February 2026")

        canvas.restoreState()

    @staticmethod
    def _on_toc(canvas, doc):
        """Draw only the centred page number on table-of-contents pages."""
        canvas.saveState()
        canvas.setFont(FONTS["body"], 9)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")
        canvas.restoreState()

    @staticmethod
    def _on_content(canvas, doc):
        """Draw the running header, footer rule and page number."""
        left_x = MARGIN_LEFT
        right_x = PAGE_WIDTH - MARGIN_RIGHT
        header_rule_y = PAGE_HEIGHT - 50
        header_text_y = PAGE_HEIGHT - 44

        canvas.saveState()
        # Header: rule with the document name on the left, version on the right
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(0.5)
        canvas.line(left_x, header_rule_y, right_x, header_rule_y)
        canvas.setFont(FONTS["body"], 8)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawString(left_x, header_text_y,
                          "Semblance \u2014 Technical Architecture")
        canvas.drawRightString(right_x, header_text_y, "v1.0")
        # Footer: rule with the page number centred beneath it
        canvas.line(left_x, 55, right_x, 55)
        canvas.setFont(FONTS["body"], 9)
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")
        canvas.restoreState()


# ============================================================================
# SHORTHAND HELPERS
# ============================================================================

# Short alias so the helpers below stay compact.
S = STYLES


def h1(text, bm=None):
    """Top-level heading (TOC level 0)."""
    return BookmarkedHeading(text, S["h1"], 0, bm)


def h2(text, bm=None):
    """Second-level heading (TOC level 1)."""
    return BookmarkedHeading(text, S["h2"], 1, bm)


def h3(text, bm=None):
    """Third-level heading (TOC level 2)."""
    return BookmarkedHeading(text, S["h3"], 2, bm)


def p(text):
    """Body paragraph."""
    return Paragraph(text, style=S["body"])


def pb(text):
    """Bold body paragraph."""
    return Paragraph(text, style=S["body_bold"])


def bullet(text):
    """First-level bullet item, prefixed with a bullet character."""
    marked = f"\u2022  {text}"
    return Paragraph(marked, S["bullet"])


def bullet2(text):
    """Second-level bullet item, prefixed with an en dash."""
    marked = f"\u2013  {text}"
    return Paragraph(marked, S["bullet2"])


def tip(text):
    """Callout box of type "tip"."""
    return CalloutBox(text, callout_type="tip")


def note(text):
    """Callout box of type "note"."""
    return CalloutBox(text, callout_type="note")


def warning(text):
    """Callout box of type "warning"."""
    return CalloutBox(text, callout_type="warning")


def sp(pts=8):
    """Vertical spacer `pts` high (width 1)."""
    return Spacer(width=1, height=pts)


# ============================================================================
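# MERMAID DIAGRAM DEFINITIONS
# DIAGRAMS maps a diagram id to its Mermaid source text. Each id is also used
# as the base name of the .mmd and .png files written by
# render_mermaid_diagrams().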
# ============================================================================

DIAGRAMS = {
    "system_architecture": """graph TB
    subgraph Client["Browser"]
        SPA["React SPA<br/>TypeScript + Vite"]
        SIO_C["Socket.IO Client"]
    end

    subgraph Backend["Quart Backend (ASGI)"]
        API["REST API<br/>7 Route Groups"]
        SIO_S["Socket.IO Server"]
        Services["Service Layer<br/>19 Services"]
        Prompts["Prompt Templates<br/>20 Markdown Files"]
    end

    subgraph External["External Services"]
        Gemini["Google Gemini<br/>gemini-3-pro-preview"]
        OpenAI["OpenAI<br/>GPT-4.1 / GPT-5.2"]
        Azure["Azure AD<br/>Microsoft MSAL"]
    end

    MongoDB[("MongoDB<br/>4 Collections")]

    SPA -->|"REST API"| API
    SIO_C <-->|"WebSocket"| SIO_S
    API --> Services
    SIO_S --> Services
    Services --> Prompts
    Services -->|"LLM Calls"| Gemini
    Services -->|"LLM Calls"| OpenAI
    Services -->|"CRUD"| MongoDB
    SPA -->|"OAuth"| Azure

    classDef client fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef backend fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
    classDef external fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
    classDef db fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B

    class SPA,SIO_C client
    class API,SIO_S,Services,Prompts backend
    class Gemini,OpenAI,Azure external
    class MongoDB db
""",

    "deployment_architecture": """graph TB
    User["User Browser"]

    subgraph Production["Production Server (ai-sandbox.oliver.solutions)"]
        Nginx["Nginx<br/>Reverse Proxy"]

        subgraph Static["Static Assets"]
            Vite["Vite Build<br/>/semblance/"]
        end

        subgraph App["Application Server"]
            Hypercorn["Hypercorn ASGI<br/>Port 5137"]
            Quart["Quart App"]
            SocketIO["python-socketio<br/>AsyncServer"]
        end

        Mongo[("MongoDB")]
    end

    subgraph APIs["Cloud APIs"]
        GeminiAPI["Google Gemini API"]
        OpenAIAPI["OpenAI API"]
        AzureAPI["Azure AD / MSAL"]
    end

    User -->|"HTTPS"| Nginx
    Nginx -->|"/semblance/*"| Vite
    Nginx -->|"/semblance_back/*"| Hypercorn
    Hypercorn --> Quart
    Hypercorn --> SocketIO
    Quart --> Mongo
    Quart -->|"API Keys"| GeminiAPI
    Quart -->|"API Keys"| OpenAIAPI
    User -->|"OAuth Popup"| AzureAPI

    classDef user fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef infra fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
    classDef app fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
    classDef db fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
    classDef cloud fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D

    class User user
    class Nginx,Vite infra
    class Hypercorn,Quart,SocketIO app
    class Mongo db
    class GeminiAPI,OpenAIAPI,AzureAPI cloud
""",

    "frontend_components": """graph TD
    App["<b>App.tsx</b>"]

    Providers["<b>Providers</b><br/>QueryClient, BrowserRouter,<br/>MSAL, Auth, WebSocket, Navigation"]

    subgraph Pages["Page Routes"]
        SU["SyntheticUsers"]
        FGS["FocusGroupSession"]
        FG["FocusGroups"]
        Dash["Dashboard"]
    end

    subgraph Session["Session Panels (FocusGroupSession)"]
        direction LR
        DP["Discussion"]
        Parts["Participants"]
        Themes["Themes"]
        Analytics["Analytics"]
        Notes["Notes"]
    end

    subgraph PersonaUI["Persona Components (SyntheticUsers)"]
        direction LR
        AIR["AI Recruiter"]
        UC["Manual Creator"]
        PE["Persona Editor"]
    end

    App --> Providers --> Pages
    FGS --> Session
    SU --> PersonaUI

    classDef appNode fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
    classDef provider fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef route fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
    classDef panel fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
    classDef persona fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F

    class App appNode
    class Providers provider
    class SU,FGS,FG,Dash route
    class DP,Parts,Themes,Analytics,Notes panel
    class AIR,UC,PE persona
""",

    "backend_services": """graph TD
    Routes["<b>API Routes</b><br/>auth, personas, ai-personas,<br/>focus-groups, focus-group-ai,<br/>folders, tasks"]

    subgraph Core["Core Services"]
        direction LR
        LLM["<b>LLMService</b><br/>Gemini + OpenAI"]
        WSM["<b>WebSocketManager</b><br/>Room messaging"]
        TM["<b>TaskManager</b><br/>Cancellable tasks"]
    end

    subgraph AI["AI / Conversation Services (5)"]
        direction LR
        AIR["<b>AIRunner</b><br/>Dedicated thread"]
        ACC["<b>Autonomous</b><br/><b>Controller</b>"]
        CDS["<b>Decision +</b><br/><b>Context Services</b>"]
    end

    subgraph Domain["Domain Services (11)"]
        direction LR
        PS["<b>Persona Services</b><br/>Generation, Export,<br/>Modification"]
        FGS["<b>Focus Group Services</b><br/>Responses, Themes,<br/>Moderator, Summary"]
    end

    subgraph External["External Systems"]
        direction LR
        Gemini["Google Gemini"]
        OAI["OpenAI"]
        DB[("MongoDB")]
    end

    Routes --> Core
    Routes --> AI
    Routes --> Domain
    AI --> LLM
    AI --> WSM
    Domain --> LLM
    Domain --> WSM
    AIR --> ACC --> CDS
    LLM --> Gemini
    LLM --> OAI
    Core --> DB

    classDef routes fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
    classDef core fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef ai fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
    classDef domain fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
    classDef ext fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D

    class Routes routes
    class LLM,WSM,TM core
    class AIR,ACC,CDS ai
    class PS,FGS domain
    class Gemini,OAI,DB ext
""",

    "entity_relationship": """erDiagram
    USER {
        ObjectId _id PK
        string username UK
        string email UK
        string password_hash
        string role
        string auth_type
        string microsoft_id
    }

    PERSONA {
        ObjectId _id PK
        string name
        string age
        string gender
        string occupation
        string location
        string personality
        number techSavviness
        object oceanTraits
        array goals
        array frustrations
        array motivations
        string aiSynthesizedBio
        array folder_ids FK
        ObjectId created_by FK
        datetime created_at
    }

    FOCUS_GROUP {
        ObjectId _id PK
        string name
        string description
        string status
        string llm_model
        string reasoning_effort
        array participants
        array messages
        array themes
        object discussion_guide
        ObjectId created_by FK
        datetime created_at
    }

    FOLDER {
        ObjectId _id PK
        string name
        ObjectId parent_folder_id FK
        number level
        ObjectId created_by FK
        datetime created_at
    }

    USER ||--o{ PERSONA : "creates"
    USER ||--o{ FOCUS_GROUP : "manages"
    USER ||--o{ FOLDER : "creates"
    PERSONA }o--o{ FOCUS_GROUP : "participates in"
    PERSONA }o--o{ FOLDER : "belongs to"
    FOLDER ||--o{ FOLDER : "contains"
""",

    "auth_flow": """sequenceDiagram
    autonumber
    participant U as User Browser
    participant R as React SPA
    participant A as Quart /api/auth
    participant M as Azure AD
    participant DB as MongoDB

    Note over U,DB: Local JWT Authentication
    U->>R: Enter credentials
    R->>A: POST /auth/login
    A->>DB: Find user by username
    DB-->>A: User document
    A->>A: Verify bcrypt hash
    A->>A: Generate JWT (HS256, 24h)
    A-->>R: {token, user}
    R->>R: Store in localStorage

    Note over U,DB: Microsoft OAuth (MSAL)
    U->>R: Click "Sign in with Microsoft"
    R->>M: MSAL popup login
    M-->>R: idToken
    R->>A: POST /auth/microsoft {idToken}
    A->>A: Validate MSAL token
    A->>DB: Find/create user
    A->>A: Generate JWT (HS256, 24h)
    A-->>R: {token, user}
    R->>R: Store in localStorage

    Note over U,DB: WebSocket Authentication
    R->>A: Socket.IO connect {auth: token}
    A->>A: Validate JWT
    A-->>R: connected event
""",

    "websocket_flow": """sequenceDiagram
    autonumber
    participant C as React Client
    participant WS as WebSocket Service
    participant SIO as Socket.IO Server
    participant Room as Focus Group Room
    participant AI as AI Runner Thread
    participant DB as MongoDB

    C->>WS: joinFocusGroup(groupId)
    WS->>SIO: emit("join_focus_group")
    SIO->>Room: Add session to room
    SIO-->>WS: "joined_focus_group"
    WS->>C: window.dispatchEvent("ws:joined")

    Note over C,DB: Manual Mode Message
    C->>SIO: POST /generate-response
    SIO->>AI: Generate persona response
    AI->>DB: Load persona + context
    AI->>AI: LLM generate response
    AI->>DB: Save message
    AI->>Room: emit("message_update")
    Room-->>WS: "message_update"
    WS->>C: window.dispatchEvent("ws:message_update")

    Note over C,DB: Autonomous Mode
    C->>SIO: POST /start-autonomous-mode
    SIO->>AI: Start conversation loop
    loop Every 3-10 seconds
        AI->>AI: Decision engine
        AI->>DB: Save message
        AI->>Room: emit("message_update")
        Room-->>WS: "message_update"
        WS->>C: window.dispatchEvent
        AI->>Room: emit("ai_status_update")
    end
""",

    "llm_pipeline": """flowchart TD
    Request["LLM Request<br/>(prompt, model, params)"]

    Decision{{"Model Type?"}}

    subgraph Gemini["Google Gemini Path"]
        GClient["Create Gemini Client"]
        GConfig["GenerateContentConfig<br/>temperature, max_tokens"]
        GCall["genai.generate_content()"]
    end

    subgraph OpenAI4["OpenAI GPT-4.1 Path"]
        O4Client["Create AsyncOpenAI Client"]
        O4Call["chat.completions.create()<br/>model=gpt-4.1"]
    end

    subgraph OpenAI5["OpenAI GPT-5.2 Path"]
        O5Client["Create AsyncOpenAI Client"]
        O5Call["responses.create()<br/>reasoning=effort<br/>verbosity=level"]
    end

    Retry{{"Success?"}}
    Parse["Parse Response<br/>Extract JSON if needed"]
    Return["Return Result"]
    RetryLogic["Retry with Backoff<br/>1s, 2s, 4s (max 3)"]

    Request --> Decision
    Decision -->|"gemini-*"| GClient --> GConfig --> GCall --> Retry
    Decision -->|"gpt-4.1"| O4Client --> O4Call --> Retry
    Decision -->|"gpt-5.2"| O5Client --> O5Call --> Retry
    Retry -->|"Yes"| Parse --> Return
    Retry -->|"No"| RetryLogic --> Decision

    classDef req fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
    classDef gemini fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
    classDef oai fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef oai5 fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
    classDef logic fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F

    class Request req
    class GClient,GConfig,GCall gemini
    class O4Client,O4Call oai
    class O5Client,O5Call oai5
    class Decision,Retry,RetryLogic logic
    class Parse,Return req
""",

    "persona_generation": """flowchart LR
    Brief["Audience Brief<br/>+ Research Objective"]
    Enhance["AI Brief<br/>Enhancement"]
    Stage1["Stage 1:<br/>Generate Basic<br/>Profiles"]
    Review["User Reviews<br/>Basic Profiles"]
    Stage2["Stage 2:<br/>Generate Detailed<br/>Personas"]
    Save["Save to<br/>MongoDB"]
    Library["View in<br/>Persona Library"]

    Brief --> Enhance --> Stage1 --> Review --> Stage2 --> Save --> Library

    subgraph Stage1Detail["Stage 1 Output"]
        S1["Name, Age, Gender<br/>Occupation, Location<br/>Personality Summary<br/>Interests, Tech Savviness"]
    end

    subgraph Stage2Detail["Stage 2 Output"]
        S2["OCEAN Traits (0-100)<br/>Goals, Frustrations<br/>Motivations, Scenarios<br/>Think-Feel-Do<br/>AI Synthesized Bio"]
    end

    Stage1 -.-> S1
    Stage2 -.-> S2

    classDef input fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
    classDef process fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
    classDef review fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
    classDef output fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
    classDef detail fill:#FAFAFA,stroke:#E8D9DE,stroke-width:1px,color:#2D1F24

    class Brief,Enhance input
    class Stage1,Stage2 process
    class Review review
    class Save,Library output
    class S1,S2 detail
""",

    "focus_group_states": """stateDiagram-v2
    [*] --> new : Create Focus Group

    new --> setup : Configure Settings
    setup --> setup : Edit Guide / Add Participants
    setup --> manual_mode : Start Manual Session
    setup --> ai_mode : Start Autonomous Mode

    manual_mode --> ai_mode : Switch to Autonomous
    ai_mode --> manual_mode : Switch to Manual

    manual_mode --> completed : End Session
    ai_mode --> completed : End Session / Guide Complete

    ai_mode --> error : AI Error / Timeout

    completed --> [*]
    error --> manual_mode : Resume Manually
    error --> [*]
""",

    "autonomous_conversation": """sequenceDiagram
    autonumber
    participant Controller as Autonomous Controller
    participant LLM as LLM + Decision Services
    participant DB as MongoDB
    participant WS as WebSocket

    Note over Controller,WS: One Conversation Loop Iteration

    Controller->>DB: Load context (messages + participants)
    DB-->>Controller: Conversation state

    Controller->>LLM: decide_next_action(context)
    LLM-->>Controller: {action, participant, reasoning}

    alt respond / moderate / probe
        Controller->>LLM: Generate message for action
        LLM-->>Controller: Message text
        Controller->>DB: Save message
        Controller->>WS: emit message_update
    else end_session
        Controller->>DB: status = completed
        Controller->>WS: emit ai_status_update
    end

    Controller->>Controller: Jitter wait (3-10s)
    Controller->>Controller: Safety checks (action + silence limits)

    Note over Controller: Loop until end_session or limit reached
""",
}


# ============================================================================
# MERMAID RENDERING
# ============================================================================

def render_mermaid_diagrams(output_dir):
    """Render every diagram in DIAGRAMS to a PNG using the Mermaid CLI (mmdc).

    Writes a shared Mermaid config file and one ``.mmd`` source file per
    diagram into *output_dir*, then invokes ``mmdc`` through ``npx`` once per
    diagram. Rendering is best-effort: a diagram that fails or times out is
    reported with a warning on stdout and left out of the result instead of
    aborting the whole run.

    Args:
        output_dir: Directory that receives the config, ``.mmd`` and ``.png``
            files. Created if it does not exist.

    Returns:
        dict mapping diagram id -> path of the PNG produced by this call.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Custom Mermaid config — light backgrounds with dark text for PDF readability.
    # Each key appears exactly once; the note* entries carry the values that
    # were effective before (the later duplicates used to override the earlier
    # ones, so noteBorderColor is "#D97706").
    config = {
        "theme": "base",
        "themeVariables": {
            "primaryColor": "#F5EEF0",
            "primaryTextColor": "#2D1F24",
            "primaryBorderColor": "#9B4D63",
            "lineColor": "#6B2D3F",
            "secondaryColor": "#EFF6FF",
            "secondaryTextColor": "#2D1F24",
            "secondaryBorderColor": "#3B82F6",
            "tertiaryColor": "#F0FDF4",
            "tertiaryTextColor": "#2D1F24",
            "tertiaryBorderColor": "#22C55E",
            "noteBkgColor": "#FFF7ED",
            "noteTextColor": "#2D1F24",
            "noteBorderColor": "#D97706",
            "actorBkg": "#F5EEF0",
            "actorBorder": "#9B4D63",
            "actorTextColor": "#2D1F24",
            "signalColor": "#6B2D3F",
            "signalTextColor": "#2D1F24",
            "activationBkgColor": "#F5EEF0",
            "activationBorderColor": "#9B4D63",
            "sequenceNumberColor": "#FFFFFF",
            "labelBoxBkgColor": "#F5EEF0",
            "labelBoxBorderColor": "#9B4D63",
            "labelTextColor": "#2D1F24",
            "loopTextColor": "#6B2D3F",
            "fontSize": "16px",
            "fontFamily": "Helvetica, Arial, sans-serif",
        }
    }

    config_path = os.path.join(output_dir, "mermaid-config.json")
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)

    rendered = {}
    for diagram_id, source in DIAGRAMS.items():
        input_path = os.path.join(output_dir, f"{diagram_id}.mmd")
        output_path = os.path.join(output_dir, f"{diagram_id}.png")

        # Explicit UTF-8 so non-ASCII diagram labels do not depend on the
        # platform's default locale encoding.
        with open(input_path, "w", encoding="utf-8") as f:
            f.write(source)

        # "-p <package> mmdc" installs the package and runs its mmdc binary;
        # without -p the word "mmdc" is handed to the CLI as a stray argument.
        cmd = [
            "npx", "-y", "-p", "@mermaid-js/mermaid-cli", "mmdc",
            "-i", input_path,
            "-o", output_path,
            "-c", config_path,
            "-w", "2400",
            "-s", "3",
            "-b", "transparent",
        ]

        print(f"    Rendering {diagram_id}...")
        try:
            # Remove a PNG left over from an earlier run so that the existence
            # check below only succeeds for output produced by this invocation.
            if os.path.exists(output_path):
                os.remove(output_path)

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if os.path.exists(output_path):
                rendered[diagram_id] = output_path
            else:
                print(f"    WARNING: {diagram_id} not rendered: {result.stderr[:200]}")
        except subprocess.TimeoutExpired:
            print(f"    WARNING: {diagram_id} timed out")
        except Exception as exc:
            # Deliberately broad: one broken diagram (e.g. npx missing) must
            # not abort document generation.
            print(f"    WARNING: {diagram_id} failed: {exc}")

    return rendered


# ============================================================================
# DIAGRAM HELPER
# ============================================================================

def diagram(rendered, diagram_id, caption):
    """Build the flowables that place one rendered Mermaid diagram.

    Looks up *diagram_id* in *rendered* (falling back to an empty path when
    the id is absent) and returns a three-item list: a spacer, the captioned
    DiagramImage, and a trailing spacer.
    """
    flowables = [sp(6)]
    flowables.append(DiagramImage(rendered.get(diagram_id, ""), caption))
    flowables.append(sp(4))
    return flowables


# ============================================================================
# CHAPTERS
# ============================================================================

def build_chapter_1(rendered):
    """Chapter 1: Executive Overview.

    Produces, in reading order: the chapter title page, the Purpose text, the
    Key Capabilities bullets, the Technology Summary table, and the System
    Architecture Overview with Figure 1.1.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    capabilities = [
        "<b>AI Persona Generation</b> \u2014 Two-stage pipeline: basic demographic profiles then full personality expansion (OCEAN traits, goals, frustrations, motivations, scenarios, AI-synthesized biography).",
        "<b>Focus Group Simulation</b> \u2014 Manual moderation or fully autonomous AI-driven conversations with real-time WebSocket updates.",
        "<b>Multi-Model LLM Support</b> \u2014 Unified service abstracting Google Gemini and OpenAI models with retry logic and model-specific parameter handling.",
        "<b>Real-Time Collaboration</b> \u2014 Socket.IO room-based messaging for live session observation with event-driven UI updates.",
        "<b>Comprehensive Analysis</b> \u2014 AI-powered theme extraction, sentiment analysis, participation balance scoring, and exportable reports.",
        "<b>Enterprise Authentication</b> \u2014 Dual auth: local JWT credentials and Microsoft Entra ID (MSAL) OAuth.",
    ]
    tech_rows = [
        ["Frontend", "React 18, TypeScript, Vite, Tailwind CSS, shadcn-ui (Radix UI), React Router, TanStack Query, Socket.IO Client"],
        ["Backend", "Python, Quart (async Flask), Hypercorn ASGI, python-socketio, PyMongo"],
        ["Database", "MongoDB (4 collections: users, personas, focus_groups, folders)"],
        ["AI / LLM", "Google Gemini (gemini-3-pro-preview), OpenAI GPT-4.1, OpenAI GPT-5.2"],
        ["Authentication", "Custom JWT (HS256, 24h expiry), Microsoft MSAL (Entra ID)"],
        ["Real-Time", "Socket.IO (WebSocket with polling fallback)"],
    ]

    # Title page first, then switch back to the regular content template.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(1, "Executive Overview", "System Purpose and Capabilities"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Executive Overview", "ch1"),
        h2("Purpose", "ch1_purpose"),
        p(
            "Semblance is an AI-powered synthetic focus group research platform that enables researchers, "
            "product teams, and UX professionals to create detailed synthetic personas using large language "
            "models, organize them into focus groups, and conduct moderated or fully autonomous research "
            "sessions \u2014 all without recruiting real participants."
        ),
        p(
            "The platform supports multi-model AI integration (Google Gemini, OpenAI GPT-4.1 and GPT-5.2), "
            "real-time WebSocket communication for live session collaboration, and comprehensive analysis "
            "tools including sentiment analysis, theme extraction, and participation analytics."
        ),
        h2("Key Capabilities", "ch1_capabilities"),
    ]
    story.extend(bullet(text) for text in capabilities)
    story.append(sp())

    story += [
        h2("Technology Summary", "ch1_tech"),
        styled_table(
            ["Layer", "Technologies"],
            tech_rows,
            col_widths=[100, CONTENT_WIDTH - 108],
        ),
        sp(),
        h2("System Architecture Overview", "ch1_arch"),
        p(
            "Semblance follows a three-tier architecture: a React single-page application communicates "
            "with a Python Quart backend through REST APIs and WebSocket connections. The backend "
            "orchestrates multiple LLM providers and persists all data in MongoDB."
        ),
    ]
    story += diagram(rendered, "system_architecture",
                     "Figure 1.1 \u2014 High-Level System Architecture")

    return story


def build_chapter_2(rendered):
    """Chapter 2: System Architecture.

    Produces the chapter title page followed by the Three-Tier Architecture
    bullets, the Deployment Topology text with Figure 2.1, the Environment
    Configuration table, and the Application Factory Pattern summary.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    env_rows = [
        ["Base Path", "/", "/semblance/"],
        ["API Base URL", "/api", "https://ai-sandbox.oliver.solutions/semblance_back/api"],
        ["WebSocket Path", "/socket.io/", "/semblance_back/socket.io/"],
        ["Frontend Port", "5173 (Vite dev server)", "Static assets via Nginx"],
        ["Backend Port", "5137 (Hypercorn)", "5137 (proxied via Nginx)"],
        ["MSAL Redirect", "http://localhost:5173/", "https://ai-sandbox.oliver.solutions/semblance"],
    ]
    # The two value columns share the width left over after the label column.
    value_col = (CONTENT_WIDTH - 118) / 2
    factory_settings = [
        "<b>JWT Secret</b> \u2014 From JWT_SECRET_KEY environment variable",
        "<b>Token Expiry</b> \u2014 86,400 seconds (24 hours)",
        "<b>Max Upload</b> \u2014 16 MB",
        "<b>Request Timeout</b> \u2014 300 seconds (5 minutes)",
        "<b>CORS</b> \u2014 allow_origin=\"*\" for all methods",
    ]

    story = [
        # Title page, then back to the regular content template.
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(2, "System Architecture", "Deployment and Infrastructure"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("System Architecture", "ch2"),

        h2("Three-Tier Architecture", "ch2_tiers"),
        p(
            "The application is organized into three distinct tiers, each independently deployable:"
        ),
        bullet(
            "<b>Presentation Tier</b> \u2014 React SPA built with Vite, served as static assets. "
            "Handles all UI rendering, client-side routing, and WebSocket event dispatching."
        ),
        bullet(
            "<b>Application Tier</b> \u2014 Quart (async Flask) application running under Hypercorn ASGI server. "
            "Hosts the REST API (7 blueprint groups), Socket.IO server, 19 business logic services, and "
            "a dedicated AI runner thread for autonomous conversations."
        ),
        bullet(
            "<b>Data Tier</b> \u2014 MongoDB document database storing users, personas, focus groups, and folders. "
            "Accessed via PyMongo (sync) in route handlers and Motor (async) in the AI runner thread."
        ),
        sp(),

        h2("Deployment Topology", "ch2_deploy"),
        p(
            "In production, the application is deployed at <b>ai-sandbox.oliver.solutions</b> behind an "
            "Nginx reverse proxy that routes requests to either the static frontend assets or the backend "
            "application server."
        ),
        *diagram(rendered, "deployment_architecture",
                 "Figure 2.1 \u2014 Production Deployment Architecture"),

        h2("Environment Configuration", "ch2_env"),
        p(
            "The application supports dual environments through Vite environment files. "
            "Configuration switches base paths, API URLs, WebSocket paths, and authentication redirects."
        ),
        styled_table(
            ["Setting", "Development", "Production"],
            env_rows,
            col_widths=[110, value_col, value_col],
        ),
        sp(),

        h2("Application Factory Pattern", "ch2_factory"),
        p(
            "The backend uses an application factory pattern (<b>create_app()</b> in app/__init__.py) "
            "that initializes the Quart app, configures CORS, registers 7 route blueprints, sets up "
            "JWT authentication, initializes the WebSocket manager, and starts the AI runner service. "
            "Key configuration:"
        ),
        *[bullet(setting) for setting in factory_settings],
        sp(),
    ]

    return story


def build_chapter_3(rendered):
    """Chapter 3: Frontend Architecture.

    Produces the chapter title page followed by the Technology Stack text,
    the Provider Hierarchy table with Figure 3.1, the Route Structure table,
    the State Management Strategy bullets, and the Component Organization
    table.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    e = []
    # Title page, then back to the regular content template.
    e.append(NextPageTemplate("chapter"))
    e.append(PageBreak())
    e.append(ChapterTitlePage(3, "Frontend Architecture", "React SPA Structure and Patterns"))
    e.append(NextPageTemplate("content"))
    e.append(PageBreak())

    e.append(h1("Frontend Architecture", "ch3"))

    e.append(h2("Technology Stack", "ch3_stack"))
    e.append(p(
        "The frontend is a React 18 single-page application built with TypeScript and Vite. "
        "UI components use shadcn-ui (Radix UI primitives) styled with Tailwind CSS. "
        "Data fetching uses TanStack Query with form handling via React Hook Form and Zod validation."
    ))
    e.append(sp())

    e.append(h2("Provider Hierarchy", "ch3_providers"))
    e.append(p(
        "App.tsx wraps the entire application in a nested provider hierarchy. Each provider "
        "adds a layer of functionality accessible throughout the component tree:"
    ))
    # Column widths add up to CONTENT_WIDTH - 8 like every other table in the
    # document (110 + 8 = 118 is taken off before splitting the remainder);
    # the previous "- 200" left this one table 82 points narrower.
    provider_rest = CONTENT_WIDTH - 118
    e.append(styled_table(
        ["Provider", "Purpose", "Key State"],
        [
            ["QueryClientProvider", "TanStack Query data fetching and caching", "Query cache, stale-while-revalidate"],
            ["BrowserRouter", "Client-side routing with dynamic base path", "Route location, navigation"],
            ["MsalProvider", "Microsoft Azure AD authentication", "MSAL instance, account info"],
            ["AuthProvider", "JWT token management and session persistence", "user, token, isAuthenticated"],
            ["WebSocketProvider", "Singleton Socket.IO connection management", "socketId, connection state"],
            ["NavigationProvider", "Navigation state and focus group context", "previousRoute, focusGroupId, folderId"],
        ],
        col_widths=[110, provider_rest * 0.55, provider_rest * 0.45],
    ))
    e.append(sp())
    e.extend(diagram(rendered, "frontend_components",
                      "Figure 3.1 \u2014 Frontend Component Hierarchy"))

    e.append(h2("Route Structure", "ch3_routes"))
    e.append(styled_table(
        ["Path", "Component", "Auth", "Description"],
        [
            ["/", "Index", "No", "Landing page with platform overview"],
            ["/login", "Login", "No", "Authentication (local + Microsoft OAuth)"],
            ["/synthetic-users", "SyntheticUsers", "Yes", "Persona library and management"],
            ["/synthetic-users/:id", "PersonaProfile", "Yes", "Individual persona detail view"],
            ["/focus-groups", "FocusGroups", "Yes", "Focus group listing and creation"],
            ["/focus-groups/:id", "FocusGroupSession", "Yes", "Live session interface (multi-panel)"],
            ["/dashboard", "Dashboard", "Yes", "Analytics and research metrics"],
        ],
        col_widths=[115, 100, 30, CONTENT_WIDTH - 253],
    ))
    e.append(sp())

    e.append(h2("State Management Strategy", "ch3_state"))
    e.append(p(
        "The application uses a layered state management approach rather than a single global store:"
    ))
    e.append(bullet(
        "<b>Global Persisted State</b> \u2014 AuthContext (JWT + user in localStorage), "
        "NavigationContext (route history in localStorage), TanStack Query cache (server data)."
    ))
    e.append(bullet(
        "<b>Component State</b> \u2014 React hooks for UI state (tabs, modals, filters), "
        "React Hook Form for form state, temporary editing data."
    ))
    e.append(bullet(
        "<b>WebSocket State</b> \u2014 Connection status, real-time updates dispatched as "
        "window CustomEvents (ws:message_update, ws:ai_status_update, etc.)."
    ))
    e.append(sp())

    e.append(h2("Component Organization", "ch3_components"))
    e.append(styled_table(
        ["Directory", "Contents"],
        [
            ["src/components/ui/", "Reusable shadcn-ui components (Button, Card, Dialog, Tabs, etc.) plus custom components (ProgressModal, MentionInput, SaveStatusIndicator)"],
            ["src/components/focus-group-session/", "25+ components for the session interface: DiscussionPanel, ParticipantPanel, ThemesPanel, AnalyticsPanel, ReasoningPanel, NotesPanel, AutonomousDashboard"],
            ["src/components/persona/", "Persona profile viewing and editing: PersonaProfile, PersonaEditor, PersonaPersonality, PersonaAttitudinalProfile, PersonaScenarios"],
            ["src/components/dashboard/", "Dashboard analytics: StatCard, OverviewTab, UsersTab, FocusGroupsTab"],
            ["src/components/auth/", "Authentication: MsalProvider (Azure AD setup)"],
            ["src/hooks/", "Custom hooks: useWebSocket, useCancellableGeneration, usePersonaFiltering, useFocusGroupAutoSave, useFolderManagement"],
            ["src/services/", "WebSocket singleton service with event dispatching via window CustomEvents"],
            ["src/types/", "TypeScript type definitions: Persona (70+ fields), CancellableTask, NavigationState"],
        ],
        col_widths=[160, CONTENT_WIDTH - 168],
    ))
    e.append(sp())

    return e


def build_chapter_4(rendered):
    """Chapter 4: Backend Architecture.

    Produces the chapter title page followed by the ASGI Application Stack
    text, the Service Layer overview with Figure 4.1, four service tables
    (core, AI / conversation, persona, focus group), and the Prompt Template
    System table.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    core_services = [
        ["LLMService", "llm_service.py", "Multi-model abstraction (Gemini, GPT-4.1, GPT-5.2) with retry logic and JSON parsing"],
        ["WebSocketManager", "websocket_manager_async.py", "Room-based messaging, event emission, connection tracking"],
        ["TaskManager", "task_manager.py", "CancellableTask wrapper for long-running operations with per-user tracking"],
        ["PromptLoader", "utils/prompt_loader.py", "Loads and interpolates 20 markdown prompt templates"],
    ]
    ai_services = [
        ["AIRunnerService", "ai_runner_service.py", "Singleton: dedicated thread + event loop for autonomous conversations"],
        ["AutonomousConversation Controller", "autonomous_conversation_controller.py", "State machine orchestrating multi-persona conversation flow"],
        ["ConversationDecision Service", "conversation_decision_service.py", "LLM-driven decision engine: next speaker, action type, probing"],
        ["ConversationContext Service", "conversation_context_service.py", "Aggregates messages, participants, and state for LLM context"],
        ["ConversationState Manager", "conversation_state_manager.py", "Tracks participation metrics, sentiment, energy levels"],
    ]
    persona_services = [
        ["AIPersonaService", "ai_persona_service.py", "Two-stage persona generation with customer data integration"],
        ["PersonaModification Service", "persona_modification_service.py", "AI-assisted persona editing"],
        ["PersonaExportService", "persona_export_service.py", "Individual persona profile export"],
        ["BulkExportService", "bulk_persona_export_service.py", "Batch persona export (MD/JSON/CSV)"],
        ["CustomerDataService", "customer_data_service.py", "Upload and integrate research data into generation"],
    ]
    focus_group_services = [
        ["FocusGroupService", "focus_group_service.py", "CRUD operations and discussion guide generation"],
        ["FocusGroupResponse Service", "focus_group_response_service.py", "Generate persona responses with personality-driven prompts"],
        ["KeyThemeService", "key_theme_service.py", "AI-powered theme extraction from conversation messages"],
        ["AIModeratorService", "ai_moderator_service.py", "AI moderator intervention and discussion guidance"],
        ["FocusGroupSummary Service", "focus_group_summary_service.py", "Comprehensive session summary generation"],
        ["ImageDescription Service", "image_description_service.py", "Multimodal image description for uploaded assets"],
    ]
    prompt_templates = [
        ["persona-basic-generation.md", "Stage 1 persona generation (demographics)"],
        ["persona-detailed-generation.md", "Stage 2 persona expansion (full profile)"],
        ["persona-system.md", "System prompt for persona-as-character responses"],
        ["focus-group-response.md", "In-session persona response generation"],
        ["conversation-decision-engine.md", "Autonomous mode: next action decision"],
        ["conversation-participant-selection.md", "Autonomous mode: speaker selection"],
        ["ai-moderator-system.md", "AI moderator system prompt"],
        ["probe-generation-prompt.md", "Probing question generation"],
        ["key-theme-extraction.md", "Theme extraction from conversation"],
        ["discussion-guide-generation.md", "Structured discussion guide creation"],
        ["focus-group-summary-generation.md", "Post-session summary generation"],
        ["audience-brief-enhancement.md", "Research brief AI enhancement"],
        ["image-description.md", "Multimodal image analysis"],
        ["persona-interaction-prompt.md", "Persona-to-persona interaction"],
        ["persona-to-persona-response.md", "Inter-persona conversation"],
        ["persona-modification.md", "AI-assisted persona editing"],
        ["persona-summary-generation.md", "Persona summary for display"],
        ["persona-download-summary.md", "Export summary format"],
        ["persona-profile-export.md", "Full profile export format"],
        ["key-theme-system.md", "Theme extraction system prompt"],
    ]

    story = [
        # Title page, then back to the regular content template.
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(4, "Backend Architecture", "Services, Routes, and Prompt Templates"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("Backend Architecture", "ch4"),

        h2("ASGI Application Stack", "ch4_stack"),
        p(
            "The backend is a Quart application (async Flask) running under Hypercorn, an ASGI server. "
            "The ASGI stack layers python-socketio on top of the Quart app, enabling both HTTP and "
            "WebSocket communication through a single server process on port 5137."
        ),
        p(
            "A dedicated AI runner thread with its own asyncio event loop handles autonomous conversation "
            "execution, isolated from the main request-handling event loop. This avoids Motor (async MongoDB) "
            "event loop affinity issues and prevents long-running AI operations from blocking HTTP requests."
        ),
        sp(),

        h2("Service Layer", "ch4_services"),
        p(
            "Business logic is organized into 19 service modules, each responsible for a specific domain. "
            "Services are stateless (except the AI Runner singleton) and communicate through function calls, "
            "the LLM service, and the WebSocket manager."
        ),
        *diagram(rendered, "backend_services",
                 "Figure 4.1 \u2014 Backend Service Architecture"),

        h3("Core Services", "ch4_core"),
        mono_table(
            ["Service", "File", "Purpose"],
            core_services,
            col_widths=[110, 120, CONTENT_WIDTH - 238],
        ),
        sp(),

        h3("AI / Conversation Services", "ch4_ai"),
        mono_table(
            ["Service", "File", "Purpose"],
            ai_services,
            col_widths=[110, 140, CONTENT_WIDTH - 258],
        ),
        sp(),

        h3("Persona Services", "ch4_persona"),
        mono_table(
            ["Service", "File", "Purpose"],
            persona_services,
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),

        h3("Focus Group Services", "ch4_fg"),
        mono_table(
            ["Service", "File", "Purpose"],
            focus_group_services,
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),

        h2("Prompt Template System", "ch4_prompts"),
        p(
            "The backend uses 20 markdown prompt templates stored in <b>/backend/prompts/</b>. "
            "The PromptLoader utility loads these files and interpolates context variables (persona data, "
            "conversation history, discussion guide) before sending them to the LLM service."
        ),
        styled_table(
            ["Template", "Used By"],
            prompt_templates,
            col_widths=[175, CONTENT_WIDTH - 183],
        ),
        sp(),
    ]

    return story


def build_chapter_5(rendered):
    """Chapter 5: Data Model.

    Produces the chapter title page followed by the Collections Overview with
    Figure 5.1, one field table per collection (User, Persona core fields,
    Persona personality fields, Focus Group, Folder), and the Relationships
    bullets.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    user_fields = [
        ["_id", "ObjectId", "Primary key"],
        ["username", "String (unique)", "Login identifier"],
        ["email", "String (unique)", "Email address"],
        ["password_hash", "String", "bcrypt-hashed password"],
        ["role", "String", "User role (default: \"user\")"],
        ["auth_type", "String", "\"local\" or \"microsoft\""],
        ["microsoft_id", "String", "Azure AD object ID (optional)"],
    ]
    persona_core_fields = [
        ["_id", "ObjectId", "Primary key"],
        ["name", "String", "Persona display name"],
        ["age, gender, occupation", "String", "Demographics"],
        ["location, education", "String", "Geographic and educational background"],
        ["personality", "String", "Personality summary text"],
        ["techSavviness", "Number", "Technology comfort level (0\u2013100)"],
        ["created_by", "ObjectId", "Reference to User who created this persona"],
        ["folder_ids", "Array[ObjectId]", "Folders this persona belongs to (many-to-many)"],
    ]
    persona_psych_fields = [
        ["oceanTraits", "Object", "OCEAN scores (0\u2013100): openness, conscientiousness, extraversion, agreeableness, neuroticism"],
        ["thinkFeelDo", "Object", "Arrays of thinks, feels, does statements"],
        ["goals", "Array[String]", "Life and professional goals"],
        ["frustrations", "Array[String]", "Pain points and frustrations"],
        ["motivations", "Array[String]", "Driving motivations"],
        ["selfDeterminationNeeds", "Object", "Autonomy, competence, relatedness assessments"],
        ["scenarios", "Array[String]", "Behavioral scenario descriptions"],
        ["aiSynthesizedBio", "String", "AI-generated narrative biography (2\u20133 lines)"],
    ]
    focus_group_fields = [
        ["_id", "ObjectId", "Primary key"],
        ["name, description", "String", "Session title and research topic"],
        ["status", "String", "\"new\", \"manual_mode\", \"ai_mode\", or \"completed\""],
        ["llm_model", "String", "Selected LLM model (default: gemini-3-pro-preview)"],
        ["reasoning_effort", "String", "GPT-5.2 reasoning level (minimal/low/medium/high)"],
        ["participants", "Array[Object]", "Participant objects with persona_id references"],
        ["messages", "Array[Object]", "Conversation messages (sender, text, timestamp, sentiment)"],
        ["themes", "Array[Object]", "Extracted themes with supporting quotes"],
        ["discussion_guide", "Object", "Structured guide with sections and items"],
        ["created_by", "ObjectId", "Reference to User"],
        ["autonomous_started_at", "DateTime", "Timestamp of autonomous mode start"],
    ]
    folder_fields = [
        ["_id", "ObjectId", "Primary key"],
        ["name", "String", "Folder display name"],
        ["parent_folder_id", "ObjectId", "Parent folder reference (null for root)"],
        ["level", "Number", "Depth level (0 = root, 1 = child, max depth: 2)"],
        ["created_by", "ObjectId", "Reference to User"],
    ]
    relationships = [
        "<b>User \u2192 Persona</b> (1:N) \u2014 created_by field on Persona",
        "<b>User \u2192 Focus Group</b> (1:N) \u2014 created_by field on Focus Group",
        "<b>User \u2192 Folder</b> (1:N) \u2014 created_by field on Folder",
        "<b>Persona \u2194 Focus Group</b> (M:N) \u2014 participants array in Focus Group references persona IDs",
        "<b>Persona \u2194 Folder</b> (M:N) \u2014 folder_ids array on Persona references folder IDs",
        "<b>Folder \u2192 Folder</b> (1:N) \u2014 parent_folder_id for hierarchy",
    ]

    story = [
        # Title page, then back to the regular content template.
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(5, "Data Model", "MongoDB Collections and Relationships"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("Data Model", "ch5"),

        h2("Collections Overview", "ch5_overview"),
        p(
            "Semblance stores data in MongoDB across four primary collections. Documents use MongoDB "
            "ObjectId references for relationships rather than foreign key constraints."
        ),
        *diagram(rendered, "entity_relationship",
                 "Figure 5.1 \u2014 Entity Relationship Diagram"),

        h2("User Collection", "ch5_user"),
        p(
            "Stores authentication credentials and profile information. Supports dual auth types."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            user_fields,
            col_widths=[100, 100, CONTENT_WIDTH - 208],
        ),
        sp(),

        h2("Persona Collection", "ch5_persona"),
        p(
            "The richest data model with 70+ fields covering demographics, OCEAN personality traits, "
            "behavioral attributes, motivations, scenarios, and AI-generated content."
        ),
        h3("Core Fields", "ch5_persona_core"),
        styled_table(
            ["Field", "Type", "Description"],
            persona_core_fields,
            col_widths=[115, 100, CONTENT_WIDTH - 223],
        ),
        sp(),

        h3("Personality & Psychographic Fields", "ch5_persona_ocean"),
        styled_table(
            ["Field", "Type", "Description"],
            persona_psych_fields,
            col_widths=[125, 85, CONTENT_WIDTH - 218],
        ),
        sp(),

        h2("Focus Group Collection", "ch5_fg"),
        p(
            "Stores session configuration, participant references, conversation messages, themes, and "
            "discussion guide. Messages and themes are embedded documents within the focus group."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            focus_group_fields,
            col_widths=[120, 90, CONTENT_WIDTH - 218],
        ),
        sp(),

        h2("Folder Collection", "ch5_folder"),
        p(
            "Hierarchical folder structure for organizing personas. Supports two-level nesting."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            folder_fields,
            col_widths=[115, 80, CONTENT_WIDTH - 203],
        ),
        sp(),

        h2("Relationships", "ch5_rels"),
        *[bullet(relation) for relation in relationships],
        sp(),
    ]

    return story


def build_chapter_6(rendered):
    """Chapter 6: Authentication & Authorization.

    Produces the chapter title page followed by the Dual Authentication
    System bullets with Figure 6.1, the JWT Token Lifecycle table, and the
    Frontend Token Management, WebSocket Authentication and Route Protection
    sections.

    Args:
        rendered: Rendered-diagram lookup handed through to ``diagram()``.

    Returns:
        List of flowables making up the chapter.
    """
    jwt_properties = [
        ["Algorithm", "HS256"],
        ["Expiration", "24 hours"],
        ["Claims", "sub (user_id as string)"],
        ["Storage", "localStorage (auth_token key)"],
        ["Header Format", "Authorization: Bearer {token}"],
        ["Validation", "Checked on every API request via interceptor; validated server-side via @jwt_required decorator"],
    ]
    token_management = [
        "<b>Request interceptor</b> \u2014 Extracts JWT from localStorage, validates expiration by decoding the payload, attaches as Bearer token header.",
        "<b>Response interceptor</b> \u2014 Catches 401 responses, dispatches auth_error_event, clears localStorage, redirects to /login.",
        "<b>Session restoration</b> \u2014 On app mount, checks for existing token in localStorage, validates via GET /auth/me, restores session if valid.",
    ]
    ws_auth_steps = [
        "Client sends: <b>auth: { token: jwt_token }</b> in Socket.IO connection options",
        "Server extracts and validates the JWT on the connect event",
        "On success: emits <b>connected</b> event with session info",
        "On failure: emits <b>auth_error</b> event and disconnects",
    ]

    story = [
        # Title page, then back to the regular content template.
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(6, "Authentication", "JWT, Microsoft OAuth, and WebSocket Auth"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("Authentication & Authorization", "ch6"),

        h2("Dual Authentication System", "ch6_dual"),
        p(
            "Semblance supports two authentication methods that produce identical JWT tokens, "
            "allowing the rest of the application to be auth-method-agnostic:"
        ),
        bullet(
            "<b>Local JWT</b> \u2014 Username/password authentication with bcrypt password hashing. "
            "Tokens are HS256-signed with a 24-hour expiry."
        ),
        bullet(
            "<b>Microsoft OAuth (MSAL)</b> \u2014 Azure AD authentication via MSAL popup flow. "
            "The frontend obtains an idToken from Azure, sends it to the backend for validation, "
            "and receives a Semblance JWT in return."
        ),
        sp(),

        *diagram(rendered, "auth_flow",
                 "Figure 6.1 \u2014 Authentication Flow (Local JWT and Microsoft OAuth)"),

        h2("JWT Token Lifecycle", "ch6_jwt"),
        styled_table(
            ["Property", "Value"],
            jwt_properties,
            col_widths=[120, CONTENT_WIDTH - 128],
        ),
        sp(),

        h2("Frontend Token Management", "ch6_frontend"),
        p(
            "The Axios API client includes request and response interceptors for automatic token management:"
        ),
        *[bullet(item) for item in token_management],
        sp(),

        h2("WebSocket Authentication", "ch6_ws"),
        p(
            "Socket.IO connections authenticate via JWT token passed in the auth parameter during "
            "the connection handshake. The server validates the token before accepting the connection:"
        ),
        *[bullet(step) for step in ws_auth_steps],
        sp(),

        h2("Route Protection", "ch6_protection"),
        p(
            "Backend routes are protected with the <b>@jwt_required()</b> decorator (custom Quart-compatible "
            "implementation using PyJWT directly, not Flask-JWT-Extended). The decorator validates the "
            "Authorization header and makes the user identity available via <b>get_jwt_identity()</b>."
        ),
        p(
            "Frontend routes use the <b>ProtectedRoute</b> wrapper component that checks AuthContext "
            "isAuthenticated state and redirects unauthenticated users to /login."
        ),
        sp(),
    ]

    return story


def build_chapter_7(rendered):
    """Assemble the story elements for Chapter 7: Real-Time Communication.

    Args:
        rendered: Passed straight through to ``diagram()`` when placing
            Figure 7.1 (the "websocket_flow" diagram).

    Returns:
        A list of story elements in reading order: chapter title page,
        section headings, body text, the figure, and the event and
        settings tables.
    """
    # Table contents are plain data, so they are declared up front to keep
    # the element sequence below easy to scan.
    client_to_server_rows = [
        ["connect", "auth: {token}", "Authenticate WebSocket connection"],
        ["join_focus_group", "{focus_group_id}", "Join session room"],
        ["leave_focus_group", "{focus_group_id}", "Leave session room"],
        ["cancel_task", "{task_id}", "Cancel running AI task"],
    ]
    server_to_client_rows = [
        ["connected", "Connection success confirmation with session info"],
        ["auth_error", "Authentication failure notification"],
        ["joined_focus_group", "Room join confirmation"],
        ["message_update", "New message in conversation (includes sender, text, timestamp)"],
        ["ai_status_update", "AI mode status change (running/paused/completed/error)"],
        ["moderator_status_update", "Moderator action notification"],
        ["theme_update", "Key theme discovered or updated"],
        ["focus_group_update", "Focus group properties changed"],
        ["mode_event_update", "Session mode switch (manual \u2194 autonomous)"],
        ["analytics_update", "Conversation analytics data"],
        ["conversation_state_update", "Conversation state change"],
        ["task_started / task_completed", "Long-running task lifecycle events"],
        ["task_cancelled / task_failed", "Task termination events"],
        ["bulk_export_progress", "Export operation progress percentage"],
    ]
    reconnect_setting_rows = [
        ["Transport", "WebSocket only (no polling fallback)"],
        ["Reconnection", "Enabled (automatic)"],
        ["Connection Timeout", "60 seconds"],
        ["Ping Interval", "45 seconds"],
        ["Ping Timeout", "120 seconds"],
    ]

    return [
        # Title page uses the "chapter" page template; everything after it
        # goes back to the "content" template.
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(7, "Real-Time Communication", "WebSocket Architecture and Events"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("Real-Time Communication", "ch7"),

        # --- Socket.IO Architecture ---
        h2("Socket.IO Architecture", "ch7_arch"),
        p(
            "The application uses Socket.IO for bidirectional real-time communication between the "
            "React frontend and Quart backend. The backend uses python-socketio's AsyncServer "
            "(native ASGI compatibility), while the frontend uses the socket.io-client library "
            "wrapped in a singleton service."
        ),
        sp(),

        # --- Room-Based Messaging ---
        h2("Room-Based Messaging", "ch7_rooms"),
        p(
            "Each focus group session has a dedicated Socket.IO room. When a user opens a session, "
            "the client emits a <b>join_focus_group</b> event, and the server adds the connection to "
            "the room. All subsequent events (messages, status updates, theme discoveries) are broadcast "
            "to the room, enabling multiple observers to watch a live session."
        ),
        sp(),

        # diagram() yields zero or more elements, so splice them in place.
        *diagram(
            rendered,
            "websocket_flow",
            "Figure 7.1 \u2014 WebSocket Communication Flow",
        ),

        # --- Frontend Event Dispatching ---
        h2("Frontend Event Dispatching", "ch7_dispatch"),
        p(
            "The frontend uses a hybrid approach: the WebSocket singleton service binds specific listeners "
            "for known events, then re-dispatches all events as window CustomEvents with a <b>ws:</b> prefix. "
            "This decouples React components from the WebSocket implementation:"
        ),
        note(
            "Pattern: Socket.IO event \"message_update\" \u2192 window.dispatchEvent(new CustomEvent(\"ws:message_update\", {detail: payload})). "
            "Components listen via window.addEventListener without needing direct socket references."
        ),
        sp(),

        # --- WebSocket Event Catalog ---
        h2("WebSocket Event Catalog", "ch7_events"),
        h3("Client \u2192 Server", "ch7_c2s"),
        styled_table(
            ["Event", "Payload", "Purpose"],
            client_to_server_rows,
            col_widths=[120, 120, CONTENT_WIDTH - 248],
        ),
        sp(),

        h3("Server \u2192 Client", "ch7_s2c"),
        styled_table(
            ["Event", "Purpose"],
            server_to_client_rows,
            col_widths=[155, CONTENT_WIDTH - 163],
        ),
        sp(),

        # --- Reconnection Strategy ---
        h2("Reconnection Strategy", "ch7_reconnect"),
        p(
            "The Socket.IO client is configured with automatic reconnection. On reconnect, the service "
            "rebinds all event listeners (to survive reconnection cycles) and auto-rejoins the previous "
            "focus group room. Token refresh is attempted before reconnection to handle expired sessions."
        ),
        styled_table(
            ["Setting", "Value"],
            reconnect_setting_rows,
            col_widths=[140, CONTENT_WIDTH - 148],
        ),
        sp(),
    ]


def build_chapter_8(rendered):
    """Assemble the story elements for Chapter 8: AI/LLM Integration.

    Args:
        rendered: Passed straight through to ``diagram()`` when placing
            Figure 8.1 (the "llm_pipeline" diagram).

    Returns:
        A list of story elements in reading order.
    """
    # One row per supported model: name, provider, API entry point, parameters.
    model_rows = [
        ["gemini-3-pro-preview", "Google", "genai.Client.generate_content()", "temperature, max_tokens"],
        ["gpt-4.1", "OpenAI", "chat.completions.create()", "temperature, max_tokens"],
        ["gpt-5.2", "OpenAI", "responses.create()", "reasoning_effort (minimal/low/medium/high), verbosity (low/medium/high)"],
    ]

    # Title page on the "chapter" template, then back to "content".
    flow = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(8, "AI/LLM Integration", "Multi-Model Service and Task Management"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("AI/LLM Integration", "ch8"),
    ]

    # --- Multi-Model LLM Service ---
    flow += [
        h2("Multi-Model LLM Service", "ch8_llm"),
        p(
            "The <b>LLMService</b> provides a unified interface for all AI operations, abstracting away "
            "provider differences between Google Gemini and OpenAI. Each call creates a fresh client "
            "instance to avoid event loop affinity issues in the async ASGI environment."
        ),
        styled_table(
            ["Model", "Provider", "API", "Special Parameters"],
            model_rows,
            col_widths=[100, 55, 135, CONTENT_WIDTH - 298],
        ),
        sp(),
    ]
    flow += diagram(rendered, "llm_pipeline", "Figure 8.1 \u2014 LLM Request Pipeline")

    # --- Retry and Error Handling ---
    flow += [
        h2("Retry and Error Handling", "ch8_retry"),
        p(
            "All LLM calls are wrapped in a retry mechanism with exponential backoff. On failure, "
            "the service retries up to 3 times with delays of 1s, 2s, and 4s. The service also "
            "handles JSON parsing of responses, stripping markdown code blocks when needed."
        ),
        sp(),
    ]

    # --- AI Runner Service ---
    flow += [
        h2("AI Runner Service", "ch8_runner"),
        p(
            "The <b>AIRunnerService</b> is a singleton that manages a dedicated background thread with "
            "its own asyncio event loop. This isolation ensures:"
        ),
        bullet("Autonomous conversations don't block HTTP request handling"),
        bullet("Motor (async MongoDB driver) runs on a consistent event loop"),
        bullet("Thread-safe task registry enables concurrent conversation management"),
        sp(),
        p("Lifecycle:"),
        bullet("<b>init_ai_runner()</b> \u2014 Called on app startup, creates dedicated thread and event loop"),
        bullet("<b>submit_conversation()</b> \u2014 Schedules conversation coroutine on the AI event loop"),
        bullet("<b>stop_conversation()</b> \u2014 Cancels a specific conversation by focus group ID"),
        bullet("<b>Shutdown</b> \u2014 Graceful cleanup: cancels all running tasks, stops event loop, joins thread"),
        sp(),
    ]

    # --- Task Management ---
    flow += [
        h2("Task Management", "ch8_tasks"),
        p(
            "Long-running operations (persona generation, theme extraction, summary generation) are wrapped "
            "in <b>CancellableTask</b> objects tracked by the TaskManager. Each task has a unique ID, "
            "is associated with a user, and can be cancelled via the REST API or WebSocket."
        ),
        p(
            "On the frontend, the <b>useCancellableGeneration</b> hook manages task lifecycle state "
            "and listens for WebSocket task events (task_started, task_completed, task_failed, task_cancelled)."
        ),
        sp(),
    ]

    return flow


def build_chapter_9(rendered):
    """Assemble the story elements for Chapter 9: Core Feature Flows.

    Args:
        rendered: Passed straight through to ``diagram()`` when placing
            Figures 9.1-9.3 ("persona_generation", "focus_group_states",
            "autonomous_conversation").

    Returns:
        A list of story elements in reading order.
    """
    # Static table data, declared first so the element sequence stays readable.
    state_rows = [
        ["new", "Freshly created, no session started", "Configure \u2192 setup"],
        ["setup", "Configuring guide, participants, settings", "Start Manual or Start Autonomous"],
        ["manual_mode", "User-controlled moderation", "Switch to AI, End Session"],
        ["ai_mode", "Autonomous AI-driven conversation", "Switch to Manual, End Session, Guide Complete"],
        ["completed", "Session finished", "Terminal state"],
        ["error", "AI error or timeout", "Resume Manually, Terminal"],
    ]
    decision_rows = [
        ["participant_respond", "Selected persona provides a contextual response"],
        ["moderator_speak", "AI moderator advances discussion or redirects"],
        ["participant_interaction", "Two personas engage in direct dialogue"],
        ["probe_trigger", "Probing question to deepen exploration"],
        ["end_session", "Conclude the conversation (guide complete or limits reached)"],
    ]
    safety_limit_rows = [
        ["Max Actions", "500", "Prevents runaway conversations"],
        ["Max Consecutive Silence", "3", "Ends session if no meaningful responses"],
        ["Response Timeout", "30 seconds", "Prevents hanging on unresponsive LLM"],
        ["Inter-Action Delay", "3\u201310 seconds (random)", "Simulates natural conversation pace"],
        ["Dominance Threshold", "40%", "Flags when a participant dominates discussion"],
        ["Reasoning History", "Last 20 decisions", "Prevents repetitive decision patterns"],
    ]
    # The two text columns of the state table split the remaining width evenly.
    half_remaining = (CONTENT_WIDTH - 93) * 0.5

    return [
        # Title page on the "chapter" template, then back to "content".
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(9, "Core Feature Flows", "Personas, Focus Groups, and Autonomous AI"),
        NextPageTemplate("content"),
        PageBreak(),

        h1("Core Feature Flows", "ch9"),

        # --- Persona Generation Pipeline ---
        h2("Persona Generation Pipeline", "ch9_persona"),
        p(
            "Persona generation uses a two-stage pipeline that progressively builds detail. "
            "The first stage generates basic demographic profiles from an audience brief, allowing "
            "the user to review before the second stage expands each into a fully detailed persona."
        ),
        *diagram(
            rendered,
            "persona_generation",
            "Figure 9.1 \u2014 Two-Stage Persona Generation Pipeline",
        ),
        p(
            "<b>Stage 1 \u2014 Basic Profiles:</b> The LLM receives the audience brief (optionally AI-enhanced) "
            "and generates N basic profiles with name, age, gender, occupation, location, personality summary, "
            "interests, and tech savviness. Optional customer data files are included for grounding."
        ),
        p(
            "<b>Stage 2 \u2014 Detailed Personas:</b> Each basic profile is expanded individually. The LLM adds "
            "OCEAN personality traits (scored 0\u2013100), goals, frustrations, motivations, behavioral scenarios, "
            "Think-Feel-Do mappings, self-determination needs, and an AI-synthesized biography. Each completed "
            "persona is saved to MongoDB immediately."
        ),
        sp(),

        # --- Focus Group Lifecycle ---
        h2("Focus Group Session Lifecycle", "ch9_lifecycle"),
        p(
            "Focus groups progress through a state machine with four primary states. Users can switch "
            "between manual and autonomous modes during a live session."
        ),
        *diagram(
            rendered,
            "focus_group_states",
            "Figure 9.2 \u2014 Focus Group State Machine",
        ),
        styled_table(
            ["State", "Description", "Transitions"],
            state_rows,
            col_widths=[85, half_remaining, half_remaining],
        ),
        sp(),

        # --- Autonomous Conversation System ---
        h2("Autonomous Conversation System", "ch9_auto"),
        p(
            "The autonomous conversation controller orchestrates multi-persona discussions without human "
            "intervention. It runs in a dedicated thread (via AIRunnerService) and uses the LLM-powered "
            "decision engine to determine each action."
        ),
        *diagram(
            rendered,
            "autonomous_conversation",
            "Figure 9.3 \u2014 Autonomous Conversation Loop (One Iteration)",
        ),

        h3("Decision Engine Actions", "ch9_decisions"),
        styled_table(
            ["Action", "Description"],
            decision_rows,
            col_widths=[130, CONTENT_WIDTH - 138],
        ),
        sp(),

        h3("Safety Limits", "ch9_safety"),
        styled_table(
            ["Limit", "Value", "Purpose"],
            safety_limit_rows,
            col_widths=[120, 80, CONTENT_WIDTH - 208],
        ),
        sp(),
    ]


def build_chapter_10(rendered):
    """Assemble the story elements for Chapter 10: API Reference.

    The chapter is one intro paragraph followed by a heading and endpoint
    table per route group.

    Args:
        rendered: Not used by this chapter (it places no diagrams).

    Returns:
        A list of story elements in reading order.
    """
    def endpoint_table(method_width, path_width, rows):
        # Every endpoint table shares the same header and a 30-wide "Auth"
        # column; "Purpose" gets CONTENT_WIDTH minus the fixed columns and
        # a further 8.
        fixed = method_width + path_width + 30 + 8
        return styled_table(
            ["Method", "Path", "Auth", "Purpose"],
            rows,
            col_widths=[method_width, path_width, 30, CONTENT_WIDTH - fixed],
        )

    # (heading text, bookmark key, Method column width, Path column width, rows)
    route_groups = [
        # --- Auth ---
        ("/api/auth", "ch10_auth", 45, 100, [
            ["POST", "/register", "No", "Register new user account"],
            ["POST", "/login", "No", "Login with username/password, returns JWT"],
            ["POST", "/microsoft", "No", "Microsoft MSAL authentication, returns JWT"],
            ["GET", "/me", "Yes", "Validate token and return user profile"],
        ]),
        # --- Personas ---
        ("/api/personas", "ch10_personas", 45, 130, [
            ["GET", "/", "Yes", "Get current user's personas"],
            ["GET", "/all", "Yes", "Get all personas"],
            ["GET", "/:id", "Yes", "Get specific persona by ID"],
            ["POST", "/", "Yes", "Create new persona"],
            ["POST", "/batch", "Yes", "Create multiple personas"],
            ["PUT", "/:id", "Yes", "Update persona"],
            ["DELETE", "/:id", "Yes", "Delete persona"],
            ["POST", "/:id/export-profile", "Yes", "Export persona as formatted profile"],
            ["POST", "/bulk-export", "Yes", "Export multiple personas (MD/JSON/CSV)"],
            ["POST", "/:id/modify-with-ai", "Yes", "AI-assisted persona modification"],
        ]),
        # --- AI Personas ---
        ("/api/ai-personas", "ch10_ai", 50, 160, [
            ["POST", "/generate-basic-profiles", "Yes", "Stage 1: Generate basic demographic profiles"],
            ["POST", "/generate-personas", "Yes", "Stage 2: Expand profiles to full personas"],
            ["POST", "/enhance-audience-brief", "Yes", "AI-enhance the audience brief"],
            ["POST", "/upload-customer-data", "Yes", "Upload research documents for grounding"],
            ["DELETE", "/cleanup-customer-data/:id", "Yes", "Clean up uploaded customer data"],
            ["POST", "/batch-generate-summaries", "Yes", "Generate summaries for multiple personas"],
        ]),
        # --- Focus Groups ---
        ("/api/focus-groups", "ch10_fg", 50, 170, [
            ["GET", "/", "Yes", "Get user's focus groups"],
            ["GET", "/:id", "Yes", "Get specific focus group"],
            ["POST", "/", "Yes", "Create focus group"],
            ["PUT", "/:id", "Yes", "Update focus group"],
            ["DELETE", "/:id", "Yes", "Delete focus group"],
            ["POST", "/:id/participants", "Yes", "Add participant to group"],
            ["DELETE", "/:id/participants/:pid", "Yes", "Remove participant"],
            ["POST", "/:id/messages", "Yes", "Send message in session"],
            ["GET", "/:id/messages", "Yes", "Get session messages"],
            ["POST", "/:id/assets", "Yes", "Upload creative assets"],
            ["POST", "/:id/generate-discussion-guide", "Yes", "Generate AI discussion guide"],
            ["POST", "/:id/notes", "Yes", "Create session note"],
        ]),
        # --- Focus Group AI ---
        ("/api/focus-group-ai", "ch10_fgai", 45, 155, [
            ["POST", "/generate-response", "Yes", "Generate persona response in session"],
            ["POST", "/generate-key-themes", "Yes", "Extract themes from conversation"],
            ["POST", "/autonomous/start/:id", "Yes", "Start autonomous conversation"],
            ["POST", "/autonomous/stop/:id", "Yes", "Stop autonomous conversation"],
            ["GET", "/autonomous/status/:id", "Yes", "Get autonomous mode status"],
            ["POST", "/moderator/advance/:id", "Yes", "Advance moderator to next topic"],
            ["POST", "/moderator/end-session/:id", "Yes", "End session via moderator"],
            ["GET", "/conversation/state/:id", "Yes", "Get conversation state"],
            ["GET", "/conversation/analytics/:id", "Yes", "Get conversation analytics"],
            ["POST", "/conversation/intervene/:id", "Yes", "Manual intervention in autonomous mode"],
        ]),
        # --- Folders ---
        ("/api/folders", "ch10_folders", 50, 135, [
            ["GET", "/", "Yes", "Get folder hierarchy tree"],
            ["GET", "/:id", "Yes", "Get specific folder"],
            ["POST", "/", "Yes", "Create folder"],
            ["PUT", "/:id", "Yes", "Update folder"],
            ["DELETE", "/:id", "Yes", "Delete folder"],
            ["POST", "/:id/personas", "Yes", "Add persona to folder"],
            ["DELETE", "/:id/personas/:pid", "Yes", "Remove persona from folder"],
            ["POST", "/:id/personas/batch", "Yes", "Add multiple personas to folder"],
        ]),
        # --- Tasks ---
        ("/api/tasks", "ch10_tasks", 50, 120, [
            ["DELETE", "/:task_id", "Yes", "Cancel a running task"],
            ["GET", "/user/:user_id", "Yes", "Get user's active tasks"],
        ]),
    ]

    # Title page on the "chapter" template, then back to "content".
    flow = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(10, "API Reference", "REST Endpoint Catalog"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("API Reference", "ch10"),
        p(
            "The backend exposes 7 route groups via Flask/Quart blueprints. All endpoints except "
            "authentication routes require a valid JWT token in the Authorization header."
        ),
    ]

    # Each route group renders as: heading, endpoint table, spacer.
    for prefix, bookmark, method_width, path_width, rows in route_groups:
        flow.append(h2(prefix, bookmark))
        flow.append(endpoint_table(method_width, path_width, rows))
        flow.append(sp())

    return flow


def build_chapter_11(rendered):
    """Assemble the story elements for Chapter 11: Data Flow.

    Covers the end-to-end request path, the autonomous-conversation data
    flow, and a short list of key architectural patterns, then closes the
    document with an "auto-generated" note.

    Args:
        rendered: Not used by this chapter (it places no diagrams).

    Returns:
        A list of story elements in reading order.
    """
    e = []
    # Title page on the "chapter" template, then back to "content".
    e.append(NextPageTemplate("chapter"))
    e.append(PageBreak())
    e.append(ChapterTitlePage(11, "Data Flow", "End-to-End Request and Conversation Patterns"))
    e.append(NextPageTemplate("content"))
    e.append(PageBreak())

    e.append(h1("Data Flow", "ch11"))

    # --- End-to-End Request Flow ---
    e.append(h2("End-to-End Request Flow", "ch11_e2e"))
    e.append(p(
        "A typical user interaction follows this path through the system:"
    ))
    e.append(bullet("<b>1. User Action</b> \u2014 Click, form submit, or navigation event in the React SPA"))
    e.append(bullet("<b>2. API Request</b> \u2014 Axios sends REST request with JWT Bearer token via the API client"))
    e.append(bullet("<b>3. Route Handler</b> \u2014 Quart blueprint validates JWT, extracts user identity, calls service layer"))
    e.append(bullet("<b>4. Service Processing</b> \u2014 Business logic executes: LLM calls, database operations, prompt templating"))
    e.append(bullet("<b>5. Data Persistence</b> \u2014 MongoDB operations via PyMongo (or Motor in AI thread)"))
    e.append(bullet("<b>6. WebSocket Broadcast</b> \u2014 For real-time operations, events are emitted to the focus group room"))
    e.append(bullet("<b>7. Response</b> \u2014 JSON response returned to frontend; WebSocket events dispatched as window events"))
    e.append(bullet("<b>8. UI Update</b> \u2014 React components re-render via TanStack Query cache invalidation or WebSocket event handlers"))
    e.append(sp())

    # --- AI Conversation Data Flow ---
    e.append(h2("AI Conversation Data Flow", "ch11_ai"))
    e.append(p(
        "During an autonomous conversation, data flows through a specialized pipeline:"
    ))
    e.append(bullet("<b>1. User starts autonomous mode</b> \u2014 POST /autonomous/start/:id"))
    e.append(bullet("<b>2. AI Runner</b> \u2014 Submits conversation coroutine to dedicated thread"))
    e.append(bullet("<b>3. Conversation Loop</b> \u2014 Controller runs continuously until end condition"))
    e.append(bullet("<b>4. Context Assembly</b> \u2014 ConversationContextService loads messages, participants, guide from MongoDB"))
    e.append(bullet("<b>5. Decision</b> \u2014 ConversationDecisionService sends context + prompt to LLM, gets structured action"))
    e.append(bullet("<b>6. Execution</b> \u2014 Controller executes action (generate response, moderate, probe)"))
    e.append(bullet("<b>7. Persistence</b> \u2014 Message saved to MongoDB focus group document"))
    e.append(bullet("<b>8. Broadcast</b> \u2014 WebSocketManager emits message_update and status events to room"))
    e.append(bullet("<b>9. Frontend</b> \u2014 Window events trigger React component updates in real time"))
    e.append(bullet("<b>10. Loop</b> \u2014 3\u201310 second delay, then repeat from step 4"))
    e.append(sp())

    # --- Key Architectural Patterns ---
    e.append(h2("Key Architectural Patterns", "ch11_patterns"))

    e.append(h3("Singleton WebSocket Service", "ch11_ws"))
    e.append(p(
        "The frontend creates a single Socket.IO instance at module level, shared across all components. "
        "This prevents multiple socket connections and ensures consistent event routing. Events are "
        "re-dispatched as window CustomEvents, decoupling React from the WebSocket implementation."
    ))

    e.append(h3("Dedicated AI Thread", "ch11_thread"))
    e.append(p(
        "Autonomous conversations run in a dedicated Python thread with its own asyncio event loop. "
        "This prevents long-running AI operations from blocking HTTP request handling and provides "
        "a stable event loop for Motor (async MongoDB driver)."
    ))

    # The heading previously read "Two-Pass Document Rendering", which did not
    # match the paragraph below (event-driven session updates). The bookmark
    # key is left as-is so any existing reference to it keeps resolving.
    e.append(h3("Event-Driven Session Updates", "ch11_twopass"))
    e.append(p(
        "Focus group sessions use an event-driven architecture where database writes and WebSocket "
        "broadcasts happen atomically. The WebSocket manager maintains room membership, ensuring "
        "only connected observers receive updates."
    ))

    e.append(h3("Prompt Template Engine", "ch11_prompts"))
    e.append(p(
        "All LLM prompts are externalized as markdown files in /backend/prompts/. The PromptLoader "
        "reads templates at runtime and interpolates context variables. This separation enables "
        "prompt iteration without code changes."
    ))
    e.append(sp())

    # Closing note for the whole document.
    e.append(note(
        "This document was auto-generated from the Semblance codebase. For the most current details, "
        "consult the source code directly. Service files are in /backend/app/services/, route files "
        "in /backend/app/routes/, and frontend components in /src/components/."
    ))

    return e


# ============================================================================
# MAIN BUILD
# ============================================================================

def build_architecture_doc(output_path):
    """Render the Mermaid diagrams and build the architecture PDF.

    Steps: create a temporary directory for diagram images, render the
    diagrams into it, assemble the story (cover, table of contents,
    chapters 1-11), and run a multi-pass build so the table of contents
    can be populated. Progress is reported on stdout.

    The temporary diagram directory is removed on the way out whether or
    not the build succeeds; removal itself is best-effort and never raises.

    Args:
        output_path: Destination path of the PDF; handed to
            ``ArchDocTemplate`` and used afterwards to report the file size.

    Raises:
        Whatever diagram rendering, a chapter builder, or the PDF build
        raises; nothing is caught here apart from cleanup failures.
    """
    print("=" * 60)
    print("Building Semblance Technical Architecture Document")
    print("=" * 60)

    # Create temp directory for Mermaid diagrams
    diagram_dir = tempfile.mkdtemp(prefix="semblance_diagrams_")
    print(f"\n  Diagram directory: {diagram_dir}")

    try:
        # Step 1: Render Mermaid diagrams
        print("\n  Rendering Mermaid diagrams...")
        rendered = render_mermaid_diagrams(diagram_dir)
        print(f"  Rendered {len(rendered)}/{len(DIAGRAMS)} diagrams\n")

        # Step 2: Build document
        doc = ArchDocTemplate(output_path)
        story = []

        # Cover page
        print("  Cover page...")
        story.append(NextPageTemplate("cover"))
        story.append(Spacer(1, 1))

        # Table of Contents
        print("  Table of Contents...")
        story.append(NextPageTemplate("toc"))
        story.append(PageBreak())
        story.append(BookmarkedHeading("Table of Contents", STYLES["h1"], level=0, bookmark_name="toc"))
        story.append(Spacer(1, 10))
        toc = TableOfContents()
        toc.levelStyles = [STYLES["toc0"], STYLES["toc1"], STYLES["toc2"]]
        story.append(toc)

        # Chapters
        chapters = [
            ("Chapter 1: Executive Overview", build_chapter_1),
            ("Chapter 2: System Architecture", build_chapter_2),
            ("Chapter 3: Frontend Architecture", build_chapter_3),
            ("Chapter 4: Backend Architecture", build_chapter_4),
            ("Chapter 5: Data Model", build_chapter_5),
            ("Chapter 6: Authentication & Authorization", build_chapter_6),
            ("Chapter 7: Real-Time Communication", build_chapter_7),
            ("Chapter 8: AI/LLM Integration", build_chapter_8),
            ("Chapter 9: Core Feature Flows", build_chapter_9),
            ("Chapter 10: API Reference", build_chapter_10),
            ("Chapter 11: Data Flow", build_chapter_11),
        ]
        for name, builder in chapters:
            print(f"  {name}...")
            story.extend(builder(rendered))

        # Build PDF (multi-pass for TOC)
        print("\n  Rendering PDF (multi-pass for TOC)...")
        doc.multiBuild(story)
    finally:
        # Always drop the diagram temp files, even when rendering or the PDF
        # build failed. A failed removal must not mask the real error or fail
        # an otherwise successful build, hence ignore_errors.
        shutil.rmtree(diagram_dir, ignore_errors=True)

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\nDone! {os.path.abspath(output_path)} ({size_kb:.0f} KB)")


if __name__ == "__main__":
    # An explicit path on the command line wins; otherwise the PDF is written
    # two directories above the one containing this script.
    if len(sys.argv) > 1:
        output = sys.argv[1]
    else:
        output = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.abspath(__file__))
                )
            ),
            "semblance_architecture.pdf",
        )
    build_architecture_doc(output)