# modcomms/documentation/architecture/generate_architecture_pdf.py

"""
ModComms Technical Architecture PDF Generator

Produces a comprehensive A4 PDF document describing the full system architecture,
multi-agent AI pipeline, database schema, frontend/backend design, authentication,
knowledge base system, and deployment.

Uses ReportLab Platypus for layout and embeds pre-rendered Mermaid diagram PNGs.
"""

import os
from pathlib import Path
from datetime import date

from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch, mm
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY, TA_RIGHT
from reportlab.platypus import (
    BaseDocTemplate, PageTemplate, Frame, NextPageTemplate,
    Paragraph, Spacer, Table, TableStyle, Image, PageBreak,
    KeepTogether,
)
from reportlab.platypus.tableofcontents import TableOfContents

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Directory that contains this script; every other path is derived from it.
SCRIPT_DIR = Path(__file__).resolve().parent
# Two directory levels above the script directory.
PROJECT_ROOT = SCRIPT_DIR.parent.parent
# Logo image placed on the cover page (only used if the file exists).
LOGO_PATH = PROJECT_ROOT.joinpath("UI_guidance", "Barclays-Modcomms.png")
# Folder holding the pre-rendered diagram PNGs.
DIAGRAMS_DIR = SCRIPT_DIR.joinpath("diagrams", "rendered")
# Destination of the generated PDF, next to this script.
OUTPUT_PDF = SCRIPT_DIR.joinpath("ModComms_Technical_Architecture.pdf")

# ---------------------------------------------------------------------------
# Design Tokens (matching existing presentation)
# ---------------------------------------------------------------------------
# Brand palette. Usage notes below refer to this file only.
# DARK_NAVY: cover background, content-page header band, table header rows, h1/h3 text.
DARK_NAVY = colors.HexColor("#1A2142")
# ACTIVE_BLUE: cover accent stripe, rule under the page header, h2 text.
ACTIVE_BLUE = colors.HexColor("#006DE3")
# LIME: bottom accent on the cover and the cover metadata line.
LIME = colors.HexColor("#C3FB5A")
# TEAL / ELECTRIC_VIOLET: not referenced in the visible part of this file.
TEAL = colors.HexColor("#01A1A2")
ELECTRIC_VIOLET = colors.HexColor("#7A0FF9")
WHITE = colors.HexColor("#FFFFFF")
# LIGHT_GREY: alternate table rows and code-block background.
LIGHT_GREY = colors.HexColor("#F6F6F6")
# MID_GREY: footer band and table grid lines.
MID_GREY = colors.HexColor("#E2E2E2")
# DARK_GREY: footer text and figure captions.
DARK_GREY = colors.HexColor("#8E8E8E")
# BLACK_TITLE: default text colour for body, bullets, code and table cells.
BLACK_TITLE = colors.HexColor("#272727")

# Red/Amber/Green status colours. The same hex values are written inline in
# the markup produced by build_agent_pipeline().
RAG_GREEN = colors.HexColor("#09821F")
RAG_AMBER = colors.HexColor("#FFBA00")
RAG_RED = colors.HexColor("#E3000F")

# Page geometry: A4 page size, a uniform margin, and the width left between
# the left and right margins.
PAGE_W, PAGE_H = A4
MARGIN = 0.75 * inch
CONTENT_W = PAGE_W - 2 * MARGIN

# ---------------------------------------------------------------------------
# Styles
# ---------------------------------------------------------------------------
# ReportLab's sample stylesheet; supplies the parent styles looked up by _style().
_base = getSampleStyleSheet()


def _style(name, parent="Normal", **kw):
    """Build a ParagraphStyle called *name* that inherits from a sample style.

    ``parent`` is the key of the style in the sample stylesheet to inherit
    from. Any further keyword arguments are forwarded unchanged to
    ParagraphStyle.
    """
    return ParagraphStyle(name, parent=_base[parent], **kw)


# Paragraph styles used throughout the document, keyed by a short role name.
# Each entry is built by _style() on top of the sample stylesheet's "Normal".
styles = {
    # Running text and headings.
    "body": _style("Body", fontSize=10, leading=14, spaceAfter=6, alignment=TA_JUSTIFY,
                    fontName="Helvetica", textColor=BLACK_TITLE),
    "h1": _style("H1", fontSize=22, leading=26, spaceAfter=12, spaceBefore=20,
                  fontName="Helvetica-Bold", textColor=DARK_NAVY),
    "h2": _style("H2", fontSize=16, leading=20, spaceAfter=8, spaceBefore=14,
                  fontName="Helvetica-Bold", textColor=ACTIVE_BLUE),
    "h3": _style("H3", fontSize=12, leading=16, spaceAfter=6, spaceBefore=10,
                  fontName="Helvetica-Bold", textColor=DARK_NAVY),
    # Bulleted list items (see bullet()).
    "bullet": _style("Bullet", fontSize=10, leading=14, spaceAfter=4,
                      fontName="Helvetica", textColor=BLACK_TITLE,
                      bulletIndent=12, leftIndent=24,
                      bulletFontName="Helvetica", bulletFontSize=10),
    # Monospaced snippets on a light grey background (see code()).
    "code": _style("Code", fontSize=8.5, leading=11, spaceAfter=4,
                    fontName="Courier", textColor=BLACK_TITLE,
                    backColor=LIGHT_GREY, borderPadding=4),
    # Table-of-contents entry styles, one per TOC level (see build_toc()).
    "toc_h1": _style("TOC_H1", fontSize=14, leading=20, leftIndent=0,
                      fontName="Helvetica-Bold", textColor=DARK_NAVY),
    "toc_h2": _style("TOC_H2", fontSize=11, leading=16, leftIndent=20,
                      fontName="Helvetica", textColor=BLACK_TITLE),
    # Cover page text (see build_cover()).
    "cover_title": _style("CoverTitle", fontSize=32, leading=38, alignment=TA_LEFT,
                           fontName="Helvetica-Bold", textColor=WHITE),
    "cover_subtitle": _style("CoverSubtitle", fontSize=16, leading=22, alignment=TA_LEFT,
                              fontName="Helvetica", textColor=colors.HexColor("#B0B8D0")),
    "cover_meta": _style("CoverMeta", fontSize=11, leading=15, alignment=TA_LEFT,
                          fontName="Helvetica", textColor=LIME),
    # Table header and body cells (see make_table()).
    "table_header": _style("TableHeader", fontSize=9, leading=12, alignment=TA_LEFT,
                            fontName="Helvetica-Bold", textColor=WHITE),
    "table_cell": _style("TableCell", fontSize=9, leading=12, alignment=TA_LEFT,
                          fontName="Helvetica", textColor=BLACK_TITLE),
    # Right-aligned small grey text; not referenced in the visible part of this file.
    "footer": _style("Footer", fontSize=8, leading=10, alignment=TA_RIGHT,
                      fontName="Helvetica", textColor=DARK_GREY),
}

# ---------------------------------------------------------------------------
# Heading with TOC entry
# ---------------------------------------------------------------------------

class TOCHeading(Paragraph):
    """Heading paragraph that carries the data needed for a TOC/outline entry.

    The heading text is prefixed with an ``<a name=.../>`` anchor. Drawing the
    heading bookmarks the current page under the same key and adds a PDF
    outline entry at the heading's level.
    """

    def __init__(self, text, style, level=0, bookmarkName=None):
        # Default key: the heading text with spaces and slashes replaced by
        # underscores. An explicit bookmarkName takes precedence.
        anchor = bookmarkName or text.replace(" ", "_").replace("/", "_")
        self._toc_text = text
        self._toc_level = level
        self._bookmark = anchor
        super().__init__(f'<a name="{anchor}"/>{text}', style)

    def draw(self):
        """Draw the paragraph, then bookmark the page and add the outline entry."""
        super().draw()
        canvas = self.canv
        canvas.bookmarkPage(self._bookmark)
        canvas.addOutlineEntry(self._toc_text, self._bookmark, self._toc_level)


# ---------------------------------------------------------------------------
# Document Template with header/footer
# ---------------------------------------------------------------------------

class ArchDocTemplate(BaseDocTemplate):
    """A4 document template with a full-page cover and decorated content pages.

    Two page templates are registered: ``cover`` (painted background, one
    frame spanning the whole page) and ``content`` (header band, footer band
    with page number, one frame inside the margins).
    """

    def __init__(self, filename, **kw):
        super().__init__(filename, pagesize=A4, **kw)

        # Cover frame covers the entire page; the background comes from _cover_page.
        cover_frame = Frame(0, 0, PAGE_W, PAGE_H, id="cover")
        # Content frame sits inside the margins, raised 0.4in from the bottom
        # margin and shortened by 0.6in in total.
        body_frame = Frame(
            MARGIN,
            MARGIN + 0.4 * inch,
            CONTENT_W,
            PAGE_H - 2 * MARGIN - 0.6 * inch,
            id="normal",
        )

        templates = [
            PageTemplate(id="cover", frames=[cover_frame], onPage=self._cover_page),
            PageTemplate(id="content", frames=[body_frame], onPage=self._content_page),
        ]
        self.addPageTemplates(templates)

    def afterFlowable(self, flowable):
        """Send a ``TOCEntry`` notification for every TOCHeading that was laid out."""
        if not isinstance(flowable, TOCHeading):
            return
        entry = (flowable._toc_level, flowable._toc_text, self.page, flowable._bookmark)
        self.notify("TOCEntry", entry)

    @staticmethod
    def _cover_page(canvas, doc):
        """Paint the cover: navy background, blue top stripe, lime bottom stripe."""
        # (fill colour, (x, y, width, height)) painted in this order.
        bands = (
            (DARK_NAVY, (0, 0, PAGE_W, PAGE_H)),
            (ACTIVE_BLUE, (0, PAGE_H - 8 * mm, PAGE_W, 8 * mm)),
            (LIME, (0, 0, PAGE_W, 3 * mm)),
        )
        canvas.saveState()
        for colour, (x, y, width, height) in bands:
            canvas.setFillColor(colour)
            canvas.rect(x, y, width, height, fill=1, stroke=0)
        canvas.restoreState()

    @staticmethod
    def _content_page(canvas, doc):
        """Paint the header band and the footer band (with page number)."""
        header_h = 12 * mm
        footer_text_y = 4 * mm

        canvas.saveState()

        # Header: navy band with the document title and a thin blue rule below it.
        canvas.setFillColor(DARK_NAVY)
        canvas.rect(0, PAGE_H - header_h, PAGE_W, header_h, fill=1, stroke=0)
        canvas.setFillColor(WHITE)
        canvas.setFont("Helvetica-Bold", 8)
        canvas.drawString(MARGIN, PAGE_H - 9 * mm, "ModComms Technical Architecture")
        canvas.setFillColor(ACTIVE_BLUE)
        canvas.rect(0, PAGE_H - 12.8 * mm, PAGE_W, 0.8 * mm, fill=1, stroke=0)

        # Footer: grey band, classification text on the left, page number on the right.
        canvas.setFillColor(MID_GREY)
        canvas.rect(0, 0, PAGE_W, 10 * mm, fill=1, stroke=0)
        canvas.setFillColor(DARK_GREY)
        canvas.setFont("Helvetica", 7)
        canvas.drawString(MARGIN, footer_text_y, "Barclays Internal - Confidential")
        canvas.drawRightString(PAGE_W - MARGIN, footer_text_y, f"Page {doc.page}")

        canvas.restoreState()


# ---------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------

def h1(text):
    """Return a TOC-registered heading in the ``h1`` style at TOC level 0."""
    heading = TOCHeading(text, styles["h1"], level=0)
    return heading


def h2(text):
    """Return a TOC-registered heading in the ``h2`` style at TOC level 1."""
    heading = TOCHeading(text, styles["h2"], level=1)
    return heading


def h3(text):
    """Return a TOC-registered heading in the ``h3`` style.

    The heading is registered at TOC level 1, the same level used by h2().
    """
    heading = TOCHeading(text, styles["h3"], level=1)
    return heading


def p(text):
    """Return a body-text Paragraph; *text* is passed through as paragraph markup."""
    body_style = styles["body"]
    return Paragraph(text, body_style)


def bullet(text):
    """Return a Paragraph in the ``bullet`` style, prefixed with a bullet glyph."""
    markup = f"<bullet>&bull;</bullet> {text}"
    return Paragraph(markup, styles["bullet"])


def code(text):
    """Return a Paragraph in the ``code`` style with newlines turned into ``<br/>``.

    The text is otherwise handed to Paragraph unchanged, i.e. it is treated
    as paragraph markup and is not escaped here.
    """
    markup = "<br/>".join(text.split("\n"))
    return Paragraph(markup, styles["code"])


def spacer(h=0.15):
    """Return a vertical Spacer that is *h* inches tall."""
    height = h * inch
    return Spacer(1, height)


def diagram(filename, caption=None, max_width=None):
    """Embed a rendered diagram PNG, scaled to fit, with an optional caption.

    Args:
        filename: File name of the PNG inside ``DIAGRAMS_DIR``.
        caption: Optional caption markup shown centred below the image.
        max_width: Target width in points; defaults to ``CONTENT_W``.

    Returns:
        A list of flowables suitable for ``elements.extend(...)``. When the
        file does not exist the list holds a single placeholder paragraph.
        Otherwise it holds a single ``KeepTogether`` that wraps the spacers,
        the image and the caption, so a page break cannot separate a figure
        from its caption.
    """
    from PIL import Image as PILImage

    path = DIAGRAMS_DIR / filename
    if not path.exists():
        return [p(f"<i>[Diagram not found: {filename}]</i>")]

    # Read the pixel dimensions so the aspect ratio can be preserved.
    with PILImage.open(str(path)) as pil_img:
        img_w, img_h = pil_img.size

    # Scale to the target width first.
    target_w = max_width or CONTENT_W
    scale = target_w / img_w
    target_h = img_h * scale

    # Cap the height at 6 inches, shrinking the width to keep the aspect ratio.
    max_h = 6 * inch
    if target_h > max_h:
        target_h = max_h
        target_w = img_w * (target_h / img_h)

    img = Image(str(path), width=target_w, height=target_h)
    figure = [spacer(0.1), img, spacer(0.05)]
    if caption:
        cap_style = _style("Caption", fontSize=8, leading=10, alignment=TA_CENTER,
                           fontName="Helvetica-Oblique", textColor=DARK_GREY, spaceAfter=8)
        figure.append(Paragraph(caption, cap_style))

    # Keep image and caption on the same page. The group is at most a little
    # over 6 inches tall, which is shorter than the content frame.
    return [KeepTogether(figure)]


def make_table(headers, rows, col_widths=None):
    """Build a styled Table whose first row is a repeating header.

    Args:
        headers: Header cell texts (paragraph markup).
        rows: Iterable of rows; every cell is converted with ``str()`` and
            wrapped in a Paragraph.
        col_widths: Column widths in points. When omitted, ``CONTENT_W`` is
            split evenly across the header columns.

    Returns:
        The configured ``Table`` flowable.
    """
    header_style = styles["table_header"]
    cell_style = styles["table_cell"]

    data = [[Paragraph(title, header_style) for title in headers]]
    data.extend(
        [Paragraph(str(cell), cell_style) for cell in row]
        for row in rows
    )

    if col_widths is None:
        n_cols = len(headers)
        col_widths = [CONTENT_W / n_cols] * n_cols

    # Cell ranges addressed by the style commands below.
    header_row = ((0, 0), (-1, 0))
    body_rows = ((0, 1), (-1, -1))
    whole_table = ((0, 0), (-1, -1))

    commands = [
        # Header row: white bold text on navy.
        ("BACKGROUND", *header_row, DARK_NAVY),
        ("TEXTCOLOR", *header_row, WHITE),
        ("FONTNAME", *header_row, "Helvetica-Bold"),
        ("FONTSIZE", *header_row, 9),
        ("BOTTOMPADDING", *header_row, 6),
        ("TOPPADDING", *header_row, 6),
        # Body rows: alternating white / light grey.
        ("BACKGROUND", *body_rows, WHITE),
        ("ROWBACKGROUNDS", *body_rows, [WHITE, LIGHT_GREY]),
        ("FONTNAME", *body_rows, "Helvetica"),
        ("FONTSIZE", *body_rows, 9),
        ("TOPPADDING", *body_rows, 4),
        ("BOTTOMPADDING", *body_rows, 4),
        # Whole table: horizontal padding, grid and top alignment.
        ("LEFTPADDING", *whole_table, 6),
        ("RIGHTPADDING", *whole_table, 6),
        ("GRID", *whole_table, 0.5, MID_GREY),
        ("VALIGN", *whole_table, "TOP"),
    ]

    table = Table(data, colWidths=col_widths, repeatRows=1)
    table.setStyle(TableStyle(commands))
    return table


# ---------------------------------------------------------------------------
# Document Sections
# ---------------------------------------------------------------------------

def build_cover():
    """Return the cover-page flowables, ending with a switch to the content template.

    The logo is included only when ``LOGO_PATH`` exists; it is drawn 2.8 inches
    wide with its height derived from the image's aspect ratio.
    """
    title_style = styles["cover_title"]

    # Leading spacer pushes the cover content down the page.
    story = [Spacer(1, 2.2 * inch)]

    if LOGO_PATH.exists():
        from PIL import Image as PILImage

        logo_file = str(LOGO_PATH)
        with PILImage.open(logo_file) as pil_img:
            px_w, px_h = pil_img.size
        logo_w = 2.8 * inch
        logo_h = px_h * (logo_w / px_w)
        story += [
            Image(logo_file, width=logo_w, height=logo_h),
            Spacer(1, 0.5 * inch),
        ]

    issued = date.today().strftime('%B %Y')
    story += [
        Paragraph("Technical Architecture", title_style),
        Spacer(1, 0.15 * inch),
        Paragraph("Document", title_style),
        Spacer(1, 0.4 * inch),
        Paragraph(
            "Complete system design reference for the AI-powered proof review platform",
            styles["cover_subtitle"]),
        Spacer(1, 0.6 * inch),
        Paragraph(f"Version 1.0  |  {issued}  |  Internal", styles["cover_meta"]),
        # Every page after the cover uses the "content" page template.
        NextPageTemplate("content"),
        PageBreak(),
    ]
    return story


def build_toc():
    """Return the flowables for the Table of Contents page.

    The page consists of a level-0 heading, a spacer, the TableOfContents
    flowable (styled with ``toc_h1`` / ``toc_h2``) and a page break.
    """
    toc = TableOfContents()
    toc.levelStyles = [styles["toc_h1"], styles["toc_h2"]]
    return [
        TOCHeading("Table of Contents", styles["h1"], level=0),
        spacer(0.2),
        toc,
        PageBreak(),
    ]


def build_executive_summary():
    """Return the flowables for the "Executive Summary" section, ending with a page break."""
    technology_choices = [
        "<b>Frontend:</b> React 18 SPA with TypeScript, Vite build tool, Tailwind CSS",
        "<b>Backend:</b> Python FastAPI with async/await, WebSocket real-time communication",
        "<b>AI Engine:</b> Google Gemini 2.5 Flash for multi-modal image analysis",
        "<b>Database:</b> PostgreSQL with SQLAlchemy async ORM and Alembic migrations",
        "<b>Authentication:</b> Azure AD via MSAL with 4-tier RBAC",
        "<b>Knowledge Base:</b> LlamaParse document parsing + Gemini distillation pipeline",
    ]

    return [
        h1("Executive Summary"),
        p(
            "<b>ModComms</b> is an AI-powered proof review tool built for Barclays marketing operations. "
            "It automates the compliance, brand, tone-of-voice, and channel-suitability review of marketing "
            "assets (proofs) that would traditionally require manual review by multiple specialist teams."
        ),
        p(
            "The platform employs a <b>multi-agent AI architecture</b> where four specialist agents "
            "analyse each proof in parallel, with a lead agent synthesising their findings into a single "
            "RAG (Red/Amber/Green) status decision. This enables near-instant feedback on uploaded proofs, "
            "dramatically reducing review cycle times."
        ),
        h2("Key Technology Choices"),
        *[bullet(choice) for choice in technology_choices],
        PageBreak(),
    ]


def build_system_architecture():
    """Return the flowables for the "System Architecture Overview" section.

    The section contains an introduction, Figure 1, and the technology stack
    table, and ends with a page break.
    """
    stack_rows = [
        ["Frontend", "React 18 + TypeScript", "Single-page application UI"],
        ["Frontend", "Vite 5", "Build tool and dev server"],
        ["Frontend", "Tailwind CSS", "Utility-first styling"],
        ["Frontend", "@azure/msal-react", "Azure AD authentication"],
        ["Backend", "Python 3.11+ / FastAPI", "Async REST API + WebSocket server"],
        ["Backend", "SQLAlchemy 2.0 (async)", "ORM with asyncpg driver"],
        ["Backend", "Alembic", "Database migration management"],
        ["Backend", "Uvicorn", "ASGI server"],
        ["AI", "Google Gemini 2.5 Flash", "Multi-modal proof analysis"],
        ["AI", "LlamaParse", "PDF/DOCX document parsing"],
        ["Database", "PostgreSQL 15", "Primary data store"],
        ["Auth", "Azure AD / MSAL", "OAuth2 + JWT identity provider"],
        ["Deployment", "Docker Compose", "Container orchestration"],
        ["Deployment", "Apache2 + mod_proxy", "Reverse proxy and static serving"],
    ]

    return [
        h1("System Architecture Overview"),
        p(
            "ModComms follows a <b>three-tier architecture</b> with a React single-page application frontend, "
            "a Python FastAPI backend, and PostgreSQL for persistence. The backend communicates with external "
            "services including Google Gemini for AI analysis, Azure AD for authentication, and LlamaParse for "
            "document processing."
        ),
        *diagram("01_system_overview.png", "Figure 1: High-Level System Architecture"),
        h2("Technology Stack"),
        make_table(
            ["Tier", "Technology", "Purpose"],
            stack_rows,
            col_widths=[1.1 * inch, 2.0 * inch, CONTENT_W - 3.1 * inch],
        ),
        PageBreak(),
    ]


def build_agent_pipeline():
    """Return the flowables for the "Multi-Agent Analysis Pipeline" section.

    Covers the specialist agents table, the lead agent's RAG decision rules,
    revision-aware analysis and Figure 2, and ends with a page break.
    """
    agent_rows = [
        ["Legal Agent", "Advertising standards compliance",
         "Financial promotion detection, required disclaimers, FCA/ASA rules, risk language"],
        ["Brand Agent", "Brand identity adherence",
         "Logo usage, colour palette, typography, design language principles (Barclays or Barclaycard)"],
        ["Channel Best Practices Agent", "Platform-specific guidelines",
         "Content best practices, accessibility, readability, platform conventions"],
        ["Channel Tech Specs Agent", "Technical specifications",
         "Dimensions, file size limits, format requirements, resolution, safe zones"],
    ]

    # One bullet per RAG outcome; the label colours are written inline as hex.
    rag_rules = [
        '<font color="#09821F"><b>GREEN (Passed):</b></font> All agents pass with at most 1 amber-level issue per agent, '
        "and no Legal agent amber issues.",
        '<font color="#FFBA00"><b>AMBER (Requires Manual Legal Review):</b></font> More than 1 actionable issue per agent, '
        "or any Legal agent amber-level issue.",
        '<font color="#E3000F"><b>RED (Failed):</b></font> Any agent returns a Red status, indicating a critical compliance '
        "or brand violation that must be resolved.",
    ]

    story = [
        h1("Multi-Agent Analysis Pipeline"),
        p(
            "The core of ModComms is a multi-agent system where four specialist agents analyse each proof "
            "in parallel. Each agent has a distinct area of expertise and access to a curated knowledge base "
            "of reference documents. After all agents complete, a Lead Agent synthesises their findings."
        ),
        h2("Specialist Agents"),
        make_table(
            ["Agent", "Focus Area", "Key Checks"],
            agent_rows,
            col_widths=[1.6 * inch, 1.6 * inch, CONTENT_W - 3.2 * inch],
        ),
        spacer(0.15),
        h2("Lead Agent RAG Decision Logic"),
        p("The Lead Agent synthesises all four sub-reviews into an overall RAG status and a human-readable summary:"),
    ]
    story.extend(bullet(rule) for rule in rag_rules)
    story.append(spacer(0.1))

    story.append(h2("Revision-Aware Analysis"))
    story.append(p(
        "When a proof has been previously analysed (version N > 1), the system automatically fetches the "
        "prior version's analysis results and passes them as context to each agent. This enables agents to "
        "identify <b>resolved issues</b>, <b>outstanding issues</b>, and <b>new issues</b> relative to "
        "the previous version, providing actionable delta feedback."
    ))
    story.extend(diagram("02_agent_pipeline.png", "Figure 2: Multi-Agent Analysis Pipeline"))
    story.append(PageBreak())
    return story


def build_websocket_flow():
    """Return the flowables for the "WebSocket Analysis Flow" section.

    Covers the message protocol table, the numbered flow lifecycle and
    Figure 3, and ends with a page break.
    """
    protocol_rows = [
        ["Client &rarr; Server", "analyze",
         "file_data, file_type, access_token, brand, campaign_id, proof_name, channel, sub_channel, proof_type",
         "Initiate analysis with file and metadata"],
        ["Server &rarr; Client", "agent_started",
         "agent_name",
         "Agent has begun processing"],
        ["Server &rarr; Client", "agent_completed",
         "agent_name, review (ragStatus, feedback, issues, resolvedIssues, outstandingIssues, newIssues)",
         "Agent finished with results"],
        ["Server &rarr; Client", "complete",
         "result (all agent reviews + lead summary + overall status), proof_id, version_id, is_identical_file, pdf_pages",
         "Full analysis complete and persisted"],
        ["Server &rarr; Client", "error",
         "message",
         "Error occurred during processing"],
    ]

    lifecycle_steps = [
        "1. Client establishes WebSocket connection to <font name='Courier'>/ws/analyze</font>",
        "2. Client sends <font name='Courier'>analyze</font> message with JWT access token",
        "3. Server verifies JWT token against Azure AD",
        "4. Server checks user role (oversight_admin blocked from analysis)",
        "5. Server decodes base64 file data; rasterizes PDF pages if applicable",
        "6. Server fetches previous version analysis for revision context",
        "7. Four agents run in parallel via <font name='Courier'>asyncio.gather()</font>",
        "8. Real-time <font name='Courier'>agent_started</font> / <font name='Courier'>agent_completed</font> messages stream to client",
        "9. Lead Agent synthesises overall RAG status",
        "10. Results persisted to database; file stored to disk",
        "11. <font name='Courier'>complete</font> message sent with full results and IDs",
    ]

    story = [
        h1("WebSocket Analysis Flow"),
        p(
            "Proof analysis uses a WebSocket connection for real-time streaming of agent progress. "
            "The client sends a single <font name='Courier'>analyze</font> message containing the base64-encoded "
            "file and metadata, and receives a stream of updates as each agent starts and completes."
        ),
        h2("Message Protocol"),
        make_table(
            ["Direction", "Type", "Payload", "Description"],
            protocol_rows,
            col_widths=[1.0 * inch, 1.0 * inch, 2.2 * inch, CONTENT_W - 4.2 * inch],
        ),
        spacer(0.15),
        h2("Flow Lifecycle"),
    ]
    story.extend(bullet(step) for step in lifecycle_steps)
    story.extend(diagram("03_websocket_flow.png", "Figure 3: WebSocket Analysis Sequence"))
    story.append(PageBreak())
    return story


def build_database_schema():
    """Return the flowables for the "Database Schema" section.

    Covers the domain overview table, key design decisions and Figure 4, and
    ends with a page break.
    """
    elements = []
    elements.append(h1("Database Schema"))
    # The domain count matches the five rows of the Domain Overview table below.
    # NOTE(review): the text says 15 tables, but the table below names 14 -
    # confirm the real count against the schema.
    elements.append(p(
        "ModComms uses PostgreSQL with SQLAlchemy 2.0 async ORM. The schema comprises 15 tables "
        "organised into five logical domains. All primary keys are UUIDs. Alembic manages migrations."
    ))

    elements.append(h2("Domain Overview"))
    elements.append(make_table(
        ["Domain", "Tables", "Purpose"],
        [
            ["Identity & Access", "agencies, users, user_change_logs",
             "User accounts, agency membership, role audit trail"],
            ["Campaign & Proof", "campaigns, proofs, proof_versions",
             "Marketing campaigns, proof assets, versioned analysis results"],
            ["Audit & Review", "flagged_items, resolved_items, error_items",
             "Manual flagging, issue resolution tracking, analysis error records"],
            ["Knowledge Base", "knowledge_bases, source_documents, spec_versions, processing_jobs",
             "Agent reference documentation, document processing pipeline"],
            ["Configuration", "dropdown_options",
             "Hierarchical channel/sub-channel/proof-type configuration"],
        ],
        col_widths=[1.3 * inch, 2.3 * inch, CONTENT_W - 3.6 * inch],
    ))
    elements.append(spacer(0.1))

    elements.append(h2("Key Design Decisions"))
    design_decisions = [
        "<b>JSONB for agent_review:</b> The <font name='Courier'>proof_versions.agent_review</font> column "
        "stores the complete multi-agent analysis result as a JSONB document, enabling flexible querying "
        "while keeping the schema stable as agent output evolves.",
        "<b>JSONB for processing logs:</b> <font name='Courier'>processing_jobs.log</font> stores "
        "step-by-step pipeline progress as structured JSON for debugging.",
        "<b>Self-referential hierarchy:</b> <font name='Courier'>dropdown_options</font> uses a "
        "<font name='Courier'>parent_id</font> FK to itself, supporting the Channel &rarr; Sub-Channel "
        "&rarr; Proof Type hierarchy in a single table.",
        "<b>File hash deduplication:</b> <font name='Courier'>proof_versions.file_hash</font> (MD5) "
        "detects when an identical file is re-uploaded, flagging it to the user.",
    ]
    elements.extend(bullet(decision) for decision in design_decisions)
    elements.extend(diagram("04_database_erd.png", "Figure 4: Entity Relationship Diagram"))
    elements.append(PageBreak())
    return elements


def build_frontend():
    """Return the flowables for the "Frontend Architecture" section.

    Covers the component hierarchy (with Figure 5), the views table and the
    state-management notes, and ends with a page break.
    """
    view_rows = [
        ["Home", "Hero + ChecksOverview + FeedbackReport", "Upload proof, view real-time analysis, export report"],
        ["Campaigns", "Campaigns", "Campaign CRUD, proof list, version history, re-analysis"],
        ["Analytics", "Analytics", "Aggregate statistics, RAG distributions, per-agency breakdowns"],
        ["Auditing", "Auditing", "Flagged items, resolved items, error items with filters"],
        ["WIP Reviewer", "WIPReviewer", "Quick analysis without persisting to a campaign"],
        ["Knowledge Base", "KnowledgeBase", "Manage agent reference docs, trigger processing, view specs"],
        ["Settings", "Settings", "Dropdown options (channels, sub-channels, proof types)"],
        ["User Management", "UserManagement", "Role assignment, agency assignment, change history"],
        ["Profile", "Profile", "Current user info and preferences"],
        ["CopyGenAI", "CopyGenAI", "AI-assisted marketing copy generation"],
    ]

    state_notes = [
        "<b>UserContext:</b> Provides authenticated user info, role checks (<font name='Courier'>canWrite</font>, "
        "<font name='Courier'>canSeeAnalytics</font>, etc.), and agency filtering state.",
        "<b>URL State:</b> Campaign and proof selections are encoded in the URL hash for deep linking "
        "and browser history support.",
        "<b>API Service:</b> Centralized REST client (<font name='Courier'>apiService.ts</font>) that "
        "auto-attaches MSAL access tokens to all requests.",
        "<b>WebSocket Service:</b> <font name='Courier'>geminiService.ts</font> manages the WebSocket "
        "lifecycle for proof analysis, including reconnection and progress callbacks.",
    ]

    story = [
        h1("Frontend Architecture"),
        p(
            "The frontend is a React 18 single-page application built with TypeScript and Vite. "
            "It uses Tailwind CSS for styling with a custom Barclays design system. The application "
            "is wrapped in an MSAL authentication provider and a custom UserContext for role-based rendering."
        ),
        h2("Component Hierarchy"),
        p(
            "The entry point (<font name='Courier'>index.tsx</font>) wraps the app in <font name='Courier'>MsalProvider</font>. "
            "<font name='Courier'>App.tsx</font> acts as an authentication gate, rendering <font name='Courier'>Login</font> "
            "for unauthenticated users and <font name='Courier'>AppContent</font> (inside <font name='Courier'>UserProvider</font>) "
            "for authenticated users. Views are rendered based on a <font name='Courier'>currentView</font> state variable."
        ),
    ]
    story.extend(diagram("05_frontend_hierarchy.png", "Figure 5: Frontend Component Hierarchy"))
    story.append(spacer(0.1))

    story.append(h2("Views"))
    story.append(make_table(
        ["View", "Component", "Description"],
        view_rows,
        col_widths=[1.2 * inch, 2.0 * inch, CONTENT_W - 3.2 * inch],
    ))
    story.append(spacer(0.1))

    story.append(h2("State Management"))
    story.extend(bullet(note) for note in state_notes)
    story.append(PageBreak())
    return story


def build_auth_rbac():
    """Return the flowables for the "Authentication & RBAC" section.

    Covers the authentication flow (with Figure 6), the role hierarchy table
    and the backend enforcement dependencies, and ends with a page break.
    """
    auth_steps = [
        "1. User navigates to app; MSAL checks for existing session",
        "2. If not authenticated, Login component triggers <font name='Courier'>loginPopup()</font>",
        "3. Azure AD returns ID token + access token",
        "4. Frontend calls <font name='Courier'>GET /api/me</font> with Bearer token",
        "5. Backend verifies JWT, extracts claims (oid, name, email)",
        "6. Backend auto-provisions user on first login as <font name='Courier'>basic_user</font>",
        "7. UserContext stores profile and computes role-based feature flags",
    ]

    role_rows = [
        ["super_admin", "Global",
         "All features, all campaigns, user management, settings, knowledge base, analytics, auditing"],
        ["oversight_admin", "Global (read-only)",
         "View all campaigns across agencies, analytics, auditing. Cannot upload, analyse, flag, or resolve."],
        ["agency_admin", "Own agency",
         "Full CRUD within own agency's campaigns, flagging, resolving"],
        ["basic_user", "Own agency",
         "Upload and analyse proofs, view own agency's campaigns, flag issues"],
    ]

    enforcement_notes = [
        "<font name='Courier'>get_current_user()</font> &mdash; Verifies JWT and returns claims dict",
        "<font name='Courier'>get_current_db_user()</font> &mdash; Resolves claims to a User ORM object with agency",
        "<font name='Courier'>require_role(*roles)</font> &mdash; Factory that restricts endpoints to specific roles",
        "<font name='Courier'>require_write_access()</font> &mdash; Blocks oversight_admin from mutation operations",
    ]

    story = [
        h1("Authentication & RBAC"),
        p(
            "ModComms uses Azure Active Directory for authentication via the MSAL (Microsoft Authentication Library) "
            "protocol. The frontend acquires tokens via MSAL.js popup flow, and the backend verifies JWT tokens "
            "using Azure AD's JWKS endpoint."
        ),
        h2("Authentication Flow"),
    ]
    story.extend(bullet(step) for step in auth_steps)
    story.extend(diagram("06_auth_rbac_flow.png", "Figure 6: Authentication & RBAC Flow"))
    story.append(spacer(0.1))

    story.append(h2("Role Hierarchy"))
    story.append(make_table(
        ["Role", "Scope", "Key Permissions"],
        role_rows,
        col_widths=[1.3 * inch, 1.0 * inch, CONTENT_W - 2.3 * inch],
    ))
    story.append(spacer(0.1))

    story.append(h2("Backend Enforcement"))
    story.append(p(
        "Role-based access is enforced at the FastAPI dependency level using composable dependencies:"))
    story.extend(bullet(note) for note in enforcement_notes)
    story.append(PageBreak())
    return story


def build_knowledge_base():
    """Assemble the story elements for the "Knowledge Base Pipeline" section.

    The section consists of an introductory paragraph, a table of the
    knowledge base types, the numbered processing-pipeline steps, a note on
    version management, the Figure 7 diagram, and a closing page break.

    Returns:
        list: The section's story elements, in document order.
    """
    kb_type_rows = [
        ["legal", "Legal Compliance", "Advertising standards, FCA rules, disclaimer requirements"],
        ["brand_barclays", "Barclays Brand", "Barclays brand identity guidelines, design language"],
        ["brand_barclaycard", "Barclaycard Brand", "Barclaycard-specific brand guidelines"],
        ["channel_best_practices", "Channel Best Practices", "Platform content guidelines, accessibility standards"],
        ["channel_tech_specs", "Channel Tech Specs", "Technical dimensions, file formats, resolution specs"],
    ]

    # Each entry becomes one bullet; the step numbers are part of the text.
    pipeline_steps = (
        "1. <b>Upload:</b> Admin uploads source documents (PDF, DOCX, etc.) via the KB UI",
        "2. <b>Store:</b> Files saved to <font name='Courier'>kb_storage/</font> with metadata in DB",
        "3. <b>Parse:</b> LlamaParse API converts documents to clean Markdown",
        "4. <b>Combine:</b> All parsed Markdown from source documents is concatenated",
        "5. <b>Distil:</b> Gemini generates a concise, structured agent spec from the combined text",
        "6. <b>Version:</b> New spec version created with version number, character count, and source doc links",
        "7. <b>Activate:</b> New version set as active; reference docs cache invalidated",
        "8. <b>Serve:</b> Agents load the active spec version at analysis time",
    )

    section = [
        h1("Knowledge Base Pipeline"),
        p(
            "The Knowledge Base system allows admins to upload reference documents (brand guidelines, "
            "legal standards, channel specifications) that are parsed, combined, and distilled into "
            "concise agent specifications. These specs are versioned and serve as the primary context "
            "for each agent during proof analysis."
        ),
        h2("Knowledge Base Types"),
        make_table(
            ["Agent Key", "Display Name", "Description"],
            kb_type_rows,
            col_widths=[1.6 * inch, 1.5 * inch, CONTENT_W - 3.1 * inch],
        ),
        spacer(0.1),
        h2("Processing Pipeline"),
    ]

    for step_text in pipeline_steps:
        section.append(bullet(step_text))

    section += [
        spacer(0.1),
        h2("Version Management"),
        p(
            "Spec versions are immutable once created. Admins can view the full content of any version, "
            "compare two versions with a unified diff view, and revert to a previous version by activating it. "
            "Processing jobs track the full pipeline lifecycle with status progression: "
            "<font name='Courier'>pending &rarr; parsing &rarr; distilling &rarr; completed</font> (or <font name='Courier'>failed</font>)."
        ),
    ]
    section.extend(diagram("07_knowledge_base_pipeline.png", "Figure 7: Knowledge Base Processing Pipeline"))
    section.append(PageBreak())
    return section


def build_deployment():
    """Assemble the story elements for the "Deployment Architecture" section.

    The section consists of an overview paragraph, a table of infrastructure
    components, the numbered deployment steps, the Figure 8 diagram, and a
    closing page break.

    Returns:
        list: The section's story elements, in document order.
    """
    component_rows = [
        ["Reverse Proxy", "Apache2 + mod_proxy + mod_proxy_wstunnel",
         "HTTPS termination, static file serving, proxy to backend (port 8000)"],
        ["Backend", "Docker container (Python + Uvicorn)",
         "Port 8000, auto-restart, volume mounts for uploads and KB storage"],
        ["Database", "Docker container (PostgreSQL 15)",
         "Port 5432, persistent volume for data, health checks"],
        ["Frontend", "Static files (Apache DocumentRoot)",
         "Vite production build served directly by Apache"],
        ["File Storage", "Host filesystem volumes",
         "uploads/ for proof files, kb_storage/ for knowledge base documents"],
    ]

    # Each entry becomes one bullet; the step numbers are part of the text.
    deploy_steps = (
        "1. <b>Pull:</b> Fetch latest code from Git repository",
        "2. <b>Frontend build:</b> <font name='Courier'>npm run build</font> produces static assets",
        "3. <b>Deploy frontend:</b> Copy build output to Apache DocumentRoot",
        "4. <b>Backend build:</b> <font name='Courier'>docker compose build</font> rebuilds backend image",
        "5. <b>Database migration:</b> <font name='Courier'>alembic upgrade head</font> inside container",
        "6. <b>Restart services:</b> <font name='Courier'>docker compose up -d</font> restarts backend + DB",
        "7. <b>Reload Apache:</b> <font name='Courier'>systemctl reload apache2</font> picks up config changes",
    )

    section = [
        h1("Deployment Architecture"),
        p(
            "ModComms is deployed using Docker Compose for the backend services and Apache as a "
            "reverse proxy serving the static frontend build. The deployment script (<font name='Courier'>deploy.sh</font>) "
            "automates the full build and deployment process."
        ),
        h2("Infrastructure Components"),
        make_table(
            ["Component", "Technology", "Configuration"],
            component_rows,
            col_widths=[1.2 * inch, 2.0 * inch, CONTENT_W - 3.2 * inch],
        ),
        spacer(0.1),
        h2("Deployment Process"),
    ]

    for step_text in deploy_steps:
        section.append(bullet(step_text))

    section.extend(diagram("08_deployment_architecture.png", "Figure 8: Deployment Architecture"))
    section.append(PageBreak())
    return section


def build_api_reference():
    """Assemble the story elements for the "API Reference Summary" section.

    The section holds two endpoint tables (general REST routes, then the
    knowledge-base routes), a short note on the WebSocket endpoint, and a
    closing page break.

    Returns:
        list: The section's story elements, in document order.
    """
    # Columns: HTTP method, route, auth requirement, description.
    rest_rows = [
        ["GET", "/api/me", "Bearer", "Get authenticated user profile"],
        ["GET", "/api/campaigns", "Bearer", "List campaigns (filtered by role/agency)"],
        ["POST", "/api/campaigns", "Bearer + Write", "Create a new campaign"],
        ["GET", "/api/campaigns/{id}", "Bearer", "Get campaign by ID"],
        ["PUT", "/api/campaigns/{id}", "Bearer + Write", "Update a campaign"],
        ["DELETE", "/api/campaigns/{id}", "Bearer + Write", "Delete campaign and all files"],
        ["GET", "/api/campaigns/{id}/proofs", "Bearer", "List proofs for a campaign"],
        ["GET", "/api/proofs/{id}", "Bearer", "Get proof by ID"],
        ["DELETE", "/api/proofs/{id}", "Bearer + Write", "Delete proof and files"],
        ["POST", "/api/proofs/{id}/versions/{v}/flag", "Bearer + Write", "Flag an issue"],
        ["POST", "/api/proofs/{id}/versions/{v}/resolve", "Bearer + Write", "Resolve an issue"],
        ["GET", "/api/audit/flagged", "Bearer", "List flagged items"],
        ["GET", "/api/audit/resolved", "Bearer", "List resolved items"],
        ["GET", "/api/audit/errors", "Bearer", "List error items"],
        ["GET", "/api/analytics", "Bearer + Admin", "Get analytics data"],
        ["GET", "/api/analytics/by-agency", "Bearer + Admin", "Per-agency analytics"],
        ["GET", "/api/users", "Bearer + Admin", "List all users"],
        ["PUT", "/api/users/{id}", "Bearer + Super", "Update user role/agency"],
        ["GET", "/api/users/{id}/change-history", "Bearer + Admin", "User change audit trail"],
        ["GET", "/api/agencies", "Bearer", "List all agencies"],
        ["POST", "/api/agencies", "Bearer + Super", "Create agency"],
        ["GET", "/api/dropdown-options", "Bearer", "Get channel/sub-channel/type options"],
        ["POST", "/api/dropdown-options/...", "Bearer + Super", "Manage dropdown options"],
        ["GET", "/api/files/{key}", "Bearer", "Retrieve stored file"],
        ["GET", "/api/files/{key}/pages", "Bearer", "Rasterize PDF to page images"],
        ["POST", "/api/support/email", "Public", "Send support email"],
    ]

    # Columns: HTTP method, route, description (this table has no auth column).
    kb_rows = [
        ["GET", "/api/knowledge-base", "List all knowledge bases"],
        ["GET", "/api/knowledge-base/{id}", "Get KB detail with docs and active spec"],
        ["POST", "/api/knowledge-base/{id}/documents", "Upload source document"],
        ["DELETE", "/api/knowledge-base/{id}/documents/{doc_id}", "Remove source document"],
        ["POST", "/api/knowledge-base/{id}/process", "Trigger processing pipeline"],
        ["GET", "/api/knowledge-base/{id}/jobs/{job_id}", "Get processing job status"],
        ["GET", "/api/knowledge-base/{id}/versions", "List spec versions"],
        ["GET", "/api/knowledge-base/{id}/versions/{v_id}", "Get spec version content"],
        ["GET", "/api/knowledge-base/{id}/versions/{a}/diff/{b}", "Diff two spec versions"],
        ["POST", "/api/knowledge-base/{id}/versions/{v_id}/activate", "Activate a spec version"],
    ]

    return [
        h1("API Reference Summary"),
        h2("REST Endpoints"),
        make_table(
            ["Method", "Endpoint", "Auth", "Description"],
            rest_rows,
            col_widths=[0.6 * inch, 2.3 * inch, 0.9 * inch, CONTENT_W - 3.8 * inch],
        ),
        spacer(0.1),
        h2("Knowledge Base Endpoints"),
        make_table(
            ["Method", "Endpoint", "Description"],
            kb_rows,
            col_widths=[0.6 * inch, 2.8 * inch, CONTENT_W - 3.4 * inch],
        ),
        spacer(0.1),
        h2("WebSocket Endpoint"),
        p(
            "<font name='Courier'>ws://host/ws/analyze</font> &mdash; Real-time proof analysis with streaming "
            "agent progress updates. See the WebSocket Analysis Flow section for full protocol details."
        ),
        PageBreak(),
    ]


def build_env_vars():
    """Assemble the story elements for "Appendix A: Environment Variables".

    The appendix holds one table for the backend settings (followed by a
    footnote explaining the ``Yes*`` marker), one table for the frontend
    settings, and a closing page break.

    Returns:
        list: The appendix's story elements, in document order.
    """
    column_titles = ["Variable", "Required", "Description"]

    backend_vars = [
        ["GEMINI_API_KEY", "Yes", "Google Gemini API key for AI analysis"],
        ["DATABASE_URL", "Yes", "PostgreSQL connection string (asyncpg)"],
        ["AZURE_TENANT_ID", "Yes*", "Azure AD tenant ID for JWT verification"],
        ["AZURE_CLIENT_ID", "Yes*", "Azure AD application (client) ID"],
        ["CORS_ORIGINS", "Yes", "Comma-separated allowed CORS origins"],
        ["LLAMA_CLOUD_API_KEY", "No", "LlamaParse API key (enables KB pipeline)"],
        ["DISABLE_AUTH", "No", "Set 'true' for local dev without Azure AD"],
        ["REFERENCE_DOCS_PATH", "No", "Path to reference docs directory (default: reference_docs/)"],
    ]

    frontend_vars = [
        ["VITE_BACKEND_URL", "Yes", "Backend REST API base URL (e.g. http://localhost:8000)"],
        ["VITE_BACKEND_WS_URL", "Yes", "Backend WebSocket URL (e.g. ws://localhost:8000/ws/analyze)"],
        ["VITE_AZURE_CLIENT_ID", "Yes", "Azure AD app client ID for MSAL"],
        ["VITE_AZURE_TENANT_ID", "Yes", "Azure AD tenant ID for MSAL"],
        ["VITE_AZURE_REDIRECT_URI", "Yes", "OAuth2 redirect URI"],
    ]

    return [
        h1("Appendix A: Environment Variables"),
        h2("Backend (backend/.env)"),
        make_table(
            # Fresh header list per table, as each call originally got its own.
            list(column_titles),
            backend_vars,
            col_widths=[1.8 * inch, 0.7 * inch, CONTENT_W - 2.5 * inch],
        ),
        p("<i>* Required when DISABLE_AUTH is not true</i>"),
        spacer(0.15),
        h2("Frontend (frontend/.env.local)"),
        make_table(
            list(column_titles),
            frontend_vars,
            col_widths=[2.0 * inch, 0.7 * inch, CONTENT_W - 2.7 * inch],
        ),
        PageBreak(),
    ]


def build_tech_stack():
    """Assemble the story elements for "Appendix B: Technology Stack".

    The appendix holds a backend dependency table and a frontend dependency
    table separated by a spacer. No trailing page break is appended.

    Returns:
        list: The appendix's story elements, in document order.
    """
    column_titles = ["Package", "Version", "Purpose"]

    backend_packages = [
        ["fastapi", "0.115+", "Web framework (REST + WebSocket)"],
        ["uvicorn", "0.34+", "ASGI server"],
        ["sqlalchemy", "2.0+", "Async ORM"],
        ["asyncpg", "0.30+", "PostgreSQL async driver"],
        ["alembic", "1.14+", "Database migrations"],
        ["google-genai", "1.x", "Google Gemini API client"],
        ["llama-parse", "0.6+", "Document parsing service"],
        ["pydantic", "2.x", "Data validation and serialisation"],
        ["python-jose", "3.3+", "JWT decoding and verification"],
        ["pillow", "11.x", "Image processing (thumbnails)"],
        ["pymupdf", "1.25+", "PDF rasterisation"],
        ["httpx", "0.28+", "Async HTTP client"],
    ]

    frontend_packages = [
        ["react", "18.x", "UI component library"],
        ["typescript", "5.x", "Type-safe JavaScript"],
        ["vite", "5.x", "Build tool and dev server"],
        ["tailwindcss", "3.x", "Utility-first CSS framework"],
        ["@azure/msal-browser", "3.x", "Azure AD authentication (browser)"],
        ["@azure/msal-react", "2.x", "React bindings for MSAL"],
        ["lucide-react", "latest", "Icon library"],
        ["react-markdown", "latest", "Markdown rendering"],
        ["recharts", "2.x", "Charting library (Analytics)"],
    ]

    return [
        h1("Appendix B: Technology Stack"),
        h2("Backend Dependencies"),
        make_table(
            # Fresh header list per table, as each call originally got its own.
            list(column_titles),
            backend_packages,
            col_widths=[1.6 * inch, 1.0 * inch, CONTENT_W - 2.6 * inch],
        ),
        spacer(0.15),
        h2("Frontend Dependencies"),
        make_table(
            list(column_titles),
            frontend_packages,
            col_widths=[1.8 * inch, 1.0 * inch, CONTENT_W - 2.8 * inch],
        ),
    ]


# ---------------------------------------------------------------------------
# Main Build
# ---------------------------------------------------------------------------

def main():
    """Build the architecture PDF and report where it was written.

    Concatenates the output of every section builder into a single story,
    renders it to ``OUTPUT_PDF`` through ``ArchDocTemplate``, and prints the
    output path together with the file size in kilobytes.
    """
    print("Building ModComms Technical Architecture PDF...")
    doc = ArchDocTemplate(str(OUTPUT_PDF))

    # Document order: cover, table of contents, then the content sections
    # and appendices.
    section_builders = (
        build_cover,
        build_toc,
        build_executive_summary,
        build_system_architecture,
        build_agent_pipeline,
        build_websocket_flow,
        build_database_schema,
        build_frontend,
        build_auth_rbac,
        build_knowledge_base,
        build_deployment,
        build_api_reference,
        build_env_vars,
        build_tech_stack,
    )

    story = []
    for build_section in section_builders:
        story.extend(build_section())

    # multiBuild lays the document out in repeated passes so the table of
    # contents can pick up page numbers.
    doc.multiBuild(story)

    print(f"PDF generated: {OUTPUT_PDF}")
    size_kb = OUTPUT_PDF.stat().st_size / 1024
    print(f"  File size: {size_kb:.0f} KB")


# Script entry point: generate the PDF only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()