ford_qc/generate_pdf.py

#!/usr/bin/env python3
"""
Generate a professionally formatted PDF from the documentation markdown files.

Usage:
    python generate_pdf.py

Output:
    documentation/Ford_BnP_QC_Documentation.pdf
"""

import os
import re
import sys
import glob
import tempfile
import subprocess
from datetime import datetime

from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.colors import HexColor, black, white, Color
from reportlab.lib.units import mm, cm, inch
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT, TA_JUSTIFY
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, KeepTogether, Flowable, NextPageTemplate, PageTemplate,
    BaseDocTemplate, Frame, Image as RLImage
)
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.graphics import renderPDF
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

from PIL import Image as PILImage

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_DIR = os.path.join(SCRIPT_DIR, "documentation")
OUTPUT_PDF = os.path.join(DOCS_DIR, "Ford_BnP_QC_Documentation.pdf")
MMDC_PATH = "/opt/homebrew/bin/mmdc"

PAGE_W, PAGE_H = A4
MARGIN_LEFT = 55
MARGIN_RIGHT = 55
MARGIN_TOP = 50
MARGIN_BOTTOM = 50
CONTENT_W = PAGE_W - MARGIN_LEFT - MARGIN_RIGHT

# Colours
FORD_BLUE = HexColor("#003478")
DARK_BLUE = HexColor("#1a365d")
MEDIUM_BLUE = HexColor("#2b6cb0")
LIGHT_BLUE = HexColor("#ebf4ff")
HEADER_BG = HexColor("#2d3748")
ROW_ALT = HexColor("#f7fafc")
CODE_BG = HexColor("#f0f0f0")
CODE_BORDER = HexColor("#d0d0d0")
BLOCKQUOTE_BG = HexColor("#fffbeb")
BLOCKQUOTE_BORDER = HexColor("#f59e0b")
RULE_COLOR = HexColor("#cbd5e0")
LINK_COLOR = HexColor("#2b6cb0")

DOC_FILES = sorted(glob.glob(os.path.join(DOCS_DIR, "*.md")))


# ---------------------------------------------------------------------------
# Styles
# ---------------------------------------------------------------------------
def build_styles():
    """Create all paragraph styles used in the document."""
    styles = getSampleStyleSheet()

    styles.add(ParagraphStyle(
        "CoverTitle", fontName="Helvetica-Bold", fontSize=32,
        leading=40, textColor=white, alignment=TA_CENTER,
        spaceAfter=12,
    ))
    styles.add(ParagraphStyle(
        "CoverSubtitle", fontName="Helvetica", fontSize=16,
        leading=22, textColor=HexColor("#bee3f8"), alignment=TA_CENTER,
        spaceAfter=6,
    ))
    styles.add(ParagraphStyle(
        "CoverDate", fontName="Helvetica-Oblique", fontSize=12,
        leading=16, textColor=HexColor("#90cdf4"), alignment=TA_CENTER,
        spaceBefore=20,
    ))
    styles.add(ParagraphStyle(
        "H1", fontName="Helvetica-Bold", fontSize=22,
        leading=28, textColor=FORD_BLUE, spaceBefore=28, spaceAfter=12,
        borderPadding=(0, 0, 4, 0),
    ))
    styles.add(ParagraphStyle(
        "H2", fontName="Helvetica-Bold", fontSize=16,
        leading=22, textColor=DARK_BLUE, spaceBefore=20, spaceAfter=8,
    ))
    styles.add(ParagraphStyle(
        "H3", fontName="Helvetica-Bold", fontSize=13,
        leading=18, textColor=MEDIUM_BLUE, spaceBefore=14, spaceAfter=6,
    ))
    styles.add(ParagraphStyle(
        "H4", fontName="Helvetica-Bold", fontSize=11,
        leading=15, textColor=HexColor("#4a5568"), spaceBefore=10, spaceAfter=4,
    ))
    styles.add(ParagraphStyle(
        "BodyText2", fontName="Helvetica", fontSize=9.5,
        leading=14, textColor=black, spaceAfter=6,
        alignment=TA_JUSTIFY,
    ))
    styles.add(ParagraphStyle(
        "BulletItem", fontName="Helvetica", fontSize=9.5,
        leading=14, textColor=black, spaceAfter=3,
        leftIndent=18, bulletIndent=6, bulletFontSize=9.5,
    ))
    styles.add(ParagraphStyle(
        "BulletItemL2", fontName="Helvetica", fontSize=9.5,
        leading=14, textColor=black, spaceAfter=3,
        leftIndent=36, bulletIndent=24, bulletFontSize=9.5,
    ))
    styles.add(ParagraphStyle(
        "NumberedItem", fontName="Helvetica", fontSize=9.5,
        leading=14, textColor=black, spaceAfter=3,
        leftIndent=18, bulletIndent=6,
    ))
    styles.add(ParagraphStyle(
        "CodeBlock", fontName="Courier", fontSize=8,
        leading=11, textColor=HexColor("#1a202c"),
        backColor=CODE_BG, borderColor=CODE_BORDER,
        borderWidth=0.5, borderPadding=8, borderRadius=3,
        spaceAfter=8, spaceBefore=4, leftIndent=4, rightIndent=4,
    ))
    styles.add(ParagraphStyle(
        "BlockquoteText", fontName="Helvetica-Oblique", fontSize=9.5,
        leading=14, textColor=HexColor("#744210"),
        leftIndent=14, spaceAfter=6,
        borderColor=BLOCKQUOTE_BORDER, borderWidth=2,
        borderPadding=(6, 8, 6, 10),
    ))
    styles.add(ParagraphStyle(
        "TableCell", fontName="Helvetica", fontSize=8.5,
        leading=12, textColor=black,
    ))
    styles.add(ParagraphStyle(
        "TableHeader", fontName="Helvetica-Bold", fontSize=8.5,
        leading=12, textColor=white,
    ))
    styles.add(ParagraphStyle(
        "TOCHeading", fontName="Helvetica-Bold", fontSize=20,
        leading=26, textColor=FORD_BLUE, spaceAfter=20,
        alignment=TA_CENTER,
    ))
    styles.add(ParagraphStyle(
        "Caption", fontName="Helvetica-Oblique", fontSize=8.5,
        leading=12, textColor=HexColor("#718096"), alignment=TA_CENTER,
        spaceAfter=10, spaceBefore=4,
    ))
    return styles


# ---------------------------------------------------------------------------
# Mermaid rendering
# ---------------------------------------------------------------------------
def render_mermaid_diagrams(md_files, tmp_dir):
    """Extract all Mermaid blocks from markdown files and render.

    Renders to PNG at high resolution (4x scale) since svglib cannot handle
    the <foreignObject> HTML text elements that Mermaid uses in flowcharts.

    Returns a dict mapping (file_index, block_index_within_file) -> png_path.
    """
    diagrams = {}
    for fi, fpath in enumerate(md_files):
        with open(fpath, "r", encoding="utf-8") as f:
            text = f.read()

        pattern = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL)
        for bi, m in enumerate(pattern.finditer(text)):
            mmd_src = m.group(1).strip()
            mmd_file = os.path.join(tmp_dir, f"diagram_{fi}_{bi}.mmd")
            png_file = os.path.join(tmp_dir, f"diagram_{fi}_{bi}.png")
            with open(mmd_file, "w") as mf:
                mf.write(mmd_src)

            try:
                subprocess.run(
                    [MMDC_PATH, "-i", mmd_file, "-o", png_file,
                     "-b", "white", "-t", "neutral",
                     "--scale", "4"],
                    capture_output=True, text=True, timeout=60, check=True,
                )
                diagrams[(fi, bi)] = png_file
                print(f"  Rendered diagram {fi}:{bi} -> {os.path.basename(png_file)}")
            except Exception as exc:
                print(f"  WARNING: Failed to render diagram {fi}:{bi}: {exc}")
    return diagrams


# ---------------------------------------------------------------------------
# Inline formatting
# ---------------------------------------------------------------------------
def format_inline(text):
    """Convert markdown inline formatting to ReportLab XML tags."""
    # Escape XML entities first (but preserve existing tags we'll insert)
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")

    # Bold-italic ***text***
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<b><i>\1</i></b>", text)
    # Bold **text**
    text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)
    # Italic *text* (but not inside words like file_names)
    text = re.sub(r"(?<!\w)\*([^*]+?)\*(?!\w)", r"<i>\1</i>", text)
    # Inline code `text`
    text = re.sub(
        r"`([^`]+?)`",
        r'<font name="Courier" size="8" color="#c53030">\1</font>',
        text,
    )
    # Links [text](url) — just show the text in link colour
    text = re.sub(
        r"\[([^\]]+)\]\([^)]+\)",
        rf'<font color="{LINK_COLOR.hexval()}">\1</font>',
        text,
    )
    return text


def strip_markdown(text):
    """Remove markdown formatting, returning plain text."""
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"\1", text)
    text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
    text = re.sub(r"(?<!\w)\*([^*]+?)\*(?!\w)", r"\1", text)
    text = re.sub(r"`([^`]+?)`", r"\1", text)
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    return text


# ---------------------------------------------------------------------------
# Markdown parser
# ---------------------------------------------------------------------------
def parse_markdown(text):
    """Parse markdown text into a list of element dicts."""
    lines = text.split("\n")
    elements = []
    i = 0
    n = len(lines)

    def _flush_paragraph(buf):
        joined = " ".join(buf).strip()
        if joined:
            elements.append({"type": "paragraph", "text": joined})

    para_buf = []

    while i < n:
        line = lines[i]
        stripped = line.strip()

        # Blank line → flush paragraph
        if not stripped:
            _flush_paragraph(para_buf)
            para_buf = []
            i += 1
            continue

        # Fenced code block
        if stripped.startswith("```"):
            _flush_paragraph(para_buf)
            para_buf = []
            lang = stripped[3:].strip()
            code_lines = []
            i += 1
            while i < n and not lines[i].strip().startswith("```"):
                code_lines.append(lines[i])
                i += 1
            i += 1  # skip closing ```
            if lang == "mermaid":
                elements.append({"type": "mermaid", "code": "\n".join(code_lines)})
            else:
                elements.append({"type": "code_block", "lang": lang, "code": "\n".join(code_lines)})
            continue

        # Headings
        heading_match = re.match(r"^(#{1,4})\s+(.+)$", stripped)
        if heading_match:
            _flush_paragraph(para_buf)
            para_buf = []
            level = len(heading_match.group(1))
            elements.append({"type": "heading", "level": level, "text": heading_match.group(2)})
            i += 1
            continue

        # Horizontal rule
        if re.match(r"^-{3,}$", stripped) or re.match(r"^\*{3,}$", stripped):
            _flush_paragraph(para_buf)
            para_buf = []
            elements.append({"type": "hr"})
            i += 1
            continue

        # Table (starts with |)
        if stripped.startswith("|") and "|" in stripped[1:]:
            _flush_paragraph(para_buf)
            para_buf = []
            table_lines = []
            while i < n and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            elements.append({"type": "table", "lines": table_lines})
            continue

        # Blockquote
        if stripped.startswith(">"):
            _flush_paragraph(para_buf)
            para_buf = []
            bq_lines = []
            while i < n and lines[i].strip().startswith(">"):
                bq_lines.append(lines[i].strip().lstrip("> ").strip())
                i += 1
            elements.append({"type": "blockquote", "text": " ".join(bq_lines)})
            continue

        # Bullet list
        bullet_match = re.match(r"^(\s*)([-*])\s+(.+)$", stripped)
        if bullet_match:
            _flush_paragraph(para_buf)
            para_buf = []
            items = []
            while i < n:
                bm = re.match(r"^(\s*)([-*])\s+(.+)$", lines[i].strip())
                if bm:
                    indent = len(lines[i]) - len(lines[i].lstrip())
                    items.append({"text": bm.group(3), "indent": indent})
                    i += 1
                elif lines[i].strip() == "":
                    # Check if next non-blank line is still a list item
                    peek = i + 1
                    while peek < n and lines[peek].strip() == "":
                        peek += 1
                    if peek < n and re.match(r"^\s*[-*]\s+", lines[peek]):
                        i += 1
                        continue
                    break
                else:
                    # Continuation of previous item
                    if items:
                        items[-1]["text"] += " " + lines[i].strip()
                    i += 1
                    continue
            elements.append({"type": "bullet_list", "items": items})
            continue

        # Ordered list
        ordered_match = re.match(r"^(\d+)\.\s+(.+)$", stripped)
        if ordered_match:
            _flush_paragraph(para_buf)
            para_buf = []
            items = []
            while i < n:
                om = re.match(r"^\s*(\d+)\.\s+(.+)$", lines[i].strip())
                if om:
                    items.append({"num": om.group(1), "text": om.group(2)})
                    i += 1
                elif lines[i].strip() == "":
                    peek = i + 1
                    while peek < n and lines[peek].strip() == "":
                        peek += 1
                    if peek < n and re.match(r"^\s*\d+\.\s+", lines[peek]):
                        i += 1
                        continue
                    break
                else:
                    if items:
                        items[-1]["text"] += " " + lines[i].strip()
                    i += 1
                    continue
            elements.append({"type": "ordered_list", "items": items})
            continue

        # Regular paragraph text
        para_buf.append(stripped)
        i += 1

    _flush_paragraph(para_buf)
    return elements


# ---------------------------------------------------------------------------
# Table builder
# ---------------------------------------------------------------------------
def build_table_flowable(table_element, styles):
    """Convert a parsed table element into a ReportLab Table."""
    raw_lines = table_element["lines"]
    # Filter out separator lines (|---|---|)
    data_lines = [l for l in raw_lines if not re.match(r"^\|[\s\-:|]+\|$", l)]
    if not data_lines:
        return None

    rows = []
    for line in data_lines:
        cells = [c.strip() for c in line.strip("|").split("|")]
        rows.append(cells)

    if not rows:
        return None

    # Build Paragraph cells
    header_style = styles["TableHeader"]
    cell_style = styles["TableCell"]

    table_data = []
    for ri, row in enumerate(rows):
        style = header_style if ri == 0 else cell_style
        table_data.append([Paragraph(format_inline(c), style) for c in row])

    num_cols = max(len(r) for r in table_data)
    # Pad short rows
    for r in table_data:
        while len(r) < num_cols:
            r.append(Paragraph("", cell_style))

    # Calculate column widths
    avail = CONTENT_W - 4
    col_w = avail / num_cols
    col_widths = [col_w] * num_cols

    tbl = Table(table_data, colWidths=col_widths, repeatRows=1)

    # Style
    style_cmds = [
        ("BACKGROUND", (0, 0), (-1, 0), HEADER_BG),
        ("TEXTCOLOR", (0, 0), (-1, 0), white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 8.5),
        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
        ("VALIGN", (0, 0), (-1, -1), "TOP"),
        ("TOPPADDING", (0, 0), (-1, -1), 5),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
        ("LEFTPADDING", (0, 0), (-1, -1), 6),
        ("RIGHTPADDING", (0, 0), (-1, -1), 6),
        ("GRID", (0, 0), (-1, -1), 0.5, HexColor("#e2e8f0")),
        ("LINEBELOW", (0, 0), (-1, 0), 1.5, FORD_BLUE),
    ]

    # Alternating row colours
    for ri in range(1, len(table_data)):
        if ri % 2 == 0:
            style_cmds.append(("BACKGROUND", (0, ri), (-1, ri), ROW_ALT))

    tbl.setStyle(TableStyle(style_cmds))
    return tbl


# ---------------------------------------------------------------------------
# SVG embedding
# ---------------------------------------------------------------------------
def embed_diagram(png_path, max_width=None):
    """Embed a PNG diagram image, scaled to fit the content width."""
    if max_width is None:
        max_width = CONTENT_W
    try:
        from PIL import Image as PILImage
        with PILImage.open(png_path) as img:
            img_w, img_h = img.size

        # The PNG is rendered at 4x scale, so its pixel dimensions are large.
        # Scale it down to fit the content width while maintaining aspect ratio.
        scale = min(max_width / img_w, 1.0)
        max_height = 420
        if img_h * scale > max_height:
            scale = max_height / img_h

        display_w = img_w * scale
        display_h = img_h * scale

        return RLImage(png_path, width=display_w, height=display_h)
    except Exception as e:
        print(f"  WARNING: Could not embed diagram {png_path}: {e}")
        return None


# ---------------------------------------------------------------------------
# Cover page & page templates
# ---------------------------------------------------------------------------
class CoverPage(Flowable):
    """Full-page cover with gradient background."""

    def __init__(self, width, height):
        super().__init__()
        self.width = width
        self.height = height

    def wrap(self, aW, aH):
        return (self.width, self.height)

    def draw(self):
        c = self.canv
        h = self.height
        w = self.width

        # Draw gradient background (using horizontal bands)
        steps = 60
        for i in range(steps):
            frac = i / steps
            r = 0.0 + frac * 0.05
            g = 0.2 * (1 - frac * 0.3)
            b = 0.47 * (1 - frac * 0.15)
            band_h = h / steps
            y = h - (i + 1) * band_h
            c.setFillColor(Color(r, g, b))
            c.rect(0, y, w, band_h + 1, fill=True, stroke=False)

        # Decorative line
        c.setStrokeColor(HexColor("#63b3ed"))
        c.setLineWidth(2)
        c.line(w * 0.2, h * 0.52, w * 0.8, h * 0.52)

        # Title
        c.setFillColor(white)
        c.setFont("Helvetica-Bold", 34)
        c.drawCentredString(w / 2, h * 0.65, "Ford BnP QC System")

        c.setFont("Helvetica", 18)
        c.setFillColor(HexColor("#bee3f8"))
        c.drawCentredString(w / 2, h * 0.59, "Technical Documentation")

        # Subtitle
        c.setFont("Helvetica", 13)
        c.setFillColor(HexColor("#90cdf4"))
        c.drawCentredString(w / 2, h * 0.46,
                            "Build & Price Asset Pack Quality Control System")

        # Date
        c.setFont("Helvetica-Oblique", 12)
        c.setFillColor(HexColor("#90cdf4"))
        date_str = datetime.now().strftime("%B %d, %Y")
        c.drawCentredString(w / 2, h * 0.40, f"Generated: {date_str}")

        # Bottom decorative line
        c.setStrokeColor(HexColor("#63b3ed"))
        c.setLineWidth(1)
        c.line(w * 0.3, h * 0.15, w * 0.7, h * 0.15)

        c.setFont("Helvetica", 10)
        c.setFillColor(HexColor("#a0aec0"))
        c.drawCentredString(w / 2, h * 0.12, "Confidential — For Internal Use Only")


class HorizontalRule(Flowable):
    """A thin horizontal line."""

    def __init__(self, width, color=RULE_COLOR, thickness=0.75):
        super().__init__()
        self.width = width
        self.color = color
        self.thickness = thickness

    def wrap(self, aW, aH):
        return (self.width, self.thickness + 6)

    def draw(self):
        self.canv.setStrokeColor(self.color)
        self.canv.setLineWidth(self.thickness)
        self.canv.line(0, 3, self.width, 3)


class HeadingRule(Flowable):
    """A coloured rule drawn under chapter headings."""

    def __init__(self, width, color=FORD_BLUE, thickness=1.5):
        super().__init__()
        self.width = width
        self.color = color
        self.thickness = thickness

    def wrap(self, aW, aH):
        return (self.width, self.thickness + 2)

    def draw(self):
        self.canv.setStrokeColor(self.color)
        self.canv.setLineWidth(self.thickness)
        self.canv.line(0, 1, self.width * 0.35, 1)


# ---------------------------------------------------------------------------
# Heading anchor — a flowable that records its page number for TOC
# ---------------------------------------------------------------------------
class HeadingAnchor(Flowable):
    """Invisible flowable that bookmarks its position and updates chapter title."""

    def __init__(self, key, level, text, collector):
        super().__init__()
        self.key = key
        self.level = level
        self.text = text
        self.collector = collector

    def wrap(self, aW, aH):
        return (0, 0)

    def draw(self):
        global _chapter_title
        self.canv.bookmarkPage(self.key)
        self.collector.append((self.level, self.text, self.canv.getPageNumber(), self.key))
        if self.level == 0:
            _chapter_title = self.text


# ---------------------------------------------------------------------------
# Two-pass page handler
# ---------------------------------------------------------------------------
_chapter_title = ""


def content_page_handler(canvas, doc):
    global _chapter_title
    canvas.saveState()
    # Header line
    canvas.setStrokeColor(FORD_BLUE)
    canvas.setLineWidth(0.75)
    canvas.line(MARGIN_LEFT, PAGE_H - 35, PAGE_W - MARGIN_RIGHT, PAGE_H - 35)
    # Header text
    canvas.setFont("Helvetica", 7.5)
    canvas.setFillColor(HexColor("#718096"))
    canvas.drawString(MARGIN_LEFT, PAGE_H - 30,
                      "Ford BnP QC System \u2014 Technical Documentation")
    if _chapter_title:
        canvas.drawRightString(PAGE_W - MARGIN_RIGHT, PAGE_H - 30,
                               _chapter_title)
    # Footer
    canvas.setStrokeColor(RULE_COLOR)
    canvas.setLineWidth(0.5)
    canvas.line(MARGIN_LEFT, MARGIN_BOTTOM - 10,
                PAGE_W - MARGIN_RIGHT, MARGIN_BOTTOM - 10)
    canvas.setFont("Helvetica", 8)
    canvas.setFillColor(HexColor("#a0aec0"))
    canvas.drawCentredString(PAGE_W / 2, MARGIN_BOTTOM - 22,
                             f"Page {doc.page}")
    canvas.drawRightString(PAGE_W - MARGIN_RIGHT, MARGIN_BOTTOM - 22,
                           datetime.now().strftime("%Y-%m-%d"))
    canvas.restoreState()


def cover_page_handler(canvas, doc):
    pass


# ---------------------------------------------------------------------------
# Build content story (without TOC — used in both passes)
# ---------------------------------------------------------------------------
def build_content_story(md_files, diagrams, styles, toc_collector, chapter_titles):
    """Build the content flowables from markdown files."""
    global _chapter_title
    _chapter_title = ""
    story = []
    anchor_idx = 0

    for fi, fpath in enumerate(md_files):
        with open(fpath, "r", encoding="utf-8") as f:
            text = f.read()

        elements = parse_markdown(text)
        mermaid_idx = 0

        for el in elements:
            etype = el["type"]

            if etype == "heading":
                level = el["level"]
                htxt = format_inline(el["text"])
                plain = strip_markdown(el["text"])
                style_name = f"H{min(level, 4)}"

                # Add bookmark anchor for TOC (levels 1-3)
                if level <= 3:
                    key = f"heading_{anchor_idx}"
                    anchor_idx += 1
                    story.append(HeadingAnchor(key, level - 1, plain, toc_collector))

                story.append(Paragraph(htxt, styles[style_name]))
                if level == 1:
                    story.append(HeadingRule(CONTENT_W))

            elif etype == "paragraph":
                story.append(Paragraph(format_inline(el["text"]), styles["BodyText2"]))

            elif etype == "bullet_list":
                for item in el["items"]:
                    indent = item.get("indent", 0)
                    sn = "BulletItemL2" if indent >= 4 else "BulletItem"
                    story.append(
                        Paragraph(
                            f"\u2022  {format_inline(item['text'])}",
                            styles[sn],
                        )
                    )

            elif etype == "ordered_list":
                for item in el["items"]:
                    story.append(
                        Paragraph(
                            f"{item['num']}.  {format_inline(item['text'])}",
                            styles["NumberedItem"],
                        )
                    )

            elif etype == "code_block":
                code_text = el["code"]
                # Replace Unicode box-drawing characters with ASCII equivalents
                # (Courier font lacks these glyphs)
                code_text = code_text.replace("\u2514", "`")   # └
                code_text = code_text.replace("\u251c", "|")   # ├
                code_text = code_text.replace("\u2502", "|")   # │
                code_text = code_text.replace("\u2500", "-")   # ─
                code_text = code_text.replace("\u2510", "+")   # ┐
                code_text = code_text.replace("\u250c", "+")   # ┌
                code_text = code_text.replace("\u2518", "+")   # ┘
                code_text = code_text.replace("\u253c", "+")   # ┼
                # Escape XML
                code_text = code_text.replace("&", "&amp;")
                code_text = code_text.replace("<", "&lt;")
                code_text = code_text.replace(">", "&gt;")
                code_text = code_text.replace("\n", "<br/>")
                code_text = code_text.replace("  ", " &nbsp;")
                code_text = code_text.replace("\t", " &nbsp;&nbsp;&nbsp;")
                story.append(Paragraph(code_text, styles["CodeBlock"]))

            elif etype == "mermaid":
                dia_key = (fi, mermaid_idx)
                mermaid_idx += 1
                dia_path = diagrams.get(dia_key)
                if dia_path:
                    img = embed_diagram(dia_path)
                    if img:
                        story.append(Spacer(1, 6))
                        story.append(img)
                        story.append(Spacer(1, 6))
                    else:
                        story.append(Paragraph(
                            "<i>[Diagram could not be embedded]</i>",
                            styles["Caption"],
                        ))
                else:
                    story.append(Paragraph(
                        "<i>[Diagram not rendered]</i>",
                        styles["Caption"],
                    ))

            elif etype == "table":
                tbl = build_table_flowable(el, styles)
                if tbl:
                    story.append(Spacer(1, 4))
                    story.append(tbl)
                    story.append(Spacer(1, 4))

            elif etype == "blockquote":
                story.append(
                    Paragraph(format_inline(el["text"]), styles["BlockquoteText"])
                )

            elif etype == "hr":
                story.append(Spacer(1, 4))
                story.append(HorizontalRule(CONTENT_W))
                story.append(Spacer(1, 4))

        # Page break between chapters (except after last).
        # Set the NEXT chapter's title BEFORE the PageBreak so the header
        # on the first page of the next chapter is correct.
        if fi < len(md_files) - 1:
            story.append(ChapterTitleSetter(chapter_titles[fi + 1]))
            story.append(PageBreak())

    return story


class ChapterTitleSetter(Flowable):
    """Invisible flowable that sets the chapter title for headers."""
    def __init__(self, title):
        super().__init__()
        self.title = title
    def wrap(self, aW, aH):
        return (0, 0)
    def draw(self):
        global _chapter_title
        _chapter_title = self.title


def build_toc_flowables(toc_entries, styles, toc_page_offset):
    """Build a manual TOC from collected entries."""
    flowables = []
    flowables.append(ChapterTitleSetter("Table of Contents"))
    flowables.append(Paragraph("Table of Contents", styles["TOCHeading"]))
    flowables.append(Spacer(1, 12))

    toc1 = ParagraphStyle(
        "TOCEntry1", fontName="Helvetica-Bold", fontSize=12,
        leading=22, leftIndent=0, textColor=DARK_BLUE, spaceBefore=10,
    )
    toc2 = ParagraphStyle(
        "TOCEntry2", fontName="Helvetica", fontSize=10,
        leading=18, leftIndent=24, textColor=HexColor("#4a5568"), spaceBefore=3,
    )
    toc3 = ParagraphStyle(
        "TOCEntry3", fontName="Helvetica", fontSize=9,
        leading=15, leftIndent=48, textColor=HexColor("#718096"), spaceBefore=2,
    )
    level_styles = [toc1, toc2, toc3]

    for level, text, page_num, key in toc_entries:
        # Adjust page number: pass 1 pages include cover (1) but not TOC pages
        adjusted_page = page_num + toc_page_offset
        style = level_styles[min(level, 2)]
        # Dots leader + page number
        dots = " " + "\u00b7 " * 3
        entry_text = (
            f'<a href="#{key}">{text}</a>'
            f'<font color="#a0aec0">{dots}</font>'
            f'<font color="#718096">{adjusted_page}</font>'
        )
        flowables.append(Paragraph(entry_text, style))

    # NOTE: The caller should append a ChapterTitleSetter for the first chapter
    # BEFORE this PageBreak so the header is correct on the first content page.
    flowables.append(PageBreak())
    return flowables


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    print("Ford BnP QC Documentation \u2014 PDF Generator")
    print("=" * 50)

    if not DOC_FILES:
        print("ERROR: No markdown files found in documentation/")
        sys.exit(1)

    print(f"\nFound {len(DOC_FILES)} documentation files:")
    for f in DOC_FILES:
        print(f"  {os.path.basename(f)}")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Render Mermaid diagrams
        print("\nRendering Mermaid diagrams...")
        diagrams = render_mermaid_diagrams(DOC_FILES, tmp_dir)
        print(f"  {len(diagrams)} diagrams rendered successfully")

        styles = build_styles()

        # Pre-scan all files for H1 titles (for page header tracking)
        chapter_titles_for_headers = []
        for fpath in DOC_FILES:
            with open(fpath, "r", encoding="utf-8") as f:
                ftxt = f.read()
            title = ""
            for line in ftxt.split("\n"):
                m = re.match(r"^#\s+(.+)$", line.strip())
                if m:
                    title = strip_markdown(m.group(1))
                    break
            chapter_titles_for_headers.append(title)

        # ---- Pass 1: build without TOC to collect heading page numbers ----
        print("\nPass 1: Collecting heading positions...")
        toc_collector = []
        pass1_story = []
        # Cover page
        pass1_story.append(CoverPage(PAGE_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM + 30))
        pass1_story.append(NextPageTemplate("content"))
        pass1_story.append(PageBreak())
        # Content (no TOC pages)
        pass1_story.extend(build_content_story(
            DOC_FILES, diagrams, styles, toc_collector, chapter_titles_for_headers
        ))

        # Build to a throwaway file to collect page numbers
        pass1_path = os.path.join(tmp_dir, "pass1.pdf")
        content_frame = Frame(
            MARGIN_LEFT, MARGIN_BOTTOM,
            CONTENT_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM,
            id="content",
        )
        cover_frame = Frame(0, 0, PAGE_W, PAGE_H, id="cover")

        doc1 = BaseDocTemplate(pass1_path, pagesize=A4)
        doc1.addPageTemplates([
            PageTemplate(id="cover", frames=[cover_frame],
                         onPage=cover_page_handler),
            PageTemplate(id="content", frames=[content_frame],
                         onPage=content_page_handler),
        ])
        doc1.build(pass1_story)
        print(f"  Collected {len(toc_collector)} headings across {doc1.page} pages")

        # ---- Estimate TOC pages so we can offset page numbers ----
        # Build the TOC flowables to measure how many pages they need
        toc_test = build_toc_flowables(toc_collector, styles, 0)
        # Rough estimate: measure total height of TOC entries
        toc_height = 12 + 22  # title + spacer
        for level, text, page_num, key in toc_collector:
            if level == 0:
                toc_height += 32  # TOC1 entry
            elif level == 1:
                toc_height += 21  # TOC2 entry
            else:
                toc_height += 17  # TOC3 entry
        frame_height = PAGE_H - MARGIN_TOP - MARGIN_BOTTOM
        toc_pages = max(1, int(toc_height / frame_height) + 1)
        print(f"  Estimated TOC pages: {toc_pages}")

        # ---- Pass 2: build final PDF with TOC ----
        print("\nPass 2: Building final PDF with Table of Contents...")
        global _chapter_title
        _chapter_title = ""
        toc_collector2 = []

        final_story = []
        # Cover page
        final_story.append(CoverPage(PAGE_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM + 30))
        final_story.append(NextPageTemplate("content"))
        final_story.append(PageBreak())
        # TOC (with adjusted page numbers)
        toc_flowables = build_toc_flowables(toc_collector, styles, toc_pages)
        # Insert first chapter's title setter before the TOC's final PageBreak
        # so the header is correct when the first content page renders.
        toc_flowables.insert(-1, ChapterTitleSetter(chapter_titles_for_headers[0]))
        final_story.extend(toc_flowables)
        # Content
        final_story.extend(build_content_story(
            DOC_FILES, diagrams, styles, toc_collector2, chapter_titles_for_headers
        ))

        content_frame2 = Frame(
            MARGIN_LEFT, MARGIN_BOTTOM,
            CONTENT_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM,
            id="content",
        )
        cover_frame2 = Frame(0, 0, PAGE_W, PAGE_H, id="cover")

        doc2 = BaseDocTemplate(
            OUTPUT_PDF, pagesize=A4,
            title="Ford BnP QC System \u2014 Technical Documentation",
            author="Ford BnP QC Team",
            subject="Quality Control System Documentation",
        )
        doc2.addPageTemplates([
            PageTemplate(id="cover", frames=[cover_frame2],
                         onPage=cover_page_handler),
            PageTemplate(id="content", frames=[content_frame2],
                         onPage=content_page_handler),
        ])
        doc2.build(final_story)

    print(f"\nPDF generated: {OUTPUT_PDF}")
    print(f"File size: {os.path.getsize(OUTPUT_PDF) / 1024:.1f} KB")


if __name__ == "__main__":
    main()