ford_qc/generate_pdf.py
2026-02-23 10:49:49 -06:00

964 lines
34 KiB
Python

#!/usr/bin/env python3
"""
Generate a professionally formatted PDF from the documentation markdown files.
Usage:
python generate_pdf.py
Output:
documentation/Ford_BnP_QC_Documentation.pdf
"""
import glob
import os
import re
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime

from PIL import Image as PILImage
from reportlab.graphics import renderPDF
from reportlab.lib.colors import HexColor, black, white, Color
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT, TA_JUSTIFY
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm, cm, inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, KeepTogether, Flowable, NextPageTemplate, PageTemplate,
    BaseDocTemplate, Frame, Image as RLImage
)
from reportlab.platypus.tableofcontents import TableOfContents
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Paths are resolved relative to this script so the generator can be launched
# from any working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_DIR = os.path.join(SCRIPT_DIR, "documentation")
OUTPUT_PDF = os.path.join(DOCS_DIR, "Ford_BnP_QC_Documentation.pdf")

# Mermaid CLI executable. Resolution order: the MMDC_PATH environment
# variable, then `mmdc` on PATH, then the Homebrew location that used to be
# hard-coded here.
MMDC_PATH = (
    os.environ.get("MMDC_PATH")
    or shutil.which("mmdc")
    or "/opt/homebrew/bin/mmdc"
)

# Page geometry (points). CONTENT_W is the usable width between the margins.
PAGE_W, PAGE_H = A4
MARGIN_LEFT = 55
MARGIN_RIGHT = 55
MARGIN_TOP = 50
MARGIN_BOTTOM = 50
CONTENT_W = PAGE_W - MARGIN_LEFT - MARGIN_RIGHT

# Colours
FORD_BLUE = HexColor("#003478")
DARK_BLUE = HexColor("#1a365d")
MEDIUM_BLUE = HexColor("#2b6cb0")
LIGHT_BLUE = HexColor("#ebf4ff")
HEADER_BG = HexColor("#2d3748")
ROW_ALT = HexColor("#f7fafc")
CODE_BG = HexColor("#f0f0f0")
CODE_BORDER = HexColor("#d0d0d0")
BLOCKQUOTE_BG = HexColor("#fffbeb")
BLOCKQUOTE_BORDER = HexColor("#f59e0b")
RULE_COLOR = HexColor("#cbd5e0")
LINK_COLOR = HexColor("#2b6cb0")

# Markdown sources, in lexicographic filename order; each file becomes one
# chapter of the PDF.
DOC_FILES = sorted(glob.glob(os.path.join(DOCS_DIR, "*.md")))
# ---------------------------------------------------------------------------
# Styles
# ---------------------------------------------------------------------------
def build_styles():
    """Return the sample stylesheet extended with this document's styles.

    The custom paragraph styles are declared as (name, attributes) pairs and
    registered in declaration order on top of ReportLab's sample stylesheet.
    """
    sheet = getSampleStyleSheet()

    # Attributes shared by body-like text (paragraphs and list items).
    body = {
        "fontName": "Helvetica",
        "fontSize": 9.5,
        "leading": 14,
        "textColor": black,
    }

    specs = [
        ("CoverTitle", dict(
            fontName="Helvetica-Bold", fontSize=32, leading=40,
            textColor=white, alignment=TA_CENTER, spaceAfter=12,
        )),
        ("CoverSubtitle", dict(
            fontName="Helvetica", fontSize=16, leading=22,
            textColor=HexColor("#bee3f8"), alignment=TA_CENTER, spaceAfter=6,
        )),
        ("CoverDate", dict(
            fontName="Helvetica-Oblique", fontSize=12, leading=16,
            textColor=HexColor("#90cdf4"), alignment=TA_CENTER, spaceBefore=20,
        )),
        ("H1", dict(
            fontName="Helvetica-Bold", fontSize=22, leading=28,
            textColor=FORD_BLUE, spaceBefore=28, spaceAfter=12,
            borderPadding=(0, 0, 4, 0),
        )),
        ("H2", dict(
            fontName="Helvetica-Bold", fontSize=16, leading=22,
            textColor=DARK_BLUE, spaceBefore=20, spaceAfter=8,
        )),
        ("H3", dict(
            fontName="Helvetica-Bold", fontSize=13, leading=18,
            textColor=MEDIUM_BLUE, spaceBefore=14, spaceAfter=6,
        )),
        ("H4", dict(
            fontName="Helvetica-Bold", fontSize=11, leading=15,
            textColor=HexColor("#4a5568"), spaceBefore=10, spaceAfter=4,
        )),
        ("BodyText2", dict(body, spaceAfter=6, alignment=TA_JUSTIFY)),
        ("BulletItem", dict(
            body, spaceAfter=3,
            leftIndent=18, bulletIndent=6, bulletFontSize=9.5,
        )),
        ("BulletItemL2", dict(
            body, spaceAfter=3,
            leftIndent=36, bulletIndent=24, bulletFontSize=9.5,
        )),
        ("NumberedItem", dict(
            body, spaceAfter=3, leftIndent=18, bulletIndent=6,
        )),
        ("CodeBlock", dict(
            fontName="Courier", fontSize=8, leading=11,
            textColor=HexColor("#1a202c"),
            backColor=CODE_BG, borderColor=CODE_BORDER,
            borderWidth=0.5, borderPadding=8, borderRadius=3,
            spaceAfter=8, spaceBefore=4, leftIndent=4, rightIndent=4,
        )),
        ("BlockquoteText", dict(
            fontName="Helvetica-Oblique", fontSize=9.5, leading=14,
            textColor=HexColor("#744210"),
            leftIndent=14, spaceAfter=6,
            borderColor=BLOCKQUOTE_BORDER, borderWidth=2,
            borderPadding=(6, 8, 6, 10),
        )),
        ("TableCell", dict(
            fontName="Helvetica", fontSize=8.5, leading=12, textColor=black,
        )),
        ("TableHeader", dict(
            fontName="Helvetica-Bold", fontSize=8.5, leading=12,
            textColor=white,
        )),
        ("TOCHeading", dict(
            fontName="Helvetica-Bold", fontSize=20, leading=26,
            textColor=FORD_BLUE, spaceAfter=20, alignment=TA_CENTER,
        )),
        ("Caption", dict(
            fontName="Helvetica-Oblique", fontSize=8.5, leading=12,
            textColor=HexColor("#718096"), alignment=TA_CENTER,
            spaceAfter=10, spaceBefore=4,
        )),
    ]

    for style_name, attributes in specs:
        sheet.add(ParagraphStyle(style_name, **attributes))
    return sheet
# ---------------------------------------------------------------------------
# Mermaid rendering
# ---------------------------------------------------------------------------
# Matches a fenced ```mermaid block and captures its body.
_MERMAID_BLOCK_RE = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL)


def render_mermaid_diagrams(md_files, tmp_dir):
    """Extract all Mermaid blocks from markdown files and render them to PNG.

    Renders to PNG at high resolution (4x scale) since svglib cannot handle
    the <foreignObject> HTML text elements that Mermaid uses in flowcharts.

    Args:
        md_files: Paths of the markdown files to scan, read as UTF-8.
        tmp_dir: Directory that receives the intermediate ``.mmd`` sources
            and the rendered ``.png`` files.

    Returns:
        A dict mapping (file_index, block_index_within_file) -> png_path.
        Diagrams that fail to render are reported on stdout and omitted.
    """
    diagrams = {}
    for fi, fpath in enumerate(md_files):
        with open(fpath, "r", encoding="utf-8") as f:
            text = f.read()
        for bi, m in enumerate(_MERMAID_BLOCK_RE.finditer(text)):
            mmd_src = m.group(1).strip()
            mmd_file = os.path.join(tmp_dir, f"diagram_{fi}_{bi}.mmd")
            png_file = os.path.join(tmp_dir, f"diagram_{fi}_{bi}.png")
            # The source was read as UTF-8, so write it back as UTF-8 instead
            # of the platform default encoding.
            with open(mmd_file, "w", encoding="utf-8") as mf:
                mf.write(mmd_src)
            try:
                subprocess.run(
                    [MMDC_PATH, "-i", mmd_file, "-o", png_file,
                     "-b", "white", "-t", "neutral",
                     "--scale", "4"],
                    capture_output=True, text=True, errors="replace",
                    timeout=60, check=True,
                )
            except (OSError, subprocess.SubprocessError) as exc:
                # Covers a missing/unrunnable executable, a non-zero exit
                # status and a timeout; rendering stays best-effort.
                print(f" WARNING: Failed to render diagram {fi}:{bi}: {exc}")
                stderr = getattr(exc, "stderr", None)
                if stderr:
                    print(f" mmdc stderr: {stderr.strip()}")
            else:
                diagrams[(fi, bi)] = png_file
                print(f" Rendered diagram {fi}:{bi} -> {os.path.basename(png_file)}")
    return diagrams
# ---------------------------------------------------------------------------
# Inline formatting
# ---------------------------------------------------------------------------
def format_inline(text):
    """Convert markdown inline formatting to ReportLab XML tags.

    Handles ``***bold italic***``, ``**bold**``, ``*italic*``, `` `code` ``
    and ``[text](url)`` (the link text is shown in the link colour; the URL
    is dropped). XML special characters in the input are escaped first.

    Inline code spans are set aside before emphasis and link processing, so
    markdown characters inside them (for example `` `**kwargs` ``) are shown
    literally instead of producing mis-nested tags.
    """
    # Escape XML entities first (but preserve existing tags we'll insert)
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")

    # Stash inline code behind NUL-delimited placeholders so the emphasis
    # regexes below cannot match inside (or across) code spans.
    code_spans = []

    def _stash_code(match):
        code_spans.append(match.group(1))
        return f"\x00{len(code_spans) - 1}\x00"

    text = re.sub(r"`([^`]+?)`", _stash_code, text)

    # Bold-italic ***text***
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<b><i>\1</i></b>", text)
    # Bold **text**
    text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)
    # Italic *text* (only when the asterisks are not adjacent to word chars)
    text = re.sub(r"(?<!\w)\*([^*]+?)\*(?!\w)", r"<i>\1</i>", text)

    # Links [text](url) — just show the text in link colour
    def _colour_link(match):
        return f'<font color="{LINK_COLOR.hexval()}">{match.group(1)}</font>'

    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", _colour_link, text)

    # Put the code spans back, wrapped in the inline-code font.
    def _restore_code(match):
        index = int(match.group(1))
        if index >= len(code_spans):
            # Not one of our placeholders; leave the text as it was.
            return match.group(0)
        return (
            '<font name="Courier" size="8" color="#c53030">'
            f"{code_spans[index]}</font>"
        )

    return re.sub(r"\x00(\d+)\x00", _restore_code, text)
def strip_markdown(text):
    """Return ``text`` with inline markdown markers removed.

    Each pattern keeps only its first capture group: the emphasised text,
    the inline code, or the visible label of a link.
    """
    unwrap_patterns = (
        r"\*\*\*(.+?)\*\*\*",            # ***bold italic***
        r"\*\*(.+?)\*\*",                # **bold**
        r"(?<!\w)\*([^*]+?)\*(?!\w)",    # *italic*
        r"`([^`]+?)`",                   # `code`
        r"\[([^\]]+)\]\([^)]+\)",        # [label](url)
    )
    plain = text
    for pattern in unwrap_patterns:
        plain = re.sub(pattern, r"\1", plain)
    return plain
# ---------------------------------------------------------------------------
# Markdown parser
# ---------------------------------------------------------------------------
_MD_FENCE = "```"
_MD_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$")
_MD_RULE_RE = re.compile(r"^(?:-{3,}|\*{3,})$")
_MD_BULLET_RE = re.compile(r"^(\s*)([-*])\s+(.+)$")
_MD_BULLET_START_RE = re.compile(r"^\s*[-*]\s+")
_MD_ORDERED_RE = re.compile(r"^\s*(\d+)\.\s+(.+)$")
_MD_ORDERED_START_RE = re.compile(r"^\s*\d+\.\s+")


def _collect_list_items(lines, start, item_re, start_re, make_item):
    """Collect one list beginning at ``lines[start]``.

    ``item_re`` is matched against each stripped line to recognise an item;
    ``start_re`` is matched against the raw line that follows a run of blank
    lines to decide whether the list continues; ``make_item(raw_line, match)``
    builds the item dict.

    Returns ``(items, next_index)`` where ``next_index`` is the first line
    that does not belong to the list.
    """
    items = []
    n = len(lines)
    i = start
    while i < n:
        raw = lines[i]
        stripped = raw.strip()
        match = item_re.match(stripped)
        if match:
            items.append(make_item(raw, match))
            i += 1
        elif not stripped:
            # Blank lines only belong to the list when another item follows.
            peek = i + 1
            while peek < n and not lines[peek].strip():
                peek += 1
            if peek < n and start_re.match(lines[peek]):
                i += 1
                continue
            break
        elif stripped.startswith(_MD_FENCE) or _MD_HEADING_RE.match(stripped):
            # A heading or code fence directly after an item ends the list
            # rather than being folded into the item's text.
            break
        else:
            # Continuation of previous item
            if items:
                items[-1]["text"] += " " + stripped
            i += 1
    return items, i


def parse_markdown(text):
    """Parse markdown text into a list of element dicts.

    Every element has a ``"type"`` key. The shapes produced are:

    * ``{"type": "paragraph", "text": str}``
    * ``{"type": "heading", "level": 1..6, "text": str}``
    * ``{"type": "code_block", "lang": str, "code": str}``
    * ``{"type": "mermaid", "code": str}``
    * ``{"type": "hr"}``
    * ``{"type": "table", "lines": [str, ...]}`` (raw, stripped table rows)
    * ``{"type": "blockquote", "text": str}``
    * ``{"type": "bullet_list", "items": [{"text", "indent"}, ...]}``
    * ``{"type": "ordered_list", "items": [{"num", "text"}, ...]}``

    Inline markup inside the text fields is left untouched.
    """
    lines = text.split("\n")
    n = len(lines)
    elements = []
    para_buf = []

    def _flush_paragraph():
        joined = " ".join(para_buf).strip()
        if joined:
            elements.append({"type": "paragraph", "text": joined})
        para_buf.clear()

    def _bullet_item(raw, match):
        # Indent is measured on the raw line so nesting depth is preserved.
        return {"text": match.group(3), "indent": len(raw) - len(raw.lstrip())}

    def _ordered_item(raw, match):
        return {"num": match.group(1), "text": match.group(2)}

    i = 0
    while i < n:
        stripped = lines[i].strip()

        # Blank line → flush paragraph
        if not stripped:
            _flush_paragraph()
            i += 1
            continue

        # Fenced code block
        if stripped.startswith(_MD_FENCE):
            _flush_paragraph()
            lang = stripped[3:].strip()
            code_lines = []
            i += 1
            while i < n and not lines[i].strip().startswith(_MD_FENCE):
                code_lines.append(lines[i])
                i += 1
            i += 1  # skip closing ```
            code = "\n".join(code_lines)
            if lang == "mermaid":
                elements.append({"type": "mermaid", "code": code})
            else:
                elements.append({"type": "code_block", "lang": lang, "code": code})
            continue

        # Headings (levels 1-6)
        heading_match = _MD_HEADING_RE.match(stripped)
        if heading_match:
            _flush_paragraph()
            elements.append({
                "type": "heading",
                "level": len(heading_match.group(1)),
                "text": heading_match.group(2),
            })
            i += 1
            continue

        # Horizontal rule
        if _MD_RULE_RE.match(stripped):
            _flush_paragraph()
            elements.append({"type": "hr"})
            i += 1
            continue

        # Table (starts with |)
        if stripped.startswith("|") and "|" in stripped[1:]:
            _flush_paragraph()
            table_lines = []
            while i < n and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            elements.append({"type": "table", "lines": table_lines})
            continue

        # Blockquote
        if stripped.startswith(">"):
            _flush_paragraph()
            bq_lines = []
            while i < n and lines[i].strip().startswith(">"):
                bq_lines.append(lines[i].strip().lstrip("> ").strip())
                i += 1
            elements.append({"type": "blockquote", "text": " ".join(bq_lines)})
            continue

        # Bullet list
        if _MD_BULLET_RE.match(stripped):
            _flush_paragraph()
            items, i = _collect_list_items(
                lines, i, _MD_BULLET_RE, _MD_BULLET_START_RE, _bullet_item
            )
            elements.append({"type": "bullet_list", "items": items})
            continue

        # Ordered list
        if _MD_ORDERED_RE.match(stripped):
            _flush_paragraph()
            items, i = _collect_list_items(
                lines, i, _MD_ORDERED_RE, _MD_ORDERED_START_RE, _ordered_item
            )
            elements.append({"type": "ordered_list", "items": items})
            continue

        # Regular paragraph text
        para_buf.append(stripped)
        i += 1

    _flush_paragraph()
    return elements
# ---------------------------------------------------------------------------
# Table builder
# ---------------------------------------------------------------------------
# A header/body separator row: only pipes, colons, whitespace and dashes, with
# at least one dash. The trailing pipe is optional.
_TABLE_SEPARATOR_RE = re.compile(r"^\|[\s:|]*-[\s\-:|]*$")


def build_table_flowable(table_element, styles):
    """Convert a parsed table element into a ReportLab Table.

    Args:
        table_element: Dict with a ``"lines"`` list of stripped markdown
            table rows, each starting with ``|``.
        styles: Stylesheet providing the ``"TableHeader"`` and
            ``"TableCell"`` paragraph styles.

    Returns:
        A styled ``Table`` whose first row is the repeated header and whose
        columns share the content width equally, or ``None`` when the
        element contains no data rows.
    """
    # Filter out separator lines (|---|---|), with or without a closing pipe.
    data_lines = [
        ln for ln in table_element["lines"] if not _TABLE_SEPARATOR_RE.match(ln)
    ]
    if not data_lines:
        return None

    rows = [
        [cell.strip() for cell in ln.strip("|").split("|")]
        for ln in data_lines
    ]

    # Build Paragraph cells; the first row is the header.
    header_style = styles["TableHeader"]
    cell_style = styles["TableCell"]
    table_data = [
        [
            Paragraph(format_inline(cell), header_style if ri == 0 else cell_style)
            for cell in row
        ]
        for ri, row in enumerate(rows)
    ]

    # Pad short rows so every row has the same number of columns.
    num_cols = max(len(r) for r in table_data)
    for r in table_data:
        r.extend(Paragraph("", cell_style) for _ in range(num_cols - len(r)))

    # Equal column widths across the available content width.
    avail = CONTENT_W - 4
    col_widths = [avail / num_cols] * num_cols
    tbl = Table(table_data, colWidths=col_widths, repeatRows=1)

    # Style
    style_cmds = [
        ("BACKGROUND", (0, 0), (-1, 0), HEADER_BG),
        ("TEXTCOLOR", (0, 0), (-1, 0), white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 8.5),
        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
        ("VALIGN", (0, 0), (-1, -1), "TOP"),
        ("TOPPADDING", (0, 0), (-1, -1), 5),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
        ("LEFTPADDING", (0, 0), (-1, -1), 6),
        ("RIGHTPADDING", (0, 0), (-1, -1), 6),
        ("GRID", (0, 0), (-1, -1), 0.5, HexColor("#e2e8f0")),
        ("LINEBELOW", (0, 0), (-1, 0), 1.5, FORD_BLUE),
    ]
    # Alternating row colours: every second body row is shaded.
    style_cmds.extend(
        ("BACKGROUND", (0, ri), (-1, ri), ROW_ALT)
        for ri in range(2, len(table_data), 2)
    )
    tbl.setStyle(TableStyle(style_cmds))
    return tbl
# ---------------------------------------------------------------------------
# Diagram (PNG) embedding
# ---------------------------------------------------------------------------
def embed_diagram(png_path, max_width=None):
    """Return an image flowable for ``png_path`` sized to fit the page.

    The image is never enlarged: it is shrunk to at most ``max_width``
    (``CONTENT_W`` when omitted) and then, if still taller than 420, shrunk
    further so its height is 420. Aspect ratio is preserved.

    Returns ``None`` (after printing a warning) if the image cannot be read
    or the flowable cannot be created.
    """
    width_limit = CONTENT_W if max_width is None else max_width
    height_limit = 420
    try:
        from PIL import Image as PILImage

        with PILImage.open(png_path) as picture:
            px_w, px_h = picture.size

        # The PNG is rendered at 4x scale, so its pixel dimensions are large;
        # fit the width first, then clamp the resulting height.
        factor = min(width_limit / px_w, 1.0)
        if px_h * factor > height_limit:
            factor = height_limit / px_h

        return RLImage(png_path, width=px_w * factor, height=px_h * factor)
    except Exception as e:
        print(f" WARNING: Could not embed diagram {png_path}: {e}")
        return None
# ---------------------------------------------------------------------------
# Cover page & page templates
# ---------------------------------------------------------------------------
class CoverPage(Flowable):
    """Cover flowable: banded blue gradient with the title block on top.

    The flowable always reports the fixed ``width`` x ``height`` it was
    constructed with, regardless of the space offered by the frame.
    """

    def __init__(self, width, height):
        super().__init__()
        self.width, self.height = width, height

    def wrap(self, aW, aH):
        return (self.width, self.height)

    def draw(self):
        self._paint_gradient()
        self._paint_text()

    def _paint_gradient(self):
        """Approximate a vertical gradient with 60 horizontal bands."""
        canvas, page_w, page_h = self.canv, self.width, self.height
        band_count = 60
        band_h = page_h / band_count
        for band in range(band_count):
            frac = band / band_count
            red = 0.0 + frac * 0.05
            green = 0.2 * (1 - frac * 0.3)
            blue = 0.47 * (1 - frac * 0.15)
            # Bands are laid from the top down; the extra point of height
            # overlaps the next band.
            bottom = page_h - (band + 1) * band_h
            canvas.setFillColor(Color(red, green, blue))
            canvas.rect(0, bottom, page_w, band_h + 1, fill=True, stroke=False)

    def _paint_text(self):
        """Draw the rules, title, subtitle, date and confidentiality line."""
        canvas, page_w, page_h = self.canv, self.width, self.height
        centre_x = page_w / 2

        # Decorative line
        canvas.setStrokeColor(HexColor("#63b3ed"))
        canvas.setLineWidth(2)
        canvas.line(page_w * 0.2, page_h * 0.52, page_w * 0.8, page_h * 0.52)

        # Title
        canvas.setFillColor(white)
        canvas.setFont("Helvetica-Bold", 34)
        canvas.drawCentredString(centre_x, page_h * 0.65, "Ford BnP QC System")
        canvas.setFont("Helvetica", 18)
        canvas.setFillColor(HexColor("#bee3f8"))
        canvas.drawCentredString(centre_x, page_h * 0.59, "Technical Documentation")

        # Subtitle
        canvas.setFont("Helvetica", 13)
        canvas.setFillColor(HexColor("#90cdf4"))
        canvas.drawCentredString(
            centre_x, page_h * 0.46,
            "Build & Price Asset Pack Quality Control System",
        )

        # Date (taken at draw time)
        canvas.setFont("Helvetica-Oblique", 12)
        canvas.setFillColor(HexColor("#90cdf4"))
        generated_on = datetime.now().strftime("%B %d, %Y")
        canvas.drawCentredString(centre_x, page_h * 0.40, f"Generated: {generated_on}")

        # Bottom decorative line
        canvas.setStrokeColor(HexColor("#63b3ed"))
        canvas.setLineWidth(1)
        canvas.line(page_w * 0.3, page_h * 0.15, page_w * 0.7, page_h * 0.15)
        canvas.setFont("Helvetica", 10)
        canvas.setFillColor(HexColor("#a0aec0"))
        canvas.drawCentredString(
            centre_x, page_h * 0.12, "Confidential — For Internal Use Only"
        )
class HorizontalRule(Flowable):
    """Full-width thin line used to render a markdown horizontal rule."""

    def __init__(self, width, color=RULE_COLOR, thickness=0.75):
        super().__init__()
        self.width, self.color, self.thickness = width, color, thickness

    def wrap(self, aW, aH):
        # Reserve the stroke thickness plus 6 points of vertical room.
        reserved_height = self.thickness + 6
        return (self.width, reserved_height)

    def draw(self):
        canvas = self.canv
        canvas.setStrokeColor(self.color)
        canvas.setLineWidth(self.thickness)
        # The line sits 3 points above the flowable's baseline.
        canvas.line(0, 3, self.width, 3)
class HeadingRule(Flowable):
    """Short coloured accent line placed under chapter headings.

    Only the leftmost 35% of ``width`` is actually stroked.
    """

    def __init__(self, width, color=FORD_BLUE, thickness=1.5):
        super().__init__()
        self.width, self.color, self.thickness = width, color, thickness

    def wrap(self, aW, aH):
        reserved_height = self.thickness + 2
        return (self.width, reserved_height)

    def draw(self):
        canvas = self.canv
        canvas.setStrokeColor(self.color)
        canvas.setLineWidth(self.thickness)
        accent_length = self.width * 0.35
        canvas.line(0, 1, accent_length, 1)
# ---------------------------------------------------------------------------
# Heading anchor — a flowable that records its page number for TOC
# ---------------------------------------------------------------------------
class HeadingAnchor(Flowable):
    """Zero-size flowable that records where a heading lands.

    When drawn it bookmarks the current page under ``key``, appends
    ``(level, text, page_number, key)`` to ``collector`` and, for level-0
    headings, stores ``text`` in the module-level ``_chapter_title``.
    """

    def __init__(self, key, level, text, collector):
        super().__init__()
        self.key, self.level = key, level
        self.text, self.collector = text, collector

    def wrap(self, aW, aH):
        # Takes up no space on the page.
        return (0, 0)

    def draw(self):
        global _chapter_title
        canvas = self.canv
        canvas.bookmarkPage(self.key)
        entry = (self.level, self.text, canvas.getPageNumber(), self.key)
        self.collector.append(entry)
        if self.level == 0:
            _chapter_title = self.text
# ---------------------------------------------------------------------------
# Two-pass page handler
# ---------------------------------------------------------------------------
# Title shown at the right of the page header; updated by flowables as the
# document is laid out.
_chapter_title = ""


def content_page_handler(canvas, doc):
    """Draw the running header and footer on a content page.

    Header: a rule, the document name on the left and the current
    ``_chapter_title`` (when non-empty) on the right. Footer: a rule, the
    page number centred and today's date on the right.
    """
    left_edge = MARGIN_LEFT
    right_edge = PAGE_W - MARGIN_RIGHT
    header_text_y = PAGE_H - 30
    footer_text_y = MARGIN_BOTTOM - 22

    canvas.saveState()

    # Header line
    canvas.setStrokeColor(FORD_BLUE)
    canvas.setLineWidth(0.75)
    canvas.line(left_edge, PAGE_H - 35, right_edge, PAGE_H - 35)

    # Header text
    canvas.setFont("Helvetica", 7.5)
    canvas.setFillColor(HexColor("#718096"))
    canvas.drawString(
        left_edge, header_text_y,
        "Ford BnP QC System \u2014 Technical Documentation",
    )
    if _chapter_title:
        canvas.drawRightString(right_edge, header_text_y, _chapter_title)

    # Footer
    canvas.setStrokeColor(RULE_COLOR)
    canvas.setLineWidth(0.5)
    canvas.line(left_edge, MARGIN_BOTTOM - 10, right_edge, MARGIN_BOTTOM - 10)
    canvas.setFont("Helvetica", 8)
    canvas.setFillColor(HexColor("#a0aec0"))
    canvas.drawCentredString(PAGE_W / 2, footer_text_y, f"Page {doc.page}")
    canvas.drawRightString(
        right_edge, footer_text_y, datetime.now().strftime("%Y-%m-%d")
    )

    canvas.restoreState()
def cover_page_handler(canvas, doc):
    """Page hook for the cover template; intentionally draws nothing."""
    return None
# ---------------------------------------------------------------------------
# Build content story (without TOC — used in both passes)
# ---------------------------------------------------------------------------
# Box-drawing characters mapped to ASCII stand-ins (the standard Courier font
# lacks these glyphs).
_BOX_DRAWING_TO_ASCII = str.maketrans({
    "\u2514": "`",  # └
    "\u251c": "|",  # ├
    "\u2524": "|",  # ┤
    "\u2502": "|",  # │
    "\u2500": "-",  # ─
    "\u2510": "+",  # ┐
    "\u250c": "+",  # ┌
    "\u2518": "+",  # ┘
    "\u253c": "+",  # ┼
    "\u252c": "+",  # ┬
    "\u2534": "+",  # ┴
})


def _code_block_markup(code):
    """Turn raw code-block text into paragraph markup that keeps its layout."""
    markup = code.translate(_BOX_DRAWING_TO_ASCII)
    # Escape XML
    markup = markup.replace("&", "&amp;")
    markup = markup.replace("<", "&lt;")
    markup = markup.replace(">", "&gt;")
    markup = markup.replace("\n", "<br/>")
    # Convert only *pairs* of spaces, so runs keep their original width while
    # single spaces between words stay ordinary breakable spaces.
    markup = markup.replace("  ", " &nbsp;")
    markup = markup.replace("\t", " &nbsp;&nbsp;&nbsp;")
    return markup


def build_content_story(md_files, diagrams, styles, toc_collector, chapter_titles):
    """Build the content flowables from markdown files.

    Args:
        md_files: Markdown file paths; each file is one chapter.
        diagrams: Mapping of (file_index, mermaid_block_index) to a rendered
            PNG path; missing keys produce a placeholder caption.
        styles: Stylesheet providing the paragraph styles used here
            (H1-H4, BodyText2, BulletItem, BulletItemL2, NumberedItem,
            CodeBlock, Caption, BlockquoteText, plus the table styles).
        toc_collector: List handed to each ``HeadingAnchor`` (headings of
            level 1-3), which appends to it when drawn.
        chapter_titles: Header title per file, indexed like ``md_files``.

    Returns:
        The list of flowables for all chapters, separated by page breaks.

    Side effect: resets the module-level ``_chapter_title`` to "".
    """
    global _chapter_title
    _chapter_title = ""

    story = []
    anchor_idx = 0
    last_file_index = len(md_files) - 1

    for fi, fpath in enumerate(md_files):
        with open(fpath, "r", encoding="utf-8") as f:
            elements = parse_markdown(f.read())

        mermaid_idx = 0
        for el in elements:
            etype = el["type"]

            if etype == "heading":
                level = el["level"]
                # Add bookmark anchor for TOC (levels 1-3)
                if level <= 3:
                    key = f"heading_{anchor_idx}"
                    anchor_idx += 1
                    plain = strip_markdown(el["text"])
                    story.append(HeadingAnchor(key, level - 1, plain, toc_collector))
                # Levels deeper than 4 reuse the H4 style.
                heading_style = styles[f"H{min(level, 4)}"]
                story.append(Paragraph(format_inline(el["text"]), heading_style))
                if level == 1:
                    story.append(HeadingRule(CONTENT_W))

            elif etype == "paragraph":
                story.append(Paragraph(format_inline(el["text"]), styles["BodyText2"]))

            elif etype == "bullet_list":
                for item in el["items"]:
                    # Items indented by four or more columns are second level.
                    sn = "BulletItemL2" if item.get("indent", 0) >= 4 else "BulletItem"
                    story.append(
                        Paragraph(f"\u2022 {format_inline(item['text'])}", styles[sn])
                    )

            elif etype == "ordered_list":
                for item in el["items"]:
                    story.append(
                        Paragraph(
                            f"{item['num']}. {format_inline(item['text'])}",
                            styles["NumberedItem"],
                        )
                    )

            elif etype == "code_block":
                story.append(
                    Paragraph(_code_block_markup(el["code"]), styles["CodeBlock"])
                )

            elif etype == "mermaid":
                # Diagrams are keyed by their order of appearance in the file.
                dia_path = diagrams.get((fi, mermaid_idx))
                mermaid_idx += 1
                if not dia_path:
                    story.append(Paragraph(
                        "<i>[Diagram not rendered]</i>",
                        styles["Caption"],
                    ))
                    continue
                img = embed_diagram(dia_path)
                if img:
                    story.append(Spacer(1, 6))
                    story.append(img)
                    story.append(Spacer(1, 6))
                else:
                    story.append(Paragraph(
                        "<i>[Diagram could not be embedded]</i>",
                        styles["Caption"],
                    ))

            elif etype == "table":
                tbl = build_table_flowable(el, styles)
                if tbl:
                    story.append(Spacer(1, 4))
                    story.append(tbl)
                    story.append(Spacer(1, 4))

            elif etype == "blockquote":
                story.append(
                    Paragraph(format_inline(el["text"]), styles["BlockquoteText"])
                )

            elif etype == "hr":
                story.append(Spacer(1, 4))
                story.append(HorizontalRule(CONTENT_W))
                story.append(Spacer(1, 4))

        # Page break between chapters (except after last).
        # Set the NEXT chapter's title BEFORE the PageBreak so the header
        # on the first page of the next chapter is correct.
        if fi < last_file_index:
            story.append(ChapterTitleSetter(chapter_titles[fi + 1]))
            story.append(PageBreak())

    return story
class ChapterTitleSetter(Flowable):
    """Zero-size flowable that switches the running header title.

    When drawn it assigns ``title`` to the module-level ``_chapter_title``.
    """

    def __init__(self, title):
        super().__init__()
        self.title = title

    def wrap(self, aW, aH):
        # Takes up no space on the page.
        return (0, 0)

    def draw(self):
        global _chapter_title
        new_title = self.title
        _chapter_title = new_title
def build_toc_flowables(toc_entries, styles, toc_page_offset):
    """Build a manual TOC from collected entries.

    Args:
        toc_entries: Iterable of ``(level, text, page_num, key)`` tuples as
            appended by ``HeadingAnchor``; ``level`` is 0-based and ``text``
            is plain (un-escaped) heading text.
        styles: Stylesheet providing the ``"TOCHeading"`` style.
        toc_page_offset: Number added to every ``page_num`` to account for
            the TOC pages inserted ahead of the content.

    Returns:
        A list of flowables: a header-title setter, the TOC heading, one
        linked paragraph per entry, and a final ``PageBreak``.
    """
    flowables = [
        ChapterTitleSetter("Table of Contents"),
        Paragraph("Table of Contents", styles["TOCHeading"]),
        Spacer(1, 12),
    ]

    toc1 = ParagraphStyle(
        "TOCEntry1", fontName="Helvetica-Bold", fontSize=12,
        leading=22, leftIndent=0, textColor=DARK_BLUE, spaceBefore=10,
    )
    toc2 = ParagraphStyle(
        "TOCEntry2", fontName="Helvetica", fontSize=10,
        leading=18, leftIndent=24, textColor=HexColor("#4a5568"), spaceBefore=3,
    )
    toc3 = ParagraphStyle(
        "TOCEntry3", fontName="Helvetica", fontSize=9,
        leading=15, leftIndent=48, textColor=HexColor("#718096"), spaceBefore=2,
    )
    level_styles = [toc1, toc2, toc3]

    # Dots leader placed between the entry text and its page number.
    dots = " " + "\u00b7 " * 3

    for level, text, page_num, key in toc_entries:
        # Adjust page number: pass 1 pages include cover (1) but not TOC pages
        adjusted_page = page_num + toc_page_offset
        style = level_styles[min(level, 2)]
        # The heading text is plain text, so escape it before embedding it in
        # paragraph markup; otherwise "&", "<" or ">" in a heading would be
        # parsed as markup.
        safe_text = (
            text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        )
        entry_text = (
            f'<a href="#{key}">{safe_text}</a>'
            f'<font color="#a0aec0">{dots}</font>'
            f'<font color="#718096">{adjusted_page}</font>'
        )
        flowables.append(Paragraph(entry_text, style))

    # NOTE: The caller should append a ChapterTitleSetter for the first chapter
    # BEFORE this PageBreak so the header is correct on the first content page.
    flowables.append(PageBreak())
    return flowables
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def _first_h1_title(md_path):
    """Return the plain text of the first ``# `` heading line in a file, or ""."""
    with open(md_path, "r", encoding="utf-8") as f:
        ftxt = f.read()
    for line in ftxt.split("\n"):
        m = re.match(r"^#\s+(.+)$", line.strip())
        if m:
            return strip_markdown(m.group(1))
    return ""


def _cover_flowables():
    """Return the flowables that open every build: cover, then switch template."""
    return [
        CoverPage(PAGE_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM + 30),
        NextPageTemplate("content"),
        PageBreak(),
    ]


def _build_pdf(path, story, **doc_kwargs):
    """Lay out ``story`` into ``path`` with the cover/content templates.

    Extra keyword arguments (e.g. title, author) go to ``BaseDocTemplate``.
    Returns the document so callers can read its final page count.
    """
    content_frame = Frame(
        MARGIN_LEFT, MARGIN_BOTTOM,
        CONTENT_W, PAGE_H - MARGIN_TOP - MARGIN_BOTTOM,
        id="content",
    )
    cover_frame = Frame(0, 0, PAGE_W, PAGE_H, id="cover")
    doc = BaseDocTemplate(path, pagesize=A4, **doc_kwargs)
    doc.addPageTemplates([
        PageTemplate(id="cover", frames=[cover_frame],
                     onPage=cover_page_handler),
        PageTemplate(id="content", frames=[content_frame],
                     onPage=content_page_handler),
    ])
    doc.build(story)
    return doc


def main():
    """Generate the documentation PDF in two passes.

    Pass 1 lays out cover + content into a throwaway PDF to learn the page
    number of every heading. The number of TOC pages is then estimated from
    fixed per-entry heights, and pass 2 builds the final PDF with the TOC
    inserted and every TOC page number shifted by that estimate.

    Exits with status 1 when no markdown files are found.
    """
    global _chapter_title

    print("Ford BnP QC Documentation \u2014 PDF Generator")
    print("=" * 50)
    if not DOC_FILES:
        print("ERROR: No markdown files found in documentation/")
        sys.exit(1)
    print(f"\nFound {len(DOC_FILES)} documentation files:")
    for f in DOC_FILES:
        print(f" {os.path.basename(f)}")

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Render Mermaid diagrams
        print("\nRendering Mermaid diagrams...")
        diagrams = render_mermaid_diagrams(DOC_FILES, tmp_dir)
        print(f" {len(diagrams)} diagrams rendered successfully")

        styles = build_styles()

        # Pre-scan all files for H1 titles (for page header tracking)
        chapter_titles_for_headers = [_first_h1_title(p) for p in DOC_FILES]

        # ---- Pass 1: build without TOC to collect heading page numbers ----
        print("\nPass 1: Collecting heading positions...")
        toc_collector = []
        pass1_story = _cover_flowables()
        # Content (no TOC pages)
        pass1_story.extend(build_content_story(
            DOC_FILES, diagrams, styles, toc_collector, chapter_titles_for_headers
        ))
        # Build to a throwaway file to collect page numbers
        doc1 = _build_pdf(os.path.join(tmp_dir, "pass1.pdf"), pass1_story)
        print(f" Collected {len(toc_collector)} headings across {doc1.page} pages")

        # ---- Estimate TOC pages so we can offset page numbers ----
        # Rough estimate: sum a fixed height per entry by TOC level.
        entry_heights = {0: 32, 1: 21}  # deeper levels use 17
        toc_height = 12 + 22  # title + spacer
        for level, _text, _page_num, _key in toc_collector:
            toc_height += entry_heights.get(level, 17)
        frame_height = PAGE_H - MARGIN_TOP - MARGIN_BOTTOM
        toc_pages = max(1, int(toc_height / frame_height) + 1)
        print(f" Estimated TOC pages: {toc_pages}")

        # ---- Pass 2: build final PDF with TOC ----
        print("\nPass 2: Building final PDF with Table of Contents...")
        _chapter_title = ""
        toc_collector2 = []
        final_story = _cover_flowables()

        # TOC (with adjusted page numbers)
        toc_flowables = build_toc_flowables(toc_collector, styles, toc_pages)
        # Insert first chapter's title setter before the TOC's final PageBreak
        # so the header is correct when the first content page renders.
        toc_flowables.insert(-1, ChapterTitleSetter(chapter_titles_for_headers[0]))
        final_story.extend(toc_flowables)

        # Content
        final_story.extend(build_content_story(
            DOC_FILES, diagrams, styles, toc_collector2, chapter_titles_for_headers
        ))

        _build_pdf(
            OUTPUT_PDF, final_story,
            title="Ford BnP QC System \u2014 Technical Documentation",
            author="Ford BnP QC Team",
            subject="Quality Control System Documentation",
        )

    print(f"\nPDF generated: {OUTPUT_PDF}")
    print(f"File size: {os.path.getsize(OUTPUT_PDF) / 1024:.1f} KB")


if __name__ == "__main__":
    main()