cohorta/backend/scripts/generate_architecture_doc.py
michael c7ff1755ee Add architecture document generator and PDF
Create comprehensive technical architecture document (PDF) with 11
chapters covering system architecture, frontend/backend design, data
model, auth, WebSocket communication, LLM integration, and core
feature flows. Includes 11 Mermaid diagrams rendered as PNGs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 11:39:50 -06:00

2271 lines
92 KiB
Python

#!/usr/bin/env python3
"""
Semblance - Technical Architecture Document Generator
Generates a professionally formatted PDF with Mermaid diagrams rendered as PNGs.
Usage:
cd backend && source venv/bin/activate
python scripts/generate_architecture_doc.py [output_path]
"""
import sys
import os
import subprocess
import json
import tempfile
import shutil
from reportlab.platypus import (
BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, PageBreak,
NextPageTemplate, Table, TableStyle, Flowable, KeepTogether, Image,
)
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
from reportlab.lib.colors import HexColor, white, black
from reportlab.lib.units import inch, mm
from reportlab.lib import colors
# ============================================================================
# DESIGN SYSTEM (matches user manual)
# ============================================================================
# Palette as (role, hex code) pairs; turned into ReportLab colours just below.
_COLOR_HEX_CODES = (
    # Brand
    ("primary", "#E8A0B4"),
    ("primary_dark", "#9B4D63"),
    ("primary_deeper", "#6B2D3F"),
    ("secondary", "#F5EEF0"),
    ("accent", "#3B82F6"),
    # Text
    ("text_primary", "#2D1F24"),
    ("text_secondary", "#7A6068"),
    ("text_light", "#FFFFFF"),
    # Surfaces
    ("background", "#FAFAFA"),
    ("border", "#E8D9DE"),
    ("code_bg", "#F8F4F5"),
    # Callout boxes (background + accent border per kind)
    ("tip_bg", "#EFF6FF"),
    ("tip_border", "#3B82F6"),
    ("note_bg", "#FFF7ED"),
    ("note_border", "#F59E0B"),
    ("warning_bg", "#FEF2F2"),
    ("warning_border", "#EF4444"),
    ("success_bg", "#F0FDF4"),
    ("success_border", "#22C55E"),
    # Chapter banners and tables
    ("chapter_bg", "#6B2D3F"),
    ("table_header_bg", "#6B2D3F"),
    ("table_alt_row", "#FAF5F7"),
)

# Role name -> HexColor, in the order listed above.
COLORS = {role: HexColor(code) for role, code in _COLOR_HEX_CODES}
# Font face used for each typographic role in the document.
FONTS = {
    # Proportional faces
    "heading": "Helvetica-Bold",
    "body": "Helvetica",
    "body_italic": "Helvetica-Oblique",
    # Fixed-width faces
    "mono": "Courier",
    "mono_bold": "Courier-Bold",
}
# Page size comes straight from ReportLab's A4 (width, height) pair.
PAGE_WIDTH = A4[0]
PAGE_HEIGHT = A4[1]

# Margins: left/right share one value, top/bottom another.
MARGIN_LEFT = MARGIN_RIGHT = 60
MARGIN_TOP = MARGIN_BOTTOM = 72

# Area left for flowables once the margins are removed.
CONTENT_WIDTH = PAGE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
FRAME_HEIGHT = PAGE_HEIGHT - MARGIN_TOP - MARGIN_BOTTOM
# ============================================================================
# PARAGRAPH STYLES
# ============================================================================
def get_styles():
    """Return the document's ParagraphStyle objects keyed by short role name.

    Fonts and colours are taken from the module-level FONTS and COLORS tables.
    """
    bold = FONTS["heading"]
    regular = FONTS["body"]
    italic = FONTS["body_italic"]
    mono = FONTS["mono"]

    ink = COLORS["text_primary"]
    muted = COLORS["text_secondary"]
    wine = COLORS["primary_deeper"]
    rose = COLORS["primary_dark"]

    styles = {}

    # Headings (all flagged keepWithNext).
    styles["h1"] = ParagraphStyle(
        "Heading1", fontName=bold, fontSize=24, leading=30,
        textColor=wine, spaceBefore=28, spaceAfter=14,
        keepWithNext=True,
    )
    styles["h2"] = ParagraphStyle(
        "Heading2", fontName=bold, fontSize=18, leading=24,
        textColor=rose, spaceBefore=22, spaceAfter=10,
        keepWithNext=True,
    )
    styles["h3"] = ParagraphStyle(
        "Heading3", fontName=bold, fontSize=14, leading=18,
        textColor=rose, spaceBefore=16, spaceAfter=8,
        keepWithNext=True,
    )

    # Running text.
    styles["body"] = ParagraphStyle(
        "Body", fontName=regular, fontSize=10.5, leading=15,
        textColor=ink, spaceBefore=3, spaceAfter=7,
        alignment=TA_JUSTIFY,
    )
    styles["body_bold"] = ParagraphStyle(
        "BodyBold", fontName=bold, fontSize=10.5, leading=15,
        textColor=ink, spaceBefore=3, spaceAfter=4,
    )

    # Two bullet levels.
    styles["bullet"] = ParagraphStyle(
        "Bullet", fontName=regular, fontSize=10.5, leading=15,
        textColor=ink, leftIndent=20, bulletIndent=8,
        spaceBefore=2, spaceAfter=2,
    )
    styles["bullet2"] = ParagraphStyle(
        "Bullet2", fontName=regular, fontSize=10, leading=14,
        textColor=muted, leftIndent=40, bulletIndent=28,
        spaceBefore=1, spaceAfter=1,
    )

    # Figure captions and code listings.
    styles["caption"] = ParagraphStyle(
        "Caption", fontName=italic, fontSize=9, leading=12,
        textColor=muted, alignment=TA_CENTER,
        spaceBefore=4, spaceAfter=14,
    )
    styles["code_block"] = ParagraphStyle(
        "CodeBlock", fontName=mono, fontSize=8, leading=11,
        textColor=ink, spaceBefore=4, spaceAfter=4,
        leftIndent=12, backColor=COLORS["code_bg"],
    )

    # Table-of-contents levels 0-2.
    styles["toc0"] = ParagraphStyle(
        "TOC0", fontName=bold, fontSize=13, leading=22,
        leftIndent=0, spaceBefore=10, textColor=wine,
    )
    styles["toc1"] = ParagraphStyle(
        "TOC1", fontName=regular, fontSize=11, leading=17,
        leftIndent=20, spaceBefore=3, textColor=ink,
    )
    styles["toc2"] = ParagraphStyle(
        "TOC2", fontName=regular, fontSize=10, leading=15,
        leftIndent=40, spaceBefore=2, textColor=muted,
    )

    # Table cells.
    styles["table_header"] = ParagraphStyle(
        "TableHeader", fontName=bold, fontSize=9.5, leading=13,
        textColor=COLORS["text_light"],
    )
    styles["table_cell"] = ParagraphStyle(
        "TableCell", fontName=regular, fontSize=9.5, leading=13,
        textColor=ink,
    )
    styles["table_cell_mono"] = ParagraphStyle(
        "TableCellMono", fontName=mono, fontSize=8.5, leading=12,
        textColor=ink,
    )

    # Text inside CalloutBox.
    styles["callout_body"] = ParagraphStyle(
        "CalloutBody", fontName=regular, fontSize=10, leading=14,
        textColor=ink,
    )
    return styles


# Built once at import time and shared by every flowable below.
STYLES = get_styles()
# ============================================================================
# CUSTOM FLOWABLES
# ============================================================================
class ChapterTitlePage(Flowable):
    """Full-frame chapter opener: coloured banner with number, title and optional subtitle."""

    def __init__(self, number, title, subtitle=""):
        super().__init__()
        self.number, self.title, self.subtitle = number, title, subtitle

    def wrap(self, availWidth, availHeight):
        # Always claims the full frame height, whatever space is offered.
        self.width, self.height = availWidth, FRAME_HEIGHT
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        width, height = self.width, self.height
        centre_x = width / 2
        banner_h = height * 0.40
        banner_base = height - banner_h

        def at(fraction):
            """Return the y coordinate *fraction* of the way up the banner."""
            return banner_base + banner_h * fraction

        # Banner background, extended into the left/right/top page margins.
        canvas.setFillColor(COLORS["chapter_bg"])
        canvas.rect(
            -MARGIN_LEFT, banner_base,
            width + MARGIN_LEFT + MARGIN_RIGHT, banner_h + MARGIN_TOP,
            fill=1, stroke=0,
        )

        # Small "CHAPTER n" label near the top of the banner.
        canvas.setFillColor(COLORS["primary"])
        canvas.setFont(FONTS["body"], 14)
        canvas.drawCentredString(centre_x, at(0.72), f"CHAPTER {self.number}")

        # Oversized, mostly transparent chapter number behind the title.
        canvas.setFillColorRGB(1, 1, 1, 0.12)
        canvas.setFont(FONTS["heading"], 160)
        canvas.drawCentredString(centre_x, at(0.18), str(self.number))

        # Title, drawn after the ghost number so it sits on top of it.
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 30)
        canvas.drawCentredString(centre_x, at(0.38), self.title)

        if self.subtitle:
            canvas.setFillColorRGB(1, 1, 1, 0.75)
            canvas.setFont(FONTS["body"], 13)
            canvas.drawCentredString(centre_x, at(0.22), self.subtitle)

        # Short accent rule centred 20pt below the banner.
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(3)
        rule_len = 60
        rule_y = banner_base - 20
        canvas.line(centre_x - rule_len / 2, rule_y, centre_x + rule_len / 2, rule_y)
class CalloutBox(Flowable):
    """Tinted box with a coloured left bar and an upper-case label above the text."""

    # Look of each callout kind: fill colour, accent colour and label text.
    CONFIGS = {
        "tip": {
            "bg": COLORS["tip_bg"],
            "border": COLORS["tip_border"],
            "label": "TIP",
        },
        "note": {
            "bg": COLORS["note_bg"],
            "border": COLORS["note_border"],
            "label": "NOTE",
        },
        "warning": {
            "bg": COLORS["warning_bg"],
            "border": COLORS["warning_border"],
            "label": "WARNING",
        },
    }

    def __init__(self, text, callout_type="tip"):
        super().__init__()
        self.text = text
        # An unknown callout_type raises KeyError here.
        self.config = self.CONFIGS[callout_type]
        self._para = Paragraph(text, STYLES["callout_body"])

    def wrap(self, availWidth, availHeight):
        # The text is laid out 26pt narrower than the box.
        _, text_height = self._para.wrap(availWidth - 26, availHeight)
        self.width = availWidth
        # 32pt on top of the text height leaves room for the label row and padding.
        self.height = text_height + 32
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        look = self.config
        box_w, box_h = self.width, self.height

        # Rounded background panel.
        canvas.setFillColor(look["bg"])
        canvas.roundRect(0, 0, box_w, box_h, 4, fill=1, stroke=0)

        # 4pt accent bar down the left edge.
        canvas.setFillColor(look["border"])
        canvas.rect(0, 0, 4, box_h, fill=1, stroke=0)

        # Label in the accent colour, then the body text below it.
        canvas.setFont(FONTS["heading"], 8.5)
        canvas.setFillColor(look["border"])
        canvas.drawString(14, box_h - 15, look["label"])
        self._para.drawOn(canvas, 14, 6)
class HorizontalRule(Flowable):
    """Hairline divider across the available width, centred in a 12pt-high strip."""

    def wrap(self, availWidth, availHeight):
        self.width, self.height = availWidth, 12
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        canvas.setStrokeColor(COLORS["border"])
        canvas.setLineWidth(0.5)
        # y=6 is the vertical middle of the 12pt strip reserved by wrap().
        canvas.line(0, 6, self.width, 6)
class DiagramImage(Flowable):
    """Embed a PNG diagram with optional caption, scaled to fit content width.

    The image is only ever shrunk, never enlarged: first to the usable width,
    then to at most MAX_HEIGHT_RATIO of a full frame. When the file does not
    exist a small grey placeholder naming the file is drawn instead, so a
    failed render does not abort the document build.

    Args:
        image_path: Path of the PNG to embed (may be missing or empty).
        caption: Optional caption text drawn centred underneath.
        max_width: Maximum drawn width in points; defaults to CONTENT_WIDTH.
    """

    # Tallest a diagram may be drawn, as a share of a full frame.
    MAX_HEIGHT_RATIO = 0.65

    def __init__(self, image_path, caption="", max_width=None):
        super().__init__()
        self.image_path = image_path
        self.caption_text = caption
        self.max_width = max_width or CONTENT_WIDTH
        self._img = None
        self._cap = Paragraph(caption, STYLES["caption"]) if caption else None

    def wrap(self, availWidth, availHeight):
        """Size the image and caption; return (width, height) of the whole figure."""
        max_w = min(self.max_width, availWidth)
        # Start clean so an image from an earlier wrap() is never reused.
        self._img = None
        if os.path.exists(self.image_path):
            img = Image(self.image_path)
            iw, ih = img.imageWidth, img.imageHeight
            if iw > max_w:
                scale = max_w / iw
                img.drawWidth = iw * scale
                img.drawHeight = ih * scale
            else:
                img.drawWidth = iw
                img.drawHeight = ih
            # Cap against the full frame height rather than availHeight.
            # availHeight is only the space left on the current page, so
            # capping against it shrank diagrams placed low on a page into
            # thumbnails instead of letting them move to the next page.
            max_h = FRAME_HEIGHT * self.MAX_HEIGHT_RATIO
            if img.drawHeight > max_h:
                scale = max_h / img.drawHeight
                img.drawWidth *= scale
                img.drawHeight *= scale
            img.wrap(availWidth, availHeight)
            self._img = img
            img_h = img.drawHeight
        else:
            img_h = 40  # placeholder height
        cap_h = 0
        if self._cap:
            _, cap_h = self._cap.wrap(availWidth, availHeight)
            cap_h += 6
        self.width = availWidth
        self.height = img_h + cap_h + 12
        return self.width, self.height

    def draw(self):
        """Draw the caption at the bottom and the image (or placeholder) above it."""
        c = self.canv
        cap_h = 0
        if self._cap:
            _, cap_h = self._cap.wrap(self.width, 9999)
            cap_h += 6
            self._cap.drawOn(c, 0, 0)
        file_exists = os.path.exists(self.image_path)
        if self._img is not None and file_exists:
            # Centre the image horizontally above the caption.
            x_offset = (self.width - self._img.drawWidth) / 2
            self._img.drawOn(c, x_offset, cap_h + 4)
        elif not file_exists:
            c.setFillColor(COLORS["code_bg"])
            c.roundRect(0, cap_h + 4, self.width, 30, 4, fill=1, stroke=0)
            c.setFont(FONTS["body_italic"], 9)
            c.setFillColor(COLORS["text_secondary"])
            c.drawCentredString(self.width / 2, cap_h + 16,
                                f"[Diagram not found: {os.path.basename(self.image_path)}]")
# ============================================================================
# TABLE HELPER
# ============================================================================
def _build_table(header, rows, col_widths, first_col_style):
    """Build the house-style table shared by styled_table and mono_table.

    Every cell is wrapped in a Paragraph. The header row uses the
    "table_header" style, the first column of each body row uses
    *first_col_style*, and all other body cells use "table_cell".
    """
    header_style = STYLES["table_header"]
    cell_style = STYLES["table_cell"]

    data = [[Paragraph(h, header_style) for h in header]]
    for row in rows:
        data.append([
            Paragraph(str(cell), first_col_style if col == 0 else cell_style)
            for col, cell in enumerate(row)
        ])

    table = Table(data, colWidths=col_widths, repeatRows=1)
    cmds = [
        # Header row
        ("BACKGROUND", (0, 0), (-1, 0), COLORS["table_header_bg"]),
        ("TEXTCOLOR", (0, 0), (-1, 0), white),
        ("FONTNAME", (0, 0), (-1, 0), FONTS["heading"]),
        ("FONTSIZE", (0, 0), (-1, 0), 9.5),
        ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
        ("TOPPADDING", (0, 0), (-1, 0), 8),
        # Body rows
        ("FONTNAME", (0, 1), (-1, -1), FONTS["body"]),
        ("FONTSIZE", (0, 1), (-1, -1), 9.5),
        ("TOPPADDING", (0, 1), (-1, -1), 6),
        ("BOTTOMPADDING", (0, 1), (-1, -1), 6),
        # Whole table
        ("LEFTPADDING", (0, 0), (-1, -1), 8),
        ("RIGHTPADDING", (0, 0), (-1, -1), 8),
        ("GRID", (0, 0), (-1, -1), 0.5, COLORS["border"]),
        ("LINEBELOW", (0, 0), (-1, 0), 1.5, COLORS["primary_dark"]),
        ("VALIGN", (0, 0), (-1, -1), "TOP"),
        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
    ]
    # Zebra striping: tint every even-indexed row below the header.
    cmds.extend(
        ("BACKGROUND", (0, i), (-1, i), COLORS["table_alt_row"])
        for i in range(2, len(data), 2)
    )
    table.setStyle(TableStyle(cmds))
    return table


def styled_table(header, rows, col_widths=None):
    """Return a styled Table with a repeating header row.

    Args:
        header: Header cell texts.
        rows: Iterable of rows; each cell is converted with str().
        col_widths: Optional column widths passed to Table as colWidths.
    """
    return _build_table(header, rows, col_widths, STYLES["table_cell"])


def mono_table(header, rows, col_widths=None):
    """Table with monospace font in the first column (for paths/code).

    Takes the same arguments as styled_table and differs from it only in
    the paragraph style used for the first column of body rows.
    """
    return _build_table(header, rows, col_widths, STYLES["table_cell_mono"])
# ============================================================================
# BOOKMARKED HEADING
# ============================================================================
class BookmarkedHeading(Paragraph):
    """Heading paragraph that adds a PDF bookmark and outline entry when drawn.

    It also carries the level, text and bookmark key that
    ArchDocTemplate.afterFlowable reports as a TOC entry.
    """

    def __init__(self, text, style, level=0, bookmark_name=None):
        if bookmark_name:
            key = bookmark_name
        else:
            # Derive a key from the text: spaces and slashes become
            # underscores, truncated to 60 characters.
            key = text.replace(" ", "_").replace("/", "_")[:60]
        self.bm_name = key
        self.toc_level = level
        self.plain_text = text
        super().__init__(text, style)

    def draw(self):
        canvas = self.canv
        canvas.bookmarkPage(self.bm_name)
        canvas.addOutlineEntry(self.plain_text, self.bm_name, self.toc_level, 0)
        super().draw()
# ============================================================================
# DOCUMENT TEMPLATE
# ============================================================================
class ArchDocTemplate(BaseDocTemplate):
    """A4 document template with cover, TOC, chapter-opener and content page layouts.

    All four page templates share one full-size frame and differ only in
    the onPage callback that decorates the page.
    """

    def __init__(self, filename):
        super().__init__(
            filename,
            pagesize=A4,
            leftMargin=MARGIN_LEFT,
            rightMargin=MARGIN_RIGHT,
            topMargin=MARGIN_TOP,
            bottomMargin=MARGIN_BOTTOM,
            title="Semblance Technical Architecture",
            author="Semblance",
        )
        body_frame = Frame(
            MARGIN_LEFT, MARGIN_BOTTOM, CONTENT_WIDTH, FRAME_HEIGHT, id="main",
            leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0,
        )
        # (template id, onPage callback); chapter openers get no decoration.
        page_callbacks = (
            ("cover", self._draw_cover),
            ("toc", self._on_toc),
            ("chapter", lambda c, d: None),
            ("content", self._on_content),
        )
        self.addPageTemplates([
            PageTemplate(id=template_id, frames=[body_frame], onPage=callback)
            for template_id, callback in page_callbacks
        ])

    def afterFlowable(self, flowable):
        """Report each BookmarkedHeading as a TOC entry with the current page number."""
        if not isinstance(flowable, BookmarkedHeading):
            return
        entry = (
            flowable.toc_level, flowable.plain_text,
            self.page, flowable.bm_name,
        )
        self.notify("TOCEntry", entry)

    @staticmethod
    def _draw_cover(canvas, doc):
        """Paint the cover: gradient, node-network decoration and title block."""
        canvas.saveState()
        page_w, page_h = PAGE_WIDTH, PAGE_HEIGHT
        centre_x = page_w / 2

        # Gradient background (deep wine to rose), built from horizontal bands.
        bands = 120
        band_h = page_h / bands
        top_rgb = (107, 45, 63)
        bottom_rgb = (245, 238, 240)
        for band in range(bands):
            t = band / bands
            red, green, blue = [
                (start + (end - start) * t) / 255
                for start, end in zip(top_rgb, bottom_rgb)
            ]
            canvas.setFillColorRGB(red, green, blue)
            # One extra point of height makes neighbouring bands overlap.
            canvas.rect(0, page_h - (band + 1) * band_h, page_w, band_h + 1,
                        fill=1, stroke=0)

        # Decorative network of circles (representing architecture nodes)
        canvas.setStrokeColorRGB(1, 1, 1, 0.15)
        canvas.setLineWidth(1.5)
        nodes = [
            (120, 640, 35), (420, 680, 30), (370, 560, 50),
            (100, 470, 22), (490, 510, 28), (250, 720, 20),
            (340, 440, 25), (200, 550, 40), (480, 620, 18),
        ]
        for node_x, node_y, radius in nodes:
            canvas.circle(node_x, node_y, radius, stroke=1, fill=0)
        # Pairs of indices into `nodes` whose centres are joined by a line.
        links = [(0, 2), (1, 2), (0, 3), (2, 4), (5, 0), (6, 4), (7, 2), (8, 1), (7, 0)]
        for src, dst in links:
            src_x, src_y, _ = nodes[src]
            dst_x, dst_y, _ = nodes[dst]
            canvas.line(src_x, src_y, dst_x, dst_y)

        # Title
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 44)
        canvas.drawCentredString(centre_x, 440, "Semblance")

        # Subtitle
        canvas.setFont(FONTS["body"], 20)
        canvas.drawCentredString(centre_x, 400, "Technical Architecture Document")

        # Rule under the subtitle
        canvas.setStrokeColorRGB(1, 1, 1, 0.6)
        canvas.setLineWidth(2)
        canvas.line(centre_x - 80, 380, centre_x + 80, 380)

        # Description
        canvas.setFont(FONTS["body"], 12)
        canvas.setFillColorRGB(1, 1, 1, 0.85)
        canvas.drawCentredString(centre_x, 355,
                                 "AI-Powered Synthetic Focus Group Research Platform")

        # Version and date near the bottom of the page
        canvas.setFont(FONTS["body"], 11)
        canvas.setFillColorRGB(1, 1, 1, 0.7)
        canvas.drawCentredString(centre_x, 140, "Version 1.0")
        canvas.drawCentredString(centre_x, 122, "February 2026")
        canvas.restoreState()

    @staticmethod
    def _on_toc(canvas, doc):
        """TOC pages carry only a centred page number."""
        canvas.saveState()
        canvas.setFont(FONTS["body"], 9)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")
        canvas.restoreState()

    @staticmethod
    def _on_content(canvas, doc):
        """Content pages get a running header, a footer rule and a page number."""
        left_x = MARGIN_LEFT
        right_x = PAGE_WIDTH - MARGIN_RIGHT
        header_rule_y = PAGE_HEIGHT - 50
        header_text_y = PAGE_HEIGHT - 44

        canvas.saveState()
        # Header line
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(0.5)
        canvas.line(left_x, header_rule_y, right_x, header_rule_y)
        canvas.setFont(FONTS["body"], 8)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawString(left_x, header_text_y, "Semblance \u2014 Technical Architecture")
        canvas.drawRightString(right_x, header_text_y, "v1.0")
        # Footer
        canvas.line(left_x, 55, right_x, 55)
        canvas.setFont(FONTS["body"], 9)
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")
        canvas.restoreState()
# ============================================================================
# SHORTHAND HELPERS
# ============================================================================
# Short alias used by the one-line builders below.
S = STYLES


def h1(text, bm=None):
    """Bookmarked heading at outline/TOC level 0."""
    return BookmarkedHeading(text, S["h1"], 0, bm)


def h2(text, bm=None):
    """Bookmarked heading at outline/TOC level 1."""
    return BookmarkedHeading(text, S["h2"], 1, bm)


def h3(text, bm=None):
    """Bookmarked heading at outline/TOC level 2."""
    return BookmarkedHeading(text, S["h3"], 2, bm)


def p(text):
    """Body paragraph."""
    body_style = S["body"]
    return Paragraph(text, body_style)


def pb(text):
    """Bold body paragraph."""
    bold_style = S["body_bold"]
    return Paragraph(text, bold_style)


def bullet(text):
    """First-level list item, prefixed with a bullet character."""
    line = f"\u2022 {text}"
    return Paragraph(line, S["bullet"])


def bullet2(text):
    """Second-level list item, prefixed with an en dash."""
    line = f"\u2013 {text}"
    return Paragraph(line, S["bullet2"])


def tip(text):
    """Callout box of kind "tip"."""
    return CalloutBox(text, callout_type="tip")


def note(text):
    """Callout box of kind "note"."""
    return CalloutBox(text, callout_type="note")


def warning(text):
    """Callout box of kind "warning"."""
    return CalloutBox(text, callout_type="warning")


def sp(pts=8):
    """Vertical spacer *pts* points high."""
    return Spacer(1, pts)
# ============================================================================
# MERMAID DIAGRAM DEFINITIONS
# ============================================================================
# Mermaid source text keyed by diagram id. render_mermaid_diagrams() writes
# each value to "<id>.mmd" and asks the Mermaid CLI for "<id>.png"; chapter
# builders look diagrams up by the same id. The string contents are passed to
# the CLI verbatim, so edit them as Mermaid source, not as Python.
DIAGRAMS = {
# Graph: browser client, Quart backend, external services and MongoDB.
"system_architecture": """graph TB
subgraph Client["Browser"]
SPA["React SPA<br/>TypeScript + Vite"]
SIO_C["Socket.IO Client"]
end
subgraph Backend["Quart Backend (ASGI)"]
API["REST API<br/>7 Route Groups"]
SIO_S["Socket.IO Server"]
Services["Service Layer<br/>19 Services"]
Prompts["Prompt Templates<br/>20 Markdown Files"]
end
subgraph External["External Services"]
Gemini["Google Gemini<br/>gemini-3-pro-preview"]
OpenAI["OpenAI<br/>GPT-4.1 / GPT-5.2"]
Azure["Azure AD<br/>Microsoft MSAL"]
end
MongoDB[("MongoDB<br/>4 Collections")]
SPA -->|"REST API"| API
SIO_C <-->|"WebSocket"| SIO_S
API --> Services
SIO_S --> Services
Services --> Prompts
Services -->|"LLM Calls"| Gemini
Services -->|"LLM Calls"| OpenAI
Services -->|"CRUD"| MongoDB
SPA -->|"OAuth"| Azure
classDef client fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef backend fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef external fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef db fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
class SPA,SIO_C client
class API,SIO_S,Services,Prompts backend
class Gemini,OpenAI,Azure external
class MongoDB db
""",
# Graph: production server (Nginx, static assets, app server) and cloud APIs.
"deployment_architecture": """graph TB
User["User Browser"]
subgraph Production["Production Server (ai-sandbox.oliver.solutions)"]
Nginx["Nginx<br/>Reverse Proxy"]
subgraph Static["Static Assets"]
Vite["Vite Build<br/>/semblance/"]
end
subgraph App["Application Server"]
Hypercorn["Hypercorn ASGI<br/>Port 5137"]
Quart["Quart App"]
SocketIO["python-socketio<br/>AsyncServer"]
end
Mongo[("MongoDB")]
end
subgraph APIs["Cloud APIs"]
GeminiAPI["Google Gemini API"]
OpenAIAPI["OpenAI API"]
AzureAPI["Azure AD / MSAL"]
end
User -->|"HTTPS"| Nginx
Nginx -->|"/semblance/*"| Vite
Nginx -->|"/semblance_back/*"| Hypercorn
Hypercorn --> Quart
Hypercorn --> SocketIO
Quart --> Mongo
Quart -->|"API Keys"| GeminiAPI
Quart -->|"API Keys"| OpenAIAPI
User -->|"OAuth Popup"| AzureAPI
classDef user fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef infra fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef app fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef db fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef cloud fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
class User user
class Nginx,Vite infra
class Hypercorn,Quart,SocketIO app
class Mongo db
class GeminiAPI,OpenAIAPI,AzureAPI cloud
""",
# Graph: App.tsx, providers, page routes and their panel/component groups.
"frontend_components": """graph TD
App["<b>App.tsx</b>"]
Providers["<b>Providers</b><br/>QueryClient, BrowserRouter,<br/>MSAL, Auth, WebSocket, Navigation"]
subgraph Pages["Page Routes"]
SU["SyntheticUsers"]
FGS["FocusGroupSession"]
FG["FocusGroups"]
Dash["Dashboard"]
end
subgraph Session["Session Panels (FocusGroupSession)"]
direction LR
DP["Discussion"]
Parts["Participants"]
Themes["Themes"]
Analytics["Analytics"]
Notes["Notes"]
end
subgraph PersonaUI["Persona Components (SyntheticUsers)"]
direction LR
AIR["AI Recruiter"]
UC["Manual Creator"]
PE["Persona Editor"]
end
App --> Providers --> Pages
FGS --> Session
SU --> PersonaUI
classDef appNode fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef provider fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef route fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef panel fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef persona fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
class App appNode
class Providers provider
class SU,FGS,FG,Dash route
class DP,Parts,Themes,Analytics,Notes panel
class AIR,UC,PE persona
""",
# Graph: API routes, core / AI / domain service groups and external systems.
"backend_services": """graph TD
Routes["<b>API Routes</b><br/>auth, personas, ai-personas,<br/>focus-groups, focus-group-ai,<br/>folders, tasks"]
subgraph Core["Core Services"]
direction LR
LLM["<b>LLMService</b><br/>Gemini + OpenAI"]
WSM["<b>WebSocketManager</b><br/>Room messaging"]
TM["<b>TaskManager</b><br/>Cancellable tasks"]
end
subgraph AI["AI / Conversation Services (5)"]
direction LR
AIR["<b>AIRunner</b><br/>Dedicated thread"]
ACC["<b>Autonomous</b><br/><b>Controller</b>"]
CDS["<b>Decision +</b><br/><b>Context Services</b>"]
end
subgraph Domain["Domain Services (11)"]
direction LR
PS["<b>Persona Services</b><br/>Generation, Export,<br/>Modification"]
FGS["<b>Focus Group Services</b><br/>Responses, Themes,<br/>Moderator, Summary"]
end
subgraph External["External Systems"]
direction LR
Gemini["Google Gemini"]
OAI["OpenAI"]
DB[("MongoDB")]
end
Routes --> Core
Routes --> AI
Routes --> Domain
AI --> LLM
AI --> WSM
Domain --> LLM
Domain --> WSM
AIR --> ACC --> CDS
LLM --> Gemini
LLM --> OAI
Core --> DB
classDef routes fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef core fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef ai fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef domain fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef ext fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
class Routes routes
class LLM,WSM,TM core
class AIR,ACC,CDS ai
class PS,FGS domain
class Gemini,OAI,DB ext
""",
# ER diagram: USER, PERSONA, FOCUS_GROUP and FOLDER with their relationships.
"entity_relationship": """erDiagram
USER {
ObjectId _id PK
string username UK
string email UK
string password_hash
string role
string auth_type
string microsoft_id
}
PERSONA {
ObjectId _id PK
string name
string age
string gender
string occupation
string location
string personality
number techSavviness
object oceanTraits
array goals
array frustrations
array motivations
string aiSynthesizedBio
array folder_ids FK
ObjectId created_by FK
datetime created_at
}
FOCUS_GROUP {
ObjectId _id PK
string name
string description
string status
string llm_model
string reasoning_effort
array participants
array messages
array themes
object discussion_guide
ObjectId created_by FK
datetime created_at
}
FOLDER {
ObjectId _id PK
string name
ObjectId parent_folder_id FK
number level
ObjectId created_by FK
datetime created_at
}
USER ||--o{ PERSONA : "creates"
USER ||--o{ FOCUS_GROUP : "manages"
USER ||--o{ FOLDER : "creates"
PERSONA }o--o{ FOCUS_GROUP : "participates in"
PERSONA }o--o{ FOLDER : "belongs to"
FOLDER ||--o{ FOLDER : "contains"
""",
# Sequence diagram: local JWT login, Microsoft OAuth login, WebSocket auth.
"auth_flow": """sequenceDiagram
autonumber
participant U as User Browser
participant R as React SPA
participant A as Quart /api/auth
participant M as Azure AD
participant DB as MongoDB
Note over U,DB: Local JWT Authentication
U->>R: Enter credentials
R->>A: POST /auth/login
A->>DB: Find user by username
DB-->>A: User document
A->>A: Verify bcrypt hash
A->>A: Generate JWT (HS256, 24h)
A-->>R: {token, user}
R->>R: Store in localStorage
Note over U,DB: Microsoft OAuth (MSAL)
U->>R: Click "Sign in with Microsoft"
R->>M: MSAL popup login
M-->>R: idToken
R->>A: POST /auth/microsoft {idToken}
A->>A: Validate MSAL token
A->>DB: Find/create user
A->>A: Generate JWT (HS256, 24h)
A-->>R: {token, user}
R->>R: Store in localStorage
Note over U,DB: WebSocket Authentication
R->>A: Socket.IO connect {auth: token}
A->>A: Validate JWT
A-->>R: connected event
""",
# Sequence diagram: joining a room, manual-mode message, autonomous-mode loop.
"websocket_flow": """sequenceDiagram
autonumber
participant C as React Client
participant WS as WebSocket Service
participant SIO as Socket.IO Server
participant Room as Focus Group Room
participant AI as AI Runner Thread
participant DB as MongoDB
C->>WS: joinFocusGroup(groupId)
WS->>SIO: emit("join_focus_group")
SIO->>Room: Add session to room
SIO-->>WS: "joined_focus_group"
WS->>C: window.dispatchEvent("ws:joined")
Note over C,DB: Manual Mode Message
C->>SIO: POST /generate-response
SIO->>AI: Generate persona response
AI->>DB: Load persona + context
AI->>AI: LLM generate response
AI->>DB: Save message
AI->>Room: emit("message_update")
Room-->>WS: "message_update"
WS->>C: window.dispatchEvent("ws:message_update")
Note over C,DB: Autonomous Mode
C->>SIO: POST /start-autonomous-mode
SIO->>AI: Start conversation loop
loop Every 3-10 seconds
AI->>AI: Decision engine
AI->>DB: Save message
AI->>Room: emit("message_update")
Room-->>WS: "message_update"
WS->>C: window.dispatchEvent
AI->>Room: emit("ai_status_update")
end
""",
# Flowchart: model-type dispatch (Gemini / GPT-4.1 / GPT-5.2) with retry path.
"llm_pipeline": """flowchart TD
Request["LLM Request<br/>(prompt, model, params)"]
Decision{{"Model Type?"}}
subgraph Gemini["Google Gemini Path"]
GClient["Create Gemini Client"]
GConfig["GenerateContentConfig<br/>temperature, max_tokens"]
GCall["genai.generate_content()"]
end
subgraph OpenAI4["OpenAI GPT-4.1 Path"]
O4Client["Create AsyncOpenAI Client"]
O4Call["chat.completions.create()<br/>model=gpt-4.1"]
end
subgraph OpenAI5["OpenAI GPT-5.2 Path"]
O5Client["Create AsyncOpenAI Client"]
O5Call["responses.create()<br/>reasoning=effort<br/>verbosity=level"]
end
Retry{{"Success?"}}
Parse["Parse Response<br/>Extract JSON if needed"]
Return["Return Result"]
RetryLogic["Retry with Backoff<br/>1s, 2s, 4s (max 3)"]
Request --> Decision
Decision -->|"gemini-*"| GClient --> GConfig --> GCall --> Retry
Decision -->|"gpt-4.1"| O4Client --> O4Call --> Retry
Decision -->|"gpt-5.2"| O5Client --> O5Call --> Retry
Retry -->|"Yes"| Parse --> Return
Retry -->|"No"| RetryLogic --> Decision
classDef req fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef gemini fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef oai fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef oai5 fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef logic fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
class Request req
class GClient,GConfig,GCall gemini
class O4Client,O4Call oai
class O5Client,O5Call oai5
class Decision,Retry,RetryLogic logic
class Parse,Return req
""",
# Flowchart: brief -> stage 1 profiles -> review -> stage 2 personas -> library.
"persona_generation": """flowchart LR
Brief["Audience Brief<br/>+ Research Objective"]
Enhance["AI Brief<br/>Enhancement"]
Stage1["Stage 1:<br/>Generate Basic<br/>Profiles"]
Review["User Reviews<br/>Basic Profiles"]
Stage2["Stage 2:<br/>Generate Detailed<br/>Personas"]
Save["Save to<br/>MongoDB"]
Library["View in<br/>Persona Library"]
Brief --> Enhance --> Stage1 --> Review --> Stage2 --> Save --> Library
subgraph Stage1Detail["Stage 1 Output"]
S1["Name, Age, Gender<br/>Occupation, Location<br/>Personality Summary<br/>Interests, Tech Savviness"]
end
subgraph Stage2Detail["Stage 2 Output"]
S2["OCEAN Traits (0-100)<br/>Goals, Frustrations<br/>Motivations, Scenarios<br/>Think-Feel-Do<br/>AI Synthesized Bio"]
end
Stage1 -.-> S1
Stage2 -.-> S2
classDef input fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef process fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef review fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef output fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef detail fill:#FAFAFA,stroke:#E8D9DE,stroke-width:1px,color:#2D1F24
class Brief,Enhance input
class Stage1,Stage2 process
class Review review
class Save,Library output
class S1,S2 detail
""",
# State diagram: focus group lifecycle from creation to completed / error.
"focus_group_states": """stateDiagram-v2
[*] --> new : Create Focus Group
new --> setup : Configure Settings
setup --> setup : Edit Guide / Add Participants
setup --> manual_mode : Start Manual Session
setup --> ai_mode : Start Autonomous Mode
manual_mode --> ai_mode : Switch to Autonomous
ai_mode --> manual_mode : Switch to Manual
manual_mode --> completed : End Session
ai_mode --> completed : End Session / Guide Complete
ai_mode --> error : AI Error / Timeout
completed --> [*]
error --> manual_mode : Resume Manually
error --> [*]
""",
# Sequence diagram: one iteration of the autonomous conversation loop.
"autonomous_conversation": """sequenceDiagram
autonumber
participant Controller as Autonomous Controller
participant LLM as LLM + Decision Services
participant DB as MongoDB
participant WS as WebSocket
Note over Controller,WS: One Conversation Loop Iteration
Controller->>DB: Load context (messages + participants)
DB-->>Controller: Conversation state
Controller->>LLM: decide_next_action(context)
LLM-->>Controller: {action, participant, reasoning}
alt respond / moderate / probe
Controller->>LLM: Generate message for action
LLM-->>Controller: Message text
Controller->>DB: Save message
Controller->>WS: emit message_update
else end_session
Controller->>DB: status = completed
Controller->>WS: emit ai_status_update
end
Controller->>Controller: Jitter wait (3-10s)
Controller->>Controller: Safety checks (action + silence limits)
Note over Controller: Loop until end_session or limit reached
""",
}
# ============================================================================
# MERMAID RENDERING
# ============================================================================
def render_mermaid_diagrams(output_dir):
    """Render all Mermaid diagrams as PNGs using mmdc CLI.

    Writes a shared ``mermaid-config.json`` and one ``<id>.mmd`` source file
    per DIAGRAMS entry into *output_dir* (created if missing), then runs the
    Mermaid CLI through ``npx`` once per diagram.

    Rendering is best-effort: a diagram that times out (60 s), whose command
    raises, or that leaves no PNG behind is reported on stdout and left out
    of the result instead of aborting the run.

    Args:
        output_dir: Directory receiving the config, ``.mmd`` and ``.png`` files.

    Returns:
        dict mapping diagram id to the path of its PNG, for every diagram
        whose output file exists after the CLI call.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Custom Mermaid config — light backgrounds with dark text for PDF readability.
    # Each key appears exactly once. The note* keys used to be listed twice,
    # with the later values silently winning; only those effective values
    # are kept, in their original position so the JSON output is unchanged.
    config = {
        "theme": "base",
        "themeVariables": {
            "primaryColor": "#F5EEF0",
            "primaryTextColor": "#2D1F24",
            "primaryBorderColor": "#9B4D63",
            "lineColor": "#6B2D3F",
            "secondaryColor": "#EFF6FF",
            "secondaryTextColor": "#2D1F24",
            "secondaryBorderColor": "#3B82F6",
            "tertiaryColor": "#F0FDF4",
            "tertiaryTextColor": "#2D1F24",
            "tertiaryBorderColor": "#22C55E",
            "noteBkgColor": "#FFF7ED",
            "noteTextColor": "#2D1F24",
            "noteBorderColor": "#D97706",
            "actorBkg": "#F5EEF0",
            "actorBorder": "#9B4D63",
            "actorTextColor": "#2D1F24",
            "signalColor": "#6B2D3F",
            "signalTextColor": "#2D1F24",
            "activationBkgColor": "#F5EEF0",
            "activationBorderColor": "#9B4D63",
            "sequenceNumberColor": "#FFFFFF",
            "labelBoxBkgColor": "#F5EEF0",
            "labelBoxBorderColor": "#9B4D63",
            "labelTextColor": "#2D1F24",
            "loopTextColor": "#6B2D3F",
            "fontSize": "16px",
            "fontFamily": "Helvetica, Arial, sans-serif",
        }
    }
    config_path = os.path.join(output_dir, "mermaid-config.json")
    # Explicit UTF-8 so the files do not depend on the platform's locale encoding.
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)
    rendered = {}
    for diagram_id, source in DIAGRAMS.items():
        input_path = os.path.join(output_dir, f"{diagram_id}.mmd")
        output_path = os.path.join(output_dir, f"{diagram_id}.png")
        with open(input_path, "w", encoding="utf-8") as f:
            f.write(source)
        # NOTE(review): the Mermaid CLI README documents
        # `npx -p @mermaid-js/mermaid-cli mmdc ...`; here the package itself is
        # the npx command and "mmdc" is passed as its first argument. Confirm
        # this form is intended before changing it.
        cmd = [
            "npx", "-y", "@mermaid-js/mermaid-cli", "mmdc",
            "-i", input_path,
            "-o", output_path,
            "-c", config_path,
            "-w", "2400",
            "-s", "3",
            "-b", "transparent",
        ]
        print(f" Rendering {diagram_id}...")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        except subprocess.TimeoutExpired:
            print(f" WARNING: {diagram_id} timed out")
            continue
        except Exception as exc:
            # Deliberately broad: one failing diagram (e.g. npx not installed)
            # must not stop the rest of the document from building.
            print(f" WARNING: {diagram_id} failed: {exc}")
            continue
        # Success is judged by the output file existing, not by the exit code.
        if os.path.exists(output_path):
            rendered[diagram_id] = output_path
        else:
            print(f" WARNING: {diagram_id} not rendered: {result.stderr[:200]}")
    return rendered
# ============================================================================
# DIAGRAM HELPER
# ============================================================================
def diagram(rendered, diagram_id, caption):
    """Build the flowables that place one Mermaid diagram in the story.

    Args:
        rendered: Mapping of diagram id to the path of its rendered PNG.
        diagram_id: Key of the diagram to look up in ``rendered``.
        caption: Caption text handed to ``DiagramImage``.

    Returns:
        A three-item list: ``sp(6)``, the ``DiagramImage`` and ``sp(4)``.
    """
    # A diagram missing from ``rendered`` falls back to an empty path; what
    # gets drawn in that case is decided by DiagramImage.
    image_path = rendered.get(diagram_id, "")
    flowables = [sp(6)]
    flowables.append(DiagramImage(image_path, caption))
    flowables.append(sp(4))
    return flowables
# ============================================================================
# CHAPTERS
# ============================================================================
def build_chapter_1(rendered):
    """Chapter 1: Executive Overview.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 1.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    capability_bullets = (
        "<b>AI Persona Generation</b> \u2014 Two-stage pipeline: basic demographic profiles then full personality expansion (OCEAN traits, goals, frustrations, motivations, scenarios, AI-synthesized biography).",
        "<b>Focus Group Simulation</b> \u2014 Manual moderation or fully autonomous AI-driven conversations with real-time WebSocket updates.",
        "<b>Multi-Model LLM Support</b> \u2014 Unified service abstracting Google Gemini and OpenAI models with retry logic and model-specific parameter handling.",
        "<b>Real-Time Collaboration</b> \u2014 Socket.IO room-based messaging for live session observation with event-driven UI updates.",
        "<b>Comprehensive Analysis</b> \u2014 AI-powered theme extraction, sentiment analysis, participation balance scoring, and exportable reports.",
        "<b>Enterprise Authentication</b> \u2014 Dual auth: local JWT credentials and Microsoft Entra ID (MSAL) OAuth.",
    )

    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(1, "Executive Overview", "System Purpose and Capabilities"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Executive Overview", "ch1"),
    ]

    # --- Purpose ---
    story += [
        h2("Purpose", "ch1_purpose"),
        p(
            "Semblance is an AI-powered synthetic focus group research platform that enables researchers, "
            "product teams, and UX professionals to create detailed synthetic personas using large language "
            "models, organize them into focus groups, and conduct moderated or fully autonomous research "
            "sessions \u2014 all without recruiting real participants."
        ),
        p(
            "The platform supports multi-model AI integration (Google Gemini, OpenAI GPT-4.1 and GPT-5.2), "
            "real-time WebSocket communication for live session collaboration, and comprehensive analysis "
            "tools including sentiment analysis, theme extraction, and participation analytics."
        ),
    ]

    # --- Key Capabilities ---
    story.append(h2("Key Capabilities", "ch1_capabilities"))
    story.extend(bullet(text) for text in capability_bullets)
    story.append(sp())

    # --- Technology Summary ---
    # NOTE(review): the "Real-Time" row says "polling fallback", while the
    # Chapter 7 reconnection table says "WebSocket only (no polling fallback)".
    # Confirm against the client configuration which statement is correct.
    technology_rows = [
        ["Frontend", "React 18, TypeScript, Vite, Tailwind CSS, shadcn-ui (Radix UI), React Router, TanStack Query, Socket.IO Client"],
        ["Backend", "Python, Quart (async Flask), Hypercorn ASGI, python-socketio, PyMongo"],
        ["Database", "MongoDB (4 collections: users, personas, focus_groups, folders)"],
        ["AI / LLM", "Google Gemini (gemini-3-pro-preview), OpenAI GPT-4.1, OpenAI GPT-5.2"],
        ["Authentication", "Custom JWT (HS256, 24h expiry), Microsoft MSAL (Entra ID)"],
        ["Real-Time", "Socket.IO (WebSocket with polling fallback)"],
    ]
    story += [
        h2("Technology Summary", "ch1_tech"),
        styled_table(
            ["Layer", "Technologies"],
            technology_rows,
            col_widths=[100, CONTENT_WIDTH - 108],
        ),
        sp(),
    ]

    # --- System Architecture Overview ---
    story += [
        h2("System Architecture Overview", "ch1_arch"),
        p(
            "Semblance follows a three-tier architecture: a React single-page application communicates "
            "with a Python Quart backend through REST APIs and WebSocket connections. The backend "
            "orchestrates multiple LLM providers and persists all data in MongoDB."
        ),
        *diagram(rendered, "system_architecture",
                 "Figure 1.1 \u2014 High-Level System Architecture"),
    ]
    return story
def build_chapter_2(rendered):
    """Chapter 2: System Architecture.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 2.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(2, "System Architecture", "Deployment and Infrastructure"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("System Architecture", "ch2"),
    ]

    # --- Three-Tier Architecture ---
    story += [
        h2("Three-Tier Architecture", "ch2_tiers"),
        p(
            "The application is organized into three distinct tiers, each independently deployable:"
        ),
        bullet(
            "<b>Presentation Tier</b> \u2014 React SPA built with Vite, served as static assets. "
            "Handles all UI rendering, client-side routing, and WebSocket event dispatching."
        ),
        bullet(
            "<b>Application Tier</b> \u2014 Quart (async Flask) application running under Hypercorn ASGI server. "
            "Hosts the REST API (7 blueprint groups), Socket.IO server, 19 business logic services, and "
            "a dedicated AI runner thread for autonomous conversations."
        ),
        bullet(
            "<b>Data Tier</b> \u2014 MongoDB document database storing users, personas, focus groups, and folders. "
            "Accessed via PyMongo (sync) in route handlers and Motor (async) in the AI runner thread."
        ),
        sp(),
    ]

    # --- Deployment Topology ---
    story += [
        h2("Deployment Topology", "ch2_deploy"),
        p(
            "In production, the application is deployed at <b>ai-sandbox.oliver.solutions</b> behind an "
            "Nginx reverse proxy that routes requests to either the static frontend assets or the backend "
            "application server."
        ),
        *diagram(rendered, "deployment_architecture",
                 "Figure 2.1 \u2014 Production Deployment Architecture"),
    ]

    # --- Environment Configuration ---
    # The two environment columns share the space left after the 110pt label column.
    env_column_width = (CONTENT_WIDTH - 118) / 2
    story += [
        h2("Environment Configuration", "ch2_env"),
        p(
            "The application supports dual environments through Vite environment files. "
            "Configuration switches base paths, API URLs, WebSocket paths, and authentication redirects."
        ),
        styled_table(
            ["Setting", "Development", "Production"],
            [
                ["Base Path", "/", "/semblance/"],
                ["API Base URL", "/api", "https://ai-sandbox.oliver.solutions/semblance_back/api"],
                ["WebSocket Path", "/socket.io/", "/semblance_back/socket.io/"],
                ["Frontend Port", "5173 (Vite dev server)", "Static assets via Nginx"],
                ["Backend Port", "5137 (Hypercorn)", "5137 (proxied via Nginx)"],
                ["MSAL Redirect", "http://localhost:5173/", "https://ai-sandbox.oliver.solutions/semblance"],
            ],
            col_widths=[110, env_column_width, env_column_width],
        ),
        sp(),
    ]

    # --- Application Factory Pattern ---
    factory_settings = (
        "<b>JWT Secret</b> \u2014 From JWT_SECRET_KEY environment variable",
        "<b>Token Expiry</b> \u2014 86,400 seconds (24 hours)",
        "<b>Max Upload</b> \u2014 16 MB",
        "<b>Request Timeout</b> \u2014 300 seconds (5 minutes)",
        "<b>CORS</b> \u2014 allow_origin=\"*\" for all methods",
    )
    story += [
        h2("Application Factory Pattern", "ch2_factory"),
        p(
            "The backend uses an application factory pattern (<b>create_app()</b> in app/__init__.py) "
            "that initializes the Quart app, configures CORS, registers 7 route blueprints, sets up "
            "JWT authentication, initializes the WebSocket manager, and starts the AI runner service. "
            "Key configuration:"
        ),
    ]
    story.extend(bullet(setting) for setting in factory_settings)
    story.append(sp())
    return story
def build_chapter_3(rendered):
    """Chapter 3: Frontend Architecture.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 3.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    e = []
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    e.append(NextPageTemplate("chapter"))
    e.append(PageBreak())
    e.append(ChapterTitlePage(3, "Frontend Architecture", "React SPA Structure and Patterns"))
    e.append(NextPageTemplate("content"))
    e.append(PageBreak())
    e.append(h1("Frontend Architecture", "ch3"))

    # --- Technology Stack ---
    e.append(h2("Technology Stack", "ch3_stack"))
    e.append(p(
        "The frontend is a React 18 single-page application built with TypeScript and Vite. "
        "UI components use shadcn-ui (Radix UI primitives) styled with Tailwind CSS. "
        "Data fetching uses TanStack Query with form handling via React Hook Form and Zod validation."
    ))
    e.append(sp())

    # --- Provider Hierarchy ---
    e.append(h2("Provider Hierarchy", "ch3_providers"))
    e.append(p(
        "App.tsx wraps the entire application in a nested provider hierarchy. Each provider "
        "adds a layer of functionality accessible throughout the component tree:"
    ))
    # Widths must total CONTENT_WIDTH - 8 like the other tables built by these
    # chapter functions. With a 110pt first column that leaves
    # CONTENT_WIDTH - 118 to split 55/45; the earlier "- 200" made this one
    # table 82pt narrower than the rest.
    provider_remaining_width = CONTENT_WIDTH - 118
    e.append(styled_table(
        ["Provider", "Purpose", "Key State"],
        [
            ["QueryClientProvider", "TanStack Query data fetching and caching", "Query cache, stale-while-revalidate"],
            ["BrowserRouter", "Client-side routing with dynamic base path", "Route location, navigation"],
            ["MsalProvider", "Microsoft Azure AD authentication", "MSAL instance, account info"],
            ["AuthProvider", "JWT token management and session persistence", "user, token, isAuthenticated"],
            ["WebSocketProvider", "Singleton Socket.IO connection management", "socketId, connection state"],
            ["NavigationProvider", "Navigation state and focus group context", "previousRoute, focusGroupId, folderId"],
        ],
        col_widths=[110, provider_remaining_width * 0.55, provider_remaining_width * 0.45],
    ))
    e.append(sp())
    e.extend(diagram(rendered, "frontend_components",
                     "Figure 3.1 \u2014 Frontend Component Hierarchy"))

    # --- Route Structure ---
    e.append(h2("Route Structure", "ch3_routes"))
    e.append(styled_table(
        ["Path", "Component", "Auth", "Description"],
        [
            ["/", "Index", "No", "Landing page with platform overview"],
            ["/login", "Login", "No", "Authentication (local + Microsoft OAuth)"],
            ["/synthetic-users", "SyntheticUsers", "Yes", "Persona library and management"],
            ["/synthetic-users/:id", "PersonaProfile", "Yes", "Individual persona detail view"],
            ["/focus-groups", "FocusGroups", "Yes", "Focus group listing and creation"],
            ["/focus-groups/:id", "FocusGroupSession", "Yes", "Live session interface (multi-panel)"],
            ["/dashboard", "Dashboard", "Yes", "Analytics and research metrics"],
        ],
        col_widths=[115, 100, 30, CONTENT_WIDTH - 253],
    ))
    e.append(sp())

    # --- State Management Strategy ---
    e.append(h2("State Management Strategy", "ch3_state"))
    e.append(p(
        "The application uses a layered state management approach rather than a single global store:"
    ))
    e.append(bullet(
        "<b>Global Persisted State</b> \u2014 AuthContext (JWT + user in localStorage), "
        "NavigationContext (route history in localStorage), TanStack Query cache (server data)."
    ))
    e.append(bullet(
        "<b>Component State</b> \u2014 React hooks for UI state (tabs, modals, filters), "
        "React Hook Form for form state, temporary editing data."
    ))
    e.append(bullet(
        "<b>WebSocket State</b> \u2014 Connection status, real-time updates dispatched as "
        "window CustomEvents (ws:message_update, ws:ai_status_update, etc.)."
    ))
    e.append(sp())

    # --- Component Organization ---
    e.append(h2("Component Organization", "ch3_components"))
    e.append(styled_table(
        ["Directory", "Contents"],
        [
            ["src/components/ui/", "Reusable shadcn-ui components (Button, Card, Dialog, Tabs, etc.) plus custom components (ProgressModal, MentionInput, SaveStatusIndicator)"],
            ["src/components/focus-group-session/", "25+ components for the session interface: DiscussionPanel, ParticipantPanel, ThemesPanel, AnalyticsPanel, ReasoningPanel, NotesPanel, AutonomousDashboard"],
            ["src/components/persona/", "Persona profile viewing and editing: PersonaProfile, PersonaEditor, PersonaPersonality, PersonaAttitudinalProfile, PersonaScenarios"],
            ["src/components/dashboard/", "Dashboard analytics: StatCard, OverviewTab, UsersTab, FocusGroupsTab"],
            ["src/components/auth/", "Authentication: MsalProvider (Azure AD setup)"],
            ["src/hooks/", "Custom hooks: useWebSocket, useCancellableGeneration, usePersonaFiltering, useFocusGroupAutoSave, useFolderManagement"],
            ["src/services/", "WebSocket singleton service with event dispatching via window CustomEvents"],
            ["src/types/", "TypeScript type definitions: Persona (70+ fields), CancellableTask, NavigationState"],
        ],
        col_widths=[160, CONTENT_WIDTH - 168],
    ))
    e.append(sp())
    return e
def build_chapter_4(rendered):
    """Chapter 4: Backend Architecture.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 4.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Every service table in this chapter uses the same three column headers.
    service_header = ["Service", "File", "Purpose"]

    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(4, "Backend Architecture", "Services, Routes, and Prompt Templates"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Backend Architecture", "ch4"),
    ]

    # --- ASGI Application Stack ---
    story += [
        h2("ASGI Application Stack", "ch4_stack"),
        p(
            "The backend is a Quart application (async Flask) running under Hypercorn, an ASGI server. "
            "The ASGI stack layers python-socketio on top of the Quart app, enabling both HTTP and "
            "WebSocket communication through a single server process on port 5137."
        ),
        p(
            "A dedicated AI runner thread with its own asyncio event loop handles autonomous conversation "
            "execution, isolated from the main request-handling event loop. This avoids Motor (async MongoDB) "
            "event loop affinity issues and prevents long-running AI operations from blocking HTTP requests."
        ),
        sp(),
    ]

    # --- Service Layer ---
    story += [
        h2("Service Layer", "ch4_services"),
        p(
            "Business logic is organized into 19 service modules, each responsible for a specific domain. "
            "Services are stateless (except the AI Runner singleton) and communicate through function calls, "
            "the LLM service, and the WebSocket manager."
        ),
        *diagram(rendered, "backend_services",
                 "Figure 4.1 \u2014 Backend Service Architecture"),
    ]

    # Core services
    story += [
        h3("Core Services", "ch4_core"),
        mono_table(
            list(service_header),
            [
                ["LLMService", "llm_service.py", "Multi-model abstraction (Gemini, GPT-4.1, GPT-5.2) with retry logic and JSON parsing"],
                ["WebSocketManager", "websocket_manager_async.py", "Room-based messaging, event emission, connection tracking"],
                ["TaskManager", "task_manager.py", "CancellableTask wrapper for long-running operations with per-user tracking"],
                ["PromptLoader", "utils/prompt_loader.py", "Loads and interpolates 20 markdown prompt templates"],
            ],
            col_widths=[110, 120, CONTENT_WIDTH - 238],
        ),
        sp(),
    ]

    # AI / conversation services
    story += [
        h3("AI / Conversation Services", "ch4_ai"),
        mono_table(
            list(service_header),
            [
                ["AIRunnerService", "ai_runner_service.py", "Singleton: dedicated thread + event loop for autonomous conversations"],
                ["AutonomousConversation Controller", "autonomous_conversation_controller.py", "State machine orchestrating multi-persona conversation flow"],
                ["ConversationDecision Service", "conversation_decision_service.py", "LLM-driven decision engine: next speaker, action type, probing"],
                ["ConversationContext Service", "conversation_context_service.py", "Aggregates messages, participants, and state for LLM context"],
                ["ConversationState Manager", "conversation_state_manager.py", "Tracks participation metrics, sentiment, energy levels"],
            ],
            col_widths=[110, 140, CONTENT_WIDTH - 258],
        ),
        sp(),
    ]

    # Persona services
    story += [
        h3("Persona Services", "ch4_persona"),
        mono_table(
            list(service_header),
            [
                ["AIPersonaService", "ai_persona_service.py", "Two-stage persona generation with customer data integration"],
                ["PersonaModification Service", "persona_modification_service.py", "AI-assisted persona editing"],
                ["PersonaExportService", "persona_export_service.py", "Individual persona profile export"],
                ["BulkExportService", "bulk_persona_export_service.py", "Batch persona export (MD/JSON/CSV)"],
                ["CustomerDataService", "customer_data_service.py", "Upload and integrate research data into generation"],
            ],
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),
    ]

    # Focus group services
    story += [
        h3("Focus Group Services", "ch4_fg"),
        mono_table(
            list(service_header),
            [
                ["FocusGroupService", "focus_group_service.py", "CRUD operations and discussion guide generation"],
                ["FocusGroupResponse Service", "focus_group_response_service.py", "Generate persona responses with personality-driven prompts"],
                ["KeyThemeService", "key_theme_service.py", "AI-powered theme extraction from conversation messages"],
                ["AIModeratorService", "ai_moderator_service.py", "AI moderator intervention and discussion guidance"],
                ["FocusGroupSummary Service", "focus_group_summary_service.py", "Comprehensive session summary generation"],
                ["ImageDescription Service", "image_description_service.py", "Multimodal image description for uploaded assets"],
            ],
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),
    ]

    # --- Prompt Template System ---
    prompt_template_rows = [
        ["persona-basic-generation.md", "Stage 1 persona generation (demographics)"],
        ["persona-detailed-generation.md", "Stage 2 persona expansion (full profile)"],
        ["persona-system.md", "System prompt for persona-as-character responses"],
        ["focus-group-response.md", "In-session persona response generation"],
        ["conversation-decision-engine.md", "Autonomous mode: next action decision"],
        ["conversation-participant-selection.md", "Autonomous mode: speaker selection"],
        ["ai-moderator-system.md", "AI moderator system prompt"],
        ["probe-generation-prompt.md", "Probing question generation"],
        ["key-theme-extraction.md", "Theme extraction from conversation"],
        ["discussion-guide-generation.md", "Structured discussion guide creation"],
        ["focus-group-summary-generation.md", "Post-session summary generation"],
        ["audience-brief-enhancement.md", "Research brief AI enhancement"],
        ["image-description.md", "Multimodal image analysis"],
        ["persona-interaction-prompt.md", "Persona-to-persona interaction"],
        ["persona-to-persona-response.md", "Inter-persona conversation"],
        ["persona-modification.md", "AI-assisted persona editing"],
        ["persona-summary-generation.md", "Persona summary for display"],
        ["persona-download-summary.md", "Export summary format"],
        ["persona-profile-export.md", "Full profile export format"],
        ["key-theme-system.md", "Theme extraction system prompt"],
    ]
    story += [
        h2("Prompt Template System", "ch4_prompts"),
        p(
            "The backend uses 20 markdown prompt templates stored in <b>/backend/prompts/</b>. "
            "The PromptLoader utility loads these files and interpolates context variables (persona data, "
            "conversation history, discussion guide) before sending them to the LLM service."
        ),
        styled_table(
            ["Template", "Used By"],
            prompt_template_rows,
            col_widths=[175, CONTENT_WIDTH - 183],
        ),
        sp(),
    ]
    return story
def build_chapter_5(rendered):
    """Chapter 5: Data Model.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 5.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(5, "Data Model", "MongoDB Collections and Relationships"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Data Model", "ch5"),
    ]

    # --- Collections Overview ---
    story += [
        h2("Collections Overview", "ch5_overview"),
        p(
            "Semblance stores data in MongoDB across four primary collections. Documents use MongoDB "
            "ObjectId references for relationships rather than foreign key constraints."
        ),
        *diagram(rendered, "entity_relationship",
                 "Figure 5.1 \u2014 Entity Relationship Diagram"),
    ]

    # --- User Collection ---
    story += [
        h2("User Collection", "ch5_user"),
        p(
            "Stores authentication credentials and profile information. Supports dual auth types."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            [
                ["_id", "ObjectId", "Primary key"],
                ["username", "String (unique)", "Login identifier"],
                ["email", "String (unique)", "Email address"],
                ["password_hash", "String", "bcrypt-hashed password"],
                ["role", "String", "User role (default: \"user\")"],
                ["auth_type", "String", "\"local\" or \"microsoft\""],
                ["microsoft_id", "String", "Azure AD object ID (optional)"],
            ],
            col_widths=[100, 100, CONTENT_WIDTH - 208],
        ),
        sp(),
    ]

    # --- Persona Collection ---
    story += [
        h2("Persona Collection", "ch5_persona"),
        p(
            "The richest data model with 70+ fields covering demographics, OCEAN personality traits, "
            "behavioral attributes, motivations, scenarios, and AI-generated content."
        ),
        h3("Core Fields", "ch5_persona_core"),
        styled_table(
            ["Field", "Type", "Description"],
            [
                ["_id", "ObjectId", "Primary key"],
                ["name", "String", "Persona display name"],
                ["age, gender, occupation", "String", "Demographics"],
                ["location, education", "String", "Geographic and educational background"],
                ["personality", "String", "Personality summary text"],
                ["techSavviness", "Number", "Technology comfort level (0\u2013100)"],
                ["created_by", "ObjectId", "Reference to User who created this persona"],
                ["folder_ids", "Array[ObjectId]", "Folders this persona belongs to (many-to-many)"],
            ],
            col_widths=[115, 100, CONTENT_WIDTH - 223],
        ),
        sp(),
        h3("Personality & Psychographic Fields", "ch5_persona_ocean"),
        styled_table(
            ["Field", "Type", "Description"],
            [
                ["oceanTraits", "Object", "OCEAN scores (0\u2013100): openness, conscientiousness, extraversion, agreeableness, neuroticism"],
                ["thinkFeelDo", "Object", "Arrays of thinks, feels, does statements"],
                ["goals", "Array[String]", "Life and professional goals"],
                ["frustrations", "Array[String]", "Pain points and frustrations"],
                ["motivations", "Array[String]", "Driving motivations"],
                ["selfDeterminationNeeds", "Object", "Autonomy, competence, relatedness assessments"],
                ["scenarios", "Array[String]", "Behavioral scenario descriptions"],
                ["aiSynthesizedBio", "String", "AI-generated narrative biography (2\u20133 lines)"],
            ],
            col_widths=[125, 85, CONTENT_WIDTH - 218],
        ),
        sp(),
    ]

    # --- Focus Group Collection ---
    story += [
        h2("Focus Group Collection", "ch5_fg"),
        p(
            "Stores session configuration, participant references, conversation messages, themes, and "
            "discussion guide. Messages and themes are embedded documents within the focus group."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            [
                ["_id", "ObjectId", "Primary key"],
                ["name, description", "String", "Session title and research topic"],
                ["status", "String", "\"new\", \"manual_mode\", \"ai_mode\", or \"completed\""],
                ["llm_model", "String", "Selected LLM model (default: gemini-3-pro-preview)"],
                ["reasoning_effort", "String", "GPT-5.2 reasoning level (minimal/low/medium/high)"],
                ["participants", "Array[Object]", "Participant objects with persona_id references"],
                ["messages", "Array[Object]", "Conversation messages (sender, text, timestamp, sentiment)"],
                ["themes", "Array[Object]", "Extracted themes with supporting quotes"],
                ["discussion_guide", "Object", "Structured guide with sections and items"],
                ["created_by", "ObjectId", "Reference to User"],
                ["autonomous_started_at", "DateTime", "Timestamp of autonomous mode start"],
            ],
            col_widths=[120, 90, CONTENT_WIDTH - 218],
        ),
        sp(),
    ]

    # --- Folder Collection ---
    story += [
        h2("Folder Collection", "ch5_folder"),
        p(
            "Hierarchical folder structure for organizing personas. Supports two-level nesting."
        ),
        styled_table(
            ["Field", "Type", "Description"],
            [
                ["_id", "ObjectId", "Primary key"],
                ["name", "String", "Folder display name"],
                ["parent_folder_id", "ObjectId", "Parent folder reference (null for root)"],
                ["level", "Number", "Depth level (0 = root, 1 = child, max depth: 2)"],
                ["created_by", "ObjectId", "Reference to User"],
            ],
            col_widths=[115, 80, CONTENT_WIDTH - 203],
        ),
        sp(),
    ]

    # --- Relationships ---
    relationship_bullets = (
        "<b>User \u2192 Persona</b> (1:N) \u2014 created_by field on Persona",
        "<b>User \u2192 Focus Group</b> (1:N) \u2014 created_by field on Focus Group",
        "<b>User \u2192 Folder</b> (1:N) \u2014 created_by field on Folder",
        "<b>Persona \u2194 Focus Group</b> (M:N) \u2014 participants array in Focus Group references persona IDs",
        "<b>Persona \u2194 Folder</b> (M:N) \u2014 folder_ids array on Persona references folder IDs",
        "<b>Folder \u2192 Folder</b> (1:N) \u2014 parent_folder_id for hierarchy",
    )
    story.append(h2("Relationships", "ch5_rels"))
    story.extend(bullet(text) for text in relationship_bullets)
    story.append(sp())
    return story
def build_chapter_6(rendered):
    """Chapter 6: Authentication & Authorization.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 6.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(6, "Authentication", "JWT, Microsoft OAuth, and WebSocket Auth"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Authentication & Authorization", "ch6"),
    ]

    # --- Dual Authentication System ---
    story += [
        h2("Dual Authentication System", "ch6_dual"),
        p(
            "Semblance supports two authentication methods that produce identical JWT tokens, "
            "allowing the rest of the application to be auth-method-agnostic:"
        ),
        bullet(
            "<b>Local JWT</b> \u2014 Username/password authentication with bcrypt password hashing. "
            "Tokens are HS256-signed with a 24-hour expiry."
        ),
        bullet(
            "<b>Microsoft OAuth (MSAL)</b> \u2014 Azure AD authentication via MSAL popup flow. "
            "The frontend obtains an idToken from Azure, sends it to the backend for validation, "
            "and receives a Semblance JWT in return."
        ),
        sp(),
        *diagram(rendered, "auth_flow",
                 "Figure 6.1 \u2014 Authentication Flow (Local JWT and Microsoft OAuth)"),
    ]

    # --- JWT Token Lifecycle ---
    story += [
        h2("JWT Token Lifecycle", "ch6_jwt"),
        styled_table(
            ["Property", "Value"],
            [
                ["Algorithm", "HS256"],
                ["Expiration", "24 hours"],
                ["Claims", "sub (user_id as string)"],
                ["Storage", "localStorage (auth_token key)"],
                ["Header Format", "Authorization: Bearer {token}"],
                ["Validation", "Checked on every API request via interceptor; validated server-side via @jwt_required decorator"],
            ],
            col_widths=[120, CONTENT_WIDTH - 128],
        ),
        sp(),
    ]

    # --- Frontend Token Management ---
    token_management_bullets = (
        "<b>Request interceptor</b> \u2014 Extracts JWT from localStorage, validates expiration by decoding the payload, attaches as Bearer token header.",
        "<b>Response interceptor</b> \u2014 Catches 401 responses, dispatches auth_error_event, clears localStorage, redirects to /login.",
        "<b>Session restoration</b> \u2014 On app mount, checks for existing token in localStorage, validates via GET /auth/me, restores session if valid.",
    )
    story += [
        h2("Frontend Token Management", "ch6_frontend"),
        p(
            "The Axios API client includes request and response interceptors for automatic token management:"
        ),
    ]
    story.extend(bullet(text) for text in token_management_bullets)
    story.append(sp())

    # --- WebSocket Authentication ---
    handshake_bullets = (
        "Client sends: <b>auth: { token: jwt_token }</b> in Socket.IO connection options",
        "Server extracts and validates the JWT on the connect event",
        "On success: emits <b>connected</b> event with session info",
        "On failure: emits <b>auth_error</b> event and disconnects",
    )
    story += [
        h2("WebSocket Authentication", "ch6_ws"),
        p(
            "Socket.IO connections authenticate via JWT token passed in the auth parameter during "
            "the connection handshake. The server validates the token before accepting the connection:"
        ),
    ]
    story.extend(bullet(text) for text in handshake_bullets)
    story.append(sp())

    # --- Route Protection ---
    story += [
        h2("Route Protection", "ch6_protection"),
        p(
            "Backend routes are protected with the <b>@jwt_required()</b> decorator (custom Quart-compatible "
            "implementation using PyJWT directly, not Flask-JWT-Extended). The decorator validates the "
            "Authorization header and makes the user identity available via <b>get_jwt_identity()</b>."
        ),
        p(
            "Frontend routes use the <b>ProtectedRoute</b> wrapper component that checks AuthContext "
            "isAuthenticated state and redirects unauthenticated users to /login."
        ),
        sp(),
    ]
    return story
def build_chapter_7(rendered):
    """Chapter 7: Real-Time Communication.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 7.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(7, "Real-Time Communication", "WebSocket Architecture and Events"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Real-Time Communication", "ch7"),
    ]

    # --- Socket.IO Architecture ---
    story += [
        h2("Socket.IO Architecture", "ch7_arch"),
        p(
            "The application uses Socket.IO for bidirectional real-time communication between the "
            "React frontend and Quart backend. The backend uses python-socketio's AsyncServer "
            "(native ASGI compatibility), while the frontend uses the socket.io-client library "
            "wrapped in a singleton service."
        ),
        sp(),
    ]

    # --- Room-Based Messaging ---
    story += [
        h2("Room-Based Messaging", "ch7_rooms"),
        p(
            "Each focus group session has a dedicated Socket.IO room. When a user opens a session, "
            "the client emits a <b>join_focus_group</b> event, and the server adds the connection to "
            "the room. All subsequent events (messages, status updates, theme discoveries) are broadcast "
            "to the room, enabling multiple observers to watch a live session."
        ),
        sp(),
        *diagram(rendered, "websocket_flow",
                 "Figure 7.1 \u2014 WebSocket Communication Flow"),
    ]

    # --- Frontend Event Dispatching ---
    story += [
        h2("Frontend Event Dispatching", "ch7_dispatch"),
        p(
            "The frontend uses a hybrid approach: the WebSocket singleton service binds specific listeners "
            "for known events, then re-dispatches all events as window CustomEvents with a <b>ws:</b> prefix. "
            "This decouples React components from the WebSocket implementation:"
        ),
        note(
            "Pattern: Socket.IO event \"message_update\" \u2192 window.dispatchEvent(new CustomEvent(\"ws:message_update\", {detail: payload})). "
            "Components listen via window.addEventListener without needing direct socket references."
        ),
        sp(),
    ]

    # --- WebSocket Event Catalog ---
    client_to_server_rows = [
        ["connect", "auth: {token}", "Authenticate WebSocket connection"],
        ["join_focus_group", "{focus_group_id}", "Join session room"],
        ["leave_focus_group", "{focus_group_id}", "Leave session room"],
        ["cancel_task", "{task_id}", "Cancel running AI task"],
    ]
    server_to_client_rows = [
        ["connected", "Connection success confirmation with session info"],
        ["auth_error", "Authentication failure notification"],
        ["joined_focus_group", "Room join confirmation"],
        ["message_update", "New message in conversation (includes sender, text, timestamp)"],
        ["ai_status_update", "AI mode status change (running/paused/completed/error)"],
        ["moderator_status_update", "Moderator action notification"],
        ["theme_update", "Key theme discovered or updated"],
        ["focus_group_update", "Focus group properties changed"],
        ["mode_event_update", "Session mode switch (manual \u2194 autonomous)"],
        ["analytics_update", "Conversation analytics data"],
        ["conversation_state_update", "Conversation state change"],
        ["task_started / task_completed", "Long-running task lifecycle events"],
        ["task_cancelled / task_failed", "Task termination events"],
        ["bulk_export_progress", "Export operation progress percentage"],
    ]
    story += [
        h2("WebSocket Event Catalog", "ch7_events"),
        h3("Client \u2192 Server", "ch7_c2s"),
        styled_table(
            ["Event", "Payload", "Purpose"],
            client_to_server_rows,
            col_widths=[120, 120, CONTENT_WIDTH - 248],
        ),
        sp(),
        h3("Server \u2192 Client", "ch7_s2c"),
        styled_table(
            ["Event", "Purpose"],
            server_to_client_rows,
            col_widths=[155, CONTENT_WIDTH - 163],
        ),
        sp(),
    ]

    # --- Reconnection Strategy ---
    # NOTE(review): the "Transport" row says there is no polling fallback,
    # while the Chapter 1 technology table says "Socket.IO (WebSocket with
    # polling fallback)". Confirm against the client configuration which
    # statement is correct.
    story += [
        h2("Reconnection Strategy", "ch7_reconnect"),
        p(
            "The Socket.IO client is configured with automatic reconnection. On reconnect, the service "
            "rebinds all event listeners (to survive reconnection cycles) and auto-rejoins the previous "
            "focus group room. Token refresh is attempted before reconnection to handle expired sessions."
        ),
        styled_table(
            ["Setting", "Value"],
            [
                ["Transport", "WebSocket only (no polling fallback)"],
                ["Reconnection", "Enabled (automatic)"],
                ["Connection Timeout", "60 seconds"],
                ["Ping Interval", "45 seconds"],
                ["Ping Timeout", "120 seconds"],
            ],
            col_widths=[140, CONTENT_WIDTH - 148],
        ),
        sp(),
    ]
    return story
def build_chapter_8(rendered):
    """Chapter 8: AI/LLM Integration.

    Args:
        rendered: Mapping of diagram id to rendered PNG path, forwarded to
            ``diagram()`` for Figure 8.1.

    Returns:
        List of flowables for the chapter, in document order.
    """
    # Chapter opener: title page on the "chapter" template, then switch back
    # to the "content" template for the body pages.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(8, "AI/LLM Integration", "Multi-Model Service and Task Management"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("AI/LLM Integration", "ch8"),
    ]

    # --- Multi-Model LLM Service ---
    story += [
        h2("Multi-Model LLM Service", "ch8_llm"),
        p(
            "The <b>LLMService</b> provides a unified interface for all AI operations, abstracting away "
            "provider differences between Google Gemini and OpenAI. Each call creates a fresh client "
            "instance to avoid event loop affinity issues in the async ASGI environment."
        ),
        styled_table(
            ["Model", "Provider", "API", "Special Parameters"],
            [
                ["gemini-3-pro-preview", "Google", "genai.Client.generate_content()", "temperature, max_tokens"],
                ["gpt-4.1", "OpenAI", "chat.completions.create()", "temperature, max_tokens"],
                ["gpt-5.2", "OpenAI", "responses.create()", "reasoning_effort (minimal/low/medium/high), verbosity (low/medium/high)"],
            ],
            col_widths=[100, 55, 135, CONTENT_WIDTH - 298],
        ),
        sp(),
        *diagram(rendered, "llm_pipeline",
                 "Figure 8.1 \u2014 LLM Request Pipeline"),
    ]

    # --- Retry and Error Handling ---
    story += [
        h2("Retry and Error Handling", "ch8_retry"),
        p(
            "All LLM calls are wrapped in a retry mechanism with exponential backoff. On failure, "
            "the service retries up to 3 times with delays of 1s, 2s, and 4s. The service also "
            "handles JSON parsing of responses, stripping markdown code blocks when needed."
        ),
        sp(),
    ]

    # --- AI Runner Service ---
    isolation_bullets = (
        "Autonomous conversations don't block HTTP request handling",
        "Motor (async MongoDB driver) runs on a consistent event loop",
        "Thread-safe task registry enables concurrent conversation management",
    )
    lifecycle_bullets = (
        "<b>init_ai_runner()</b> \u2014 Called on app startup, creates dedicated thread and event loop",
        "<b>submit_conversation()</b> \u2014 Schedules conversation coroutine on the AI event loop",
        "<b>stop_conversation()</b> \u2014 Cancels a specific conversation by focus group ID",
        "<b>Shutdown</b> \u2014 Graceful cleanup: cancels all running tasks, stops event loop, joins thread",
    )
    story += [
        h2("AI Runner Service", "ch8_runner"),
        p(
            "The <b>AIRunnerService</b> is a singleton that manages a dedicated background thread with "
            "its own asyncio event loop. This isolation ensures:"
        ),
    ]
    story.extend(bullet(text) for text in isolation_bullets)
    story.append(sp())
    story.append(p("Lifecycle:"))
    story.extend(bullet(text) for text in lifecycle_bullets)
    story.append(sp())

    # --- Task Management ---
    story += [
        h2("Task Management", "ch8_tasks"),
        p(
            "Long-running operations (persona generation, theme extraction, summary generation) are wrapped "
            "in <b>CancellableTask</b> objects tracked by the TaskManager. Each task has a unique ID, "
            "is associated with a user, and can be cancelled via the REST API or WebSocket."
        ),
        p(
            "On the frontend, the <b>useCancellableGeneration</b> hook manages task lifecycle state "
            "and listens for WebSocket task events (task_started, task_completed, task_failed, task_cancelled)."
        ),
        sp(),
    ]
    return story
def build_chapter_9(rendered):
    """Chapter 9: Core Feature Flows.

    Assembles the chapter title page followed by three sections: the persona
    generation pipeline, the focus group session lifecycle, and the
    autonomous conversation system.

    Args:
        rendered: Result of the Mermaid rendering step; forwarded untouched
            to ``diagram()`` for each figure in this chapter.

    Returns:
        A list of story elements ready to be appended to the document story.
    """
    # Title page, then switch back to the regular content template.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(9, "Core Feature Flows", "Personas, Focus Groups, and Autonomous AI"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Core Feature Flows", "ch9"),
    ]

    # --- Persona Generation Pipeline ---
    story += [
        h2("Persona Generation Pipeline", "ch9_persona"),
        p(
            "Persona generation uses a two-stage pipeline that progressively builds detail. "
            "The first stage generates basic demographic profiles from an audience brief, allowing "
            "the user to review before the second stage expands each into a fully detailed persona."
        ),
        *diagram(rendered, "persona_generation",
                 "Figure 9.1 \u2014 Two-Stage Persona Generation Pipeline"),
        p(
            "<b>Stage 1 \u2014 Basic Profiles:</b> The LLM receives the audience brief (optionally AI-enhanced) "
            "and generates N basic profiles with name, age, gender, occupation, location, personality summary, "
            "interests, and tech savviness. Optional customer data files are included for grounding."
        ),
        p(
            "<b>Stage 2 \u2014 Detailed Personas:</b> Each basic profile is expanded individually. The LLM adds "
            "OCEAN personality traits (scored 0\u2013100), goals, frustrations, motivations, behavioral scenarios, "
            "Think-Feel-Do mappings, self-determination needs, and an AI-synthesized biography. Each completed "
            "persona is saved to MongoDB immediately."
        ),
        sp(),
    ]

    # --- Focus Group Lifecycle ---
    # The two free-text columns split the remaining width evenly.
    flexible_width = (CONTENT_WIDTH - 93) * 0.5
    story += [
        h2("Focus Group Session Lifecycle", "ch9_lifecycle"),
        p(
            "Focus groups progress through a state machine with four primary states. Users can switch "
            "between manual and autonomous modes during a live session."
        ),
        *diagram(rendered, "focus_group_states",
                 "Figure 9.2 \u2014 Focus Group State Machine"),
        styled_table(
            ["State", "Description", "Transitions"],
            [
                ["new", "Freshly created, no session started", "Configure \u2192 setup"],
                ["setup", "Configuring guide, participants, settings", "Start Manual or Start Autonomous"],
                ["manual_mode", "User-controlled moderation", "Switch to AI, End Session"],
                ["ai_mode", "Autonomous AI-driven conversation", "Switch to Manual, End Session, Guide Complete"],
                ["completed", "Session finished", "Terminal state"],
                ["error", "AI error or timeout", "Resume Manually, Terminal"],
            ],
            col_widths=[85, flexible_width, flexible_width],
        ),
        sp(),
    ]

    # --- Autonomous Conversation System ---
    story += [
        h2("Autonomous Conversation System", "ch9_auto"),
        p(
            "The autonomous conversation controller orchestrates multi-persona discussions without human "
            "intervention. It runs in a dedicated thread (via AIRunnerService) and uses the LLM-powered "
            "decision engine to determine each action."
        ),
        *diagram(rendered, "autonomous_conversation",
                 "Figure 9.3 \u2014 Autonomous Conversation Loop (One Iteration)"),
        h3("Decision Engine Actions", "ch9_decisions"),
        styled_table(
            ["Action", "Description"],
            [
                ["participant_respond", "Selected persona provides a contextual response"],
                ["moderator_speak", "AI moderator advances discussion or redirects"],
                ["participant_interaction", "Two personas engage in direct dialogue"],
                ["probe_trigger", "Probing question to deepen exploration"],
                ["end_session", "Conclude the conversation (guide complete or limits reached)"],
            ],
            col_widths=[130, CONTENT_WIDTH - 138],
        ),
        sp(),
        h3("Safety Limits", "ch9_safety"),
        styled_table(
            ["Limit", "Value", "Purpose"],
            [
                ["Max Actions", "500", "Prevents runaway conversations"],
                ["Max Consecutive Silence", "3", "Ends session if no meaningful responses"],
                ["Response Timeout", "30 seconds", "Prevents hanging on unresponsive LLM"],
                ["Inter-Action Delay", "3\u201310 seconds (random)", "Simulates natural conversation pace"],
                ["Dominance Threshold", "40%", "Flags when a participant dominates discussion"],
                ["Reasoning History", "Last 20 decisions", "Prevents repetitive decision patterns"],
            ],
            col_widths=[120, 80, CONTENT_WIDTH - 208],
        ),
        sp(),
    ]
    return story
def build_chapter_10(rendered):
    """Chapter 10: API Reference.

    Emits the chapter title page, a short introduction, and one endpoint
    table per route group. Every table shares the same four-column header.

    Args:
        rendered: Accepted for signature parity with the other chapter
            builders; this chapter contains no diagrams, so it is unused.

    Returns:
        A list of story elements ready to be appended to the document story.
    """
    # One entry per route group:
    #   (route prefix, bookmark key, endpoint rows, fixed column widths, trim)
    # The last ("Purpose") column receives CONTENT_WIDTH - trim; in every
    # entry trim equals the sum of the three fixed widths plus 8.
    route_groups = [
        (
            "/api/auth", "ch10_auth",
            [
                ["POST", "/register", "No", "Register new user account"],
                ["POST", "/login", "No", "Login with username/password, returns JWT"],
                ["POST", "/microsoft", "No", "Microsoft MSAL authentication, returns JWT"],
                ["GET", "/me", "Yes", "Validate token and return user profile"],
            ],
            (45, 100, 30), 183,
        ),
        (
            "/api/personas", "ch10_personas",
            [
                ["GET", "/", "Yes", "Get current user's personas"],
                ["GET", "/all", "Yes", "Get all personas"],
                ["GET", "/:id", "Yes", "Get specific persona by ID"],
                ["POST", "/", "Yes", "Create new persona"],
                ["POST", "/batch", "Yes", "Create multiple personas"],
                ["PUT", "/:id", "Yes", "Update persona"],
                ["DELETE", "/:id", "Yes", "Delete persona"],
                ["POST", "/:id/export-profile", "Yes", "Export persona as formatted profile"],
                ["POST", "/bulk-export", "Yes", "Export multiple personas (MD/JSON/CSV)"],
                ["POST", "/:id/modify-with-ai", "Yes", "AI-assisted persona modification"],
            ],
            (45, 130, 30), 213,
        ),
        (
            "/api/ai-personas", "ch10_ai",
            [
                ["POST", "/generate-basic-profiles", "Yes", "Stage 1: Generate basic demographic profiles"],
                ["POST", "/generate-personas", "Yes", "Stage 2: Expand profiles to full personas"],
                ["POST", "/enhance-audience-brief", "Yes", "AI-enhance the audience brief"],
                ["POST", "/upload-customer-data", "Yes", "Upload research documents for grounding"],
                ["DELETE", "/cleanup-customer-data/:id", "Yes", "Clean up uploaded customer data"],
                ["POST", "/batch-generate-summaries", "Yes", "Generate summaries for multiple personas"],
            ],
            (50, 160, 30), 248,
        ),
        (
            "/api/focus-groups", "ch10_fg",
            [
                ["GET", "/", "Yes", "Get user's focus groups"],
                ["GET", "/:id", "Yes", "Get specific focus group"],
                ["POST", "/", "Yes", "Create focus group"],
                ["PUT", "/:id", "Yes", "Update focus group"],
                ["DELETE", "/:id", "Yes", "Delete focus group"],
                ["POST", "/:id/participants", "Yes", "Add participant to group"],
                ["DELETE", "/:id/participants/:pid", "Yes", "Remove participant"],
                ["POST", "/:id/messages", "Yes", "Send message in session"],
                ["GET", "/:id/messages", "Yes", "Get session messages"],
                ["POST", "/:id/assets", "Yes", "Upload creative assets"],
                ["POST", "/:id/generate-discussion-guide", "Yes", "Generate AI discussion guide"],
                ["POST", "/:id/notes", "Yes", "Create session note"],
            ],
            (50, 170, 30), 258,
        ),
        (
            "/api/focus-group-ai", "ch10_fgai",
            [
                ["POST", "/generate-response", "Yes", "Generate persona response in session"],
                ["POST", "/generate-key-themes", "Yes", "Extract themes from conversation"],
                ["POST", "/autonomous/start/:id", "Yes", "Start autonomous conversation"],
                ["POST", "/autonomous/stop/:id", "Yes", "Stop autonomous conversation"],
                ["GET", "/autonomous/status/:id", "Yes", "Get autonomous mode status"],
                ["POST", "/moderator/advance/:id", "Yes", "Advance moderator to next topic"],
                ["POST", "/moderator/end-session/:id", "Yes", "End session via moderator"],
                ["GET", "/conversation/state/:id", "Yes", "Get conversation state"],
                ["GET", "/conversation/analytics/:id", "Yes", "Get conversation analytics"],
                ["POST", "/conversation/intervene/:id", "Yes", "Manual intervention in autonomous mode"],
            ],
            (45, 155, 30), 238,
        ),
        (
            "/api/folders", "ch10_folders",
            [
                ["GET", "/", "Yes", "Get folder hierarchy tree"],
                ["GET", "/:id", "Yes", "Get specific folder"],
                ["POST", "/", "Yes", "Create folder"],
                ["PUT", "/:id", "Yes", "Update folder"],
                ["DELETE", "/:id", "Yes", "Delete folder"],
                ["POST", "/:id/personas", "Yes", "Add persona to folder"],
                ["DELETE", "/:id/personas/:pid", "Yes", "Remove persona from folder"],
                ["POST", "/:id/personas/batch", "Yes", "Add multiple personas to folder"],
            ],
            (50, 135, 30), 223,
        ),
        (
            "/api/tasks", "ch10_tasks",
            [
                ["DELETE", "/:task_id", "Yes", "Cancel a running task"],
                ["GET", "/user/:user_id", "Yes", "Get user's active tasks"],
            ],
            (50, 120, 30), 208,
        ),
    ]

    # Title page, then the chapter heading and introduction.
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(10, "API Reference", "REST Endpoint Catalog"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("API Reference", "ch10"),
        p(
            "The backend exposes 7 route groups via Flask/Quart blueprints. All endpoints except "
            "authentication routes require a valid JWT token in the Authorization header."
        ),
    ]

    # Heading, endpoint table, spacer -- in that order -- for each group.
    for prefix, bookmark, endpoint_rows, fixed_widths, trim in route_groups:
        story.append(h2(prefix, bookmark))
        story.append(styled_table(
            ["Method", "Path", "Auth", "Purpose"],
            endpoint_rows,
            col_widths=[*fixed_widths, CONTENT_WIDTH - trim],
        ))
        story.append(sp())
    return story
def build_chapter_11(rendered):
    """Chapter 11: Data Flow.

    Builds the closing chapter: the end-to-end request path, the autonomous
    conversation data path, a summary of key architectural patterns, and a
    final note pointing readers at the source tree.

    Args:
        rendered: Accepted for signature parity with the other chapter
            builders; this chapter contains no diagrams, so it is unused.

    Returns:
        A list of story elements ready to be appended to the document story.
    """
    e = []
    e.append(NextPageTemplate("chapter"))
    e.append(PageBreak())
    e.append(ChapterTitlePage(11, "Data Flow", "End-to-End Request and Conversation Patterns"))
    e.append(NextPageTemplate("content"))
    e.append(PageBreak())
    e.append(h1("Data Flow", "ch11"))

    # --- End-to-end request flow ---
    e.append(h2("End-to-End Request Flow", "ch11_e2e"))
    e.append(p(
        "A typical user interaction follows this path through the system:"
    ))
    e.append(bullet("<b>1. User Action</b> \u2014 Click, form submit, or navigation event in the React SPA"))
    e.append(bullet("<b>2. API Request</b> \u2014 Axios sends REST request with JWT Bearer token via the API client"))
    e.append(bullet("<b>3. Route Handler</b> \u2014 Quart blueprint validates JWT, extracts user identity, calls service layer"))
    e.append(bullet("<b>4. Service Processing</b> \u2014 Business logic executes: LLM calls, database operations, prompt templating"))
    e.append(bullet("<b>5. Data Persistence</b> \u2014 MongoDB operations via PyMongo (or Motor in AI thread)"))
    e.append(bullet("<b>6. WebSocket Broadcast</b> \u2014 For real-time operations, events are emitted to the focus group room"))
    e.append(bullet("<b>7. Response</b> \u2014 JSON response returned to frontend; WebSocket events dispatched as window events"))
    e.append(bullet("<b>8. UI Update</b> \u2014 React components re-render via TanStack Query cache invalidation or WebSocket event handlers"))
    e.append(sp())

    # --- Autonomous conversation data flow ---
    e.append(h2("AI Conversation Data Flow", "ch11_ai"))
    e.append(p(
        "During an autonomous conversation, data flows through a specialized pipeline:"
    ))
    e.append(bullet("<b>1. User starts autonomous mode</b> \u2014 POST /autonomous/start/:id"))
    e.append(bullet("<b>2. AI Runner</b> \u2014 Submits conversation coroutine to dedicated thread"))
    e.append(bullet("<b>3. Conversation Loop</b> \u2014 Controller runs continuously until end condition"))
    e.append(bullet("<b>4. Context Assembly</b> \u2014 ConversationContextService loads messages, participants, guide from MongoDB"))
    e.append(bullet("<b>5. Decision</b> \u2014 ConversationDecisionService sends context + prompt to LLM, gets structured action"))
    e.append(bullet("<b>6. Execution</b> \u2014 Controller executes action (generate response, moderate, probe)"))
    e.append(bullet("<b>7. Persistence</b> \u2014 Message saved to MongoDB focus group document"))
    e.append(bullet("<b>8. Broadcast</b> \u2014 WebSocketManager emits message_update and status events to room"))
    e.append(bullet("<b>9. Frontend</b> \u2014 Window events trigger React component updates in real time"))
    e.append(bullet("<b>10. Loop</b> \u2014 3\u201310 second delay, then repeat from step 4"))
    e.append(sp())

    # --- Key architectural patterns ---
    e.append(h2("Key Architectural Patterns", "ch11_patterns"))
    e.append(h3("Singleton WebSocket Service", "ch11_ws"))
    e.append(p(
        "The frontend creates a single Socket.IO instance at module level, shared across all components. "
        "This prevents multiple socket connections and ensures consistent event routing. Events are "
        "re-dispatched as window CustomEvents, decoupling React from the WebSocket implementation."
    ))
    e.append(h3("Dedicated AI Thread", "ch11_thread"))
    e.append(p(
        "Autonomous conversations run in a dedicated Python thread with its own asyncio event loop. "
        "This prevents long-running AI operations from blocking HTTP request handling and provides "
        "a stable event loop for Motor (async MongoDB driver)."
    ))
    # The heading previously read "Two-Pass Document Rendering", which does
    # not describe the paragraph below (event-driven session updates). Only
    # the visible title is corrected; the bookmark key is left unchanged so
    # any existing anchor to this section keeps resolving.
    e.append(h3("Event-Driven Session Updates", "ch11_twopass"))
    e.append(p(
        "Focus group sessions use an event-driven architecture where database writes and WebSocket "
        "broadcasts happen atomically. The WebSocket manager maintains room membership, ensuring "
        "only connected observers receive updates."
    ))
    e.append(h3("Prompt Template Engine", "ch11_prompts"))
    e.append(p(
        "All LLM prompts are externalized as markdown files in /backend/prompts/. The PromptLoader "
        "reads templates at runtime and interpolates context variables. This separation enables "
        "prompt iteration without code changes."
    ))
    e.append(sp())

    # Closing pointer to where the described code lives.
    e.append(note(
        "This document was auto-generated from the Semblance codebase. For the most current details, "
        "consult the source code directly. Service files are in /backend/app/services/, route files "
        "in /backend/app/routes/, and frontend components in /src/components/."
    ))
    return e
# ============================================================================
# MAIN BUILD
# ============================================================================
def build_architecture_doc(output_path):
    """Render the Mermaid diagrams, assemble all chapters, and write the PDF.

    Progress is reported on stdout. Diagrams are rendered into a temporary
    directory that is removed once the PDF build finishes, whether the build
    succeeded or raised.

    Args:
        output_path: Destination path of the generated PDF. It is handed to
            ``ArchDocTemplate`` and read back afterwards to report the size.

    Raises:
        Any exception raised while rendering diagrams, building chapters, or
        running ``multiBuild`` propagates to the caller after the temporary
        diagram directory has been cleaned up.
    """
    print("=" * 60)
    print("Building Semblance Technical Architecture Document")
    print("=" * 60)

    # Create temp directory for Mermaid diagrams
    diagram_dir = tempfile.mkdtemp(prefix="semblance_diagrams_")
    print(f"\n Diagram directory: {diagram_dir}")

    try:
        # Step 1: Render Mermaid diagrams
        print("\n Rendering Mermaid diagrams...")
        rendered = render_mermaid_diagrams(diagram_dir)
        print(f" Rendered {len(rendered)}/{len(DIAGRAMS)} diagrams\n")

        # Step 2: Build document
        doc = ArchDocTemplate(output_path)
        story = []

        # Cover page
        print(" Cover page...")
        story.append(NextPageTemplate("cover"))
        story.append(Spacer(1, 1))

        # Table of Contents
        print(" Table of Contents...")
        story.append(NextPageTemplate("toc"))
        story.append(PageBreak())
        story.append(BookmarkedHeading("Table of Contents", STYLES["h1"], level=0, bookmark_name="toc"))
        story.append(Spacer(1, 10))
        toc = TableOfContents()
        toc.levelStyles = [STYLES["toc0"], STYLES["toc1"], STYLES["toc2"]]
        story.append(toc)

        # Chapters
        chapters = [
            ("Chapter 1: Executive Overview", build_chapter_1),
            ("Chapter 2: System Architecture", build_chapter_2),
            ("Chapter 3: Frontend Architecture", build_chapter_3),
            ("Chapter 4: Backend Architecture", build_chapter_4),
            ("Chapter 5: Data Model", build_chapter_5),
            ("Chapter 6: Authentication & Authorization", build_chapter_6),
            ("Chapter 7: Real-Time Communication", build_chapter_7),
            ("Chapter 8: AI/LLM Integration", build_chapter_8),
            ("Chapter 9: Core Feature Flows", build_chapter_9),
            ("Chapter 10: API Reference", build_chapter_10),
            ("Chapter 11: Data Flow", build_chapter_11),
        ]
        for name, builder in chapters:
            print(f" {name}...")
            story.extend(builder(rendered))

        # Build PDF (multi-pass for TOC)
        print("\n Rendering PDF (multi-pass for TOC)...")
        doc.multiBuild(story)
    finally:
        # Cleanup diagram temp files. This must run after multiBuild, since
        # the diagram files live in this directory until the PDF is written.
        # Removal is best-effort, and running it in ``finally`` keeps a
        # failed build from leaking the directory.
        shutil.rmtree(diagram_dir, ignore_errors=True)

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\nDone! {os.path.abspath(output_path)} ({size_kb:.0f} KB)")
if __name__ == "__main__":
    # An explicit output path on the command line wins; otherwise the PDF is
    # written three directory levels above this script.
    if len(sys.argv) > 1:
        output = sys.argv[1]
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        default_dir = os.path.dirname(os.path.dirname(script_dir))
        output = os.path.join(default_dir, "semblance_architecture.pdf")
    build_architecture_doc(output)