#!/usr/bin/env python3
"""
Semblance - Technical Architecture Document Generator
Generates a professionally formatted PDF with Mermaid diagrams rendered as PNGs.
Usage:
cd backend && source venv/bin/activate
python scripts/generate_architecture_doc.py [output_path]
"""
import sys
import os
import subprocess
import json
import tempfile
import shutil
from reportlab.platypus import (
BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, PageBreak,
NextPageTemplate, Table, TableStyle, Flowable, KeepTogether, Image,
)
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
from reportlab.lib.colors import HexColor, white, black
from reportlab.lib.units import inch, mm
from reportlab.lib import colors
# ============================================================================
# DESIGN SYSTEM (matches user manual)
# ============================================================================
# Named brand palette. Each entry is given as a hex code and converted to a
# ReportLab color object; the rest of the module looks colors up by name.
COLORS = {
    name: HexColor(hex_code)
    for name, hex_code in (
        ("primary", "#E8A0B4"),
        ("primary_dark", "#9B4D63"),
        ("primary_deeper", "#6B2D3F"),
        ("secondary", "#F5EEF0"),
        ("accent", "#3B82F6"),
        ("text_primary", "#2D1F24"),
        ("text_secondary", "#7A6068"),
        ("text_light", "#FFFFFF"),
        ("background", "#FAFAFA"),
        ("border", "#E8D9DE"),
        ("code_bg", "#F8F4F5"),
        ("tip_bg", "#EFF6FF"),
        ("tip_border", "#3B82F6"),
        ("note_bg", "#FFF7ED"),
        ("note_border", "#F59E0B"),
        ("warning_bg", "#FEF2F2"),
        ("warning_border", "#EF4444"),
        ("success_bg", "#F0FDF4"),
        ("success_border", "#22C55E"),
        ("chapter_bg", "#6B2D3F"),
        ("table_header_bg", "#6B2D3F"),
        ("table_alt_row", "#FAF5F7"),
    )
}
# Font names keyed by typographic role.
FONTS = dict(
    heading="Helvetica-Bold",
    body="Helvetica",
    body_italic="Helvetica-Oblique",
    mono="Courier",
    mono_bold="Courier-Bold",
)
# Page geometry: A4 sheet, symmetric horizontal and vertical margins, and the
# resulting size of the single content frame.
PAGE_WIDTH = A4[0]
PAGE_HEIGHT = A4[1]
MARGIN_LEFT = MARGIN_RIGHT = 60
MARGIN_TOP = MARGIN_BOTTOM = 72
CONTENT_WIDTH = PAGE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
FRAME_HEIGHT = PAGE_HEIGHT - MARGIN_TOP - MARGIN_BOTTOM
# ============================================================================
# PARAGRAPH STYLES
# ============================================================================
def get_styles():
    """Return the document's paragraph styles, keyed by short role name.

    Every style takes its font from FONTS and its text color from COLORS;
    spacing, indentation and alignment are passed through per style.
    """

    def make(name, font_key, size, leading, color_key, **extra):
        # Shared constructor: the four attributes every style defines, plus
        # whatever style-specific keyword arguments the caller supplies.
        return ParagraphStyle(
            name,
            fontName=FONTS[font_key],
            fontSize=size,
            leading=leading,
            textColor=COLORS[color_key],
            **extra,
        )

    styles = {}

    # Headings stay attached to the flowable that follows them.
    styles["h1"] = make(
        "Heading1", "heading", 24, 30, "primary_deeper",
        spaceBefore=28, spaceAfter=14, keepWithNext=True,
    )
    styles["h2"] = make(
        "Heading2", "heading", 18, 24, "primary_dark",
        spaceBefore=22, spaceAfter=10, keepWithNext=True,
    )
    styles["h3"] = make(
        "Heading3", "heading", 14, 18, "primary_dark",
        spaceBefore=16, spaceAfter=8, keepWithNext=True,
    )

    # Running text.
    styles["body"] = make(
        "Body", "body", 10.5, 15, "text_primary",
        spaceBefore=3, spaceAfter=7, alignment=TA_JUSTIFY,
    )
    styles["body_bold"] = make(
        "BodyBold", "heading", 10.5, 15, "text_primary",
        spaceBefore=3, spaceAfter=4,
    )
    styles["bullet"] = make(
        "Bullet", "body", 10.5, 15, "text_primary",
        leftIndent=20, bulletIndent=8, spaceBefore=2, spaceAfter=2,
    )
    styles["bullet2"] = make(
        "Bullet2", "body", 10, 14, "text_secondary",
        leftIndent=40, bulletIndent=28, spaceBefore=1, spaceAfter=1,
    )
    styles["caption"] = make(
        "Caption", "body_italic", 9, 12, "text_secondary",
        alignment=TA_CENTER, spaceBefore=4, spaceAfter=14,
    )
    styles["code_block"] = make(
        "CodeBlock", "mono", 8, 11, "text_primary",
        spaceBefore=4, spaceAfter=4, leftIndent=12,
        backColor=COLORS["code_bg"],
    )

    # Table-of-contents levels, indented 20pt per level.
    styles["toc0"] = make(
        "TOC0", "heading", 13, 22, "primary_deeper",
        leftIndent=0, spaceBefore=10,
    )
    styles["toc1"] = make(
        "TOC1", "body", 11, 17, "text_primary",
        leftIndent=20, spaceBefore=3,
    )
    styles["toc2"] = make(
        "TOC2", "body", 10, 15, "text_secondary",
        leftIndent=40, spaceBefore=2,
    )

    # Table cells and callout text.
    styles["table_header"] = make("TableHeader", "heading", 9.5, 13, "text_light")
    styles["table_cell"] = make("TableCell", "body", 9.5, 13, "text_primary")
    styles["table_cell_mono"] = make("TableCellMono", "mono", 8.5, 12, "text_primary")
    styles["callout_body"] = make("CalloutBody", "body", 10, 14, "text_primary")

    return styles


STYLES = get_styles()
# ============================================================================
# CUSTOM FLOWABLES
# ============================================================================
class ChapterTitlePage(Flowable):
    """Chapter opener: a colored banner with the chapter number, title and subtitle."""

    def __init__(self, number, title, subtitle=""):
        super().__init__()
        self.number = number
        self.title = title
        self.subtitle = subtitle

    def wrap(self, availWidth, availHeight):
        # Always claims the full frame height, whatever space is offered.
        self.width, self.height = availWidth, FRAME_HEIGHT
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        width, height = self.width, self.height
        centre_x = width / 2

        # The banner covers the top 40% of the frame and is extended outwards
        # by the left, right and top margin sizes.
        banner_h = height * 0.40
        banner_bottom = height - banner_h
        canvas.setFillColor(COLORS["chapter_bg"])
        canvas.rect(
            -MARGIN_LEFT,
            banner_bottom,
            width + MARGIN_LEFT + MARGIN_RIGHT,
            banner_h + MARGIN_TOP,
            fill=1,
            stroke=0,
        )

        # Small "CHAPTER n" label near the top of the banner.
        canvas.setFillColor(COLORS["primary"])
        canvas.setFont(FONTS["body"], 14)
        canvas.drawCentredString(
            centre_x, banner_bottom + banner_h * 0.72, f"CHAPTER {self.number}"
        )

        # Oversized, mostly transparent chapter number.
        canvas.setFillColorRGB(1, 1, 1, 0.12)
        canvas.setFont(FONTS["heading"], 160)
        canvas.drawCentredString(
            centre_x, banner_bottom + banner_h * 0.18, str(self.number)
        )

        # Chapter title in solid white.
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 30)
        canvas.drawCentredString(
            centre_x, banner_bottom + banner_h * 0.38, self.title
        )

        if self.subtitle:
            canvas.setFillColorRGB(1, 1, 1, 0.75)
            canvas.setFont(FONTS["body"], 13)
            canvas.drawCentredString(
                centre_x, banner_bottom + banner_h * 0.22, self.subtitle
            )

        # Short accent rule 20pt below the banner.
        rule_len = 60
        rule_y = banner_bottom - 20
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(3)
        canvas.line(
            centre_x - rule_len / 2, rule_y, centre_x + rule_len / 2, rule_y
        )
class CalloutBox(Flowable):
    """Tinted box with a colored left stripe, a small label and a paragraph of text."""

    # Background color, stripe/label color and label text per callout type.
    CONFIGS = {
        "tip": dict(bg=COLORS["tip_bg"], border=COLORS["tip_border"], label="TIP"),
        "note": dict(bg=COLORS["note_bg"], border=COLORS["note_border"], label="NOTE"),
        "warning": dict(
            bg=COLORS["warning_bg"], border=COLORS["warning_border"], label="WARNING"
        ),
    }

    def __init__(self, text, callout_type="tip"):
        super().__init__()
        self.text = text
        # An unknown callout_type raises KeyError here.
        self.config = self.CONFIGS[callout_type]
        self._para = Paragraph(self.text, STYLES["callout_body"])

    def wrap(self, availWidth, availHeight):
        # The text is wrapped 26pt narrower than the box; the box is 32pt
        # taller than the wrapped text.
        _, text_height = self._para.wrap(availWidth - 26, availHeight)
        self.width = availWidth
        self.height = text_height + 32
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        background = self.config["bg"]
        accent = self.config["border"]
        box_w, box_h = self.width, self.height

        # Rounded background panel.
        canvas.setFillColor(background)
        canvas.roundRect(0, 0, box_w, box_h, 4, fill=1, stroke=0)

        # 4pt stripe down the left edge.
        canvas.setFillColor(accent)
        canvas.rect(0, 0, 4, box_h, fill=1, stroke=0)

        # Label in the stripe color, then the body text beneath it.
        canvas.setFont(FONTS["heading"], 8.5)
        canvas.setFillColor(accent)
        canvas.drawString(14, box_h - 15, self.config["label"])
        self._para.drawOn(canvas, 14, 6)
class HorizontalRule(Flowable):
    """Thin full-width divider line occupying 12pt of vertical space."""

    def __init__(self):
        super().__init__()

    def wrap(self, availWidth, availHeight):
        self.width, self.height = availWidth, 12
        return self.width, self.height

    def draw(self):
        canvas = self.canv
        canvas.setStrokeColor(COLORS["border"])
        canvas.setLineWidth(0.5)
        # The line sits at the vertical middle of the 12pt band.
        canvas.line(0, 6, self.width, 6)
class DiagramImage(Flowable):
    """Embed a PNG diagram with an optional caption, scaled to fit the content width.

    The image is centered horizontally and the caption is drawn beneath it.
    When the image file does not exist, a small "[Diagram not found: ...]"
    placeholder box is drawn instead.

    Args:
        image_path: Path of the PNG to embed.
        caption: Caption text; no caption is drawn when empty.
        max_width: Maximum drawn width; defaults to CONTENT_WIDTH.
    """

    # Largest drawn height, as a share of a full content frame.
    MAX_FRAME_FRACTION = 0.65
    # Vertical space reserved when the image file is missing.
    PLACEHOLDER_HEIGHT = 40

    def __init__(self, image_path, caption="", max_width=None):
        super().__init__()
        self.image_path = image_path
        self.caption_text = caption
        self.max_width = max_width or CONTENT_WIDTH
        self._img = None
        self._cap = None
        if caption:
            self._cap = Paragraph(caption, STYLES["caption"])

    def wrap(self, availWidth, availHeight):
        """Size the image and caption and return the (width, height) required."""
        max_w = min(self.max_width, availWidth)
        if os.path.exists(self.image_path):
            self._img = Image(self.image_path)
            draw_w, draw_h = self._img.imageWidth, self._img.imageHeight

            # Shrink (never enlarge) to the permitted width.
            if draw_w > max_w:
                scale = max_w / draw_w
                draw_w *= scale
                draw_h *= scale

            # Cap the height against the full frame, not against availHeight:
            # availHeight is only the space left on the current page, so
            # using it would shrink a diagram placed low on a page instead
            # of letting the layout engine move it to the next page.
            max_h = FRAME_HEIGHT * self.MAX_FRAME_FRACTION
            if draw_h > max_h:
                scale = max_h / draw_h
                draw_w *= scale
                draw_h *= scale

            self._img.drawWidth = draw_w
            self._img.drawHeight = draw_h
            self._img.wrap(availWidth, availHeight)
            img_h = draw_h
        else:
            img_h = self.PLACEHOLDER_HEIGHT

        cap_h = 0
        if self._cap:
            _, cap_h = self._cap.wrap(availWidth, availHeight)
            cap_h += 6  # gap between caption and image

        self.width = availWidth
        self.height = img_h + cap_h + 12
        return self.width, self.height

    def draw(self):
        """Draw the caption at the bottom and the image (or placeholder) above it."""
        canvas = self.canv
        cap_h = 0
        if self._cap:
            _, cap_h = self._cap.wrap(self.width, 9999)
            cap_h += 6
            self._cap.drawOn(canvas, 0, 0)

        image_exists = os.path.exists(self.image_path)
        if self._img and image_exists:
            x_offset = (self.width - self._img.drawWidth) / 2
            self._img.drawOn(canvas, x_offset, cap_h + 4)
        elif not image_exists:
            canvas.setFillColor(COLORS["code_bg"])
            canvas.roundRect(0, cap_h + 4, self.width, 30, 4, fill=1, stroke=0)
            canvas.setFont(FONTS["body_italic"], 9)
            canvas.setFillColor(COLORS["text_secondary"])
            canvas.drawCentredString(
                self.width / 2,
                cap_h + 16,
                f"[Diagram not found: {os.path.basename(self.image_path)}]",
            )
# ============================================================================
# TABLE HELPER
# ============================================================================
def _build_table(header, rows, col_widths, first_col_style):
    """Build the shared table layout used by styled_table and mono_table.

    Args:
        header: Iterable of header cell texts.
        rows: Iterable of rows; each cell is converted with str().
        col_widths: Column widths passed to Table, or None.
        first_col_style: ParagraphStyle applied to column 0 of body rows;
            every other body cell uses STYLES["table_cell"].

    Returns:
        A Table with a repeating header row, grid lines and alternate-row
        shading.
    """
    header_style = STYLES["table_header"]
    cell_style = STYLES["table_cell"]

    data = [[Paragraph(title, header_style) for title in header]]
    data.extend(
        [
            Paragraph(str(value), first_col_style if col == 0 else cell_style)
            for col, value in enumerate(row)
        ]
        for row in rows
    )

    table = Table(data, colWidths=col_widths, repeatRows=1)
    commands = [
        # Header row.
        ("BACKGROUND", (0, 0), (-1, 0), COLORS["table_header_bg"]),
        ("TEXTCOLOR", (0, 0), (-1, 0), white),
        ("FONTNAME", (0, 0), (-1, 0), FONTS["heading"]),
        ("FONTSIZE", (0, 0), (-1, 0), 9.5),
        ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
        ("TOPPADDING", (0, 0), (-1, 0), 8),
        # Body rows.
        ("FONTNAME", (0, 1), (-1, -1), FONTS["body"]),
        ("FONTSIZE", (0, 1), (-1, -1), 9.5),
        ("TOPPADDING", (0, 1), (-1, -1), 6),
        ("BOTTOMPADDING", (0, 1), (-1, -1), 6),
        # Whole table.
        ("LEFTPADDING", (0, 0), (-1, -1), 8),
        ("RIGHTPADDING", (0, 0), (-1, -1), 8),
        ("GRID", (0, 0), (-1, -1), 0.5, COLORS["border"]),
        ("LINEBELOW", (0, 0), (-1, 0), 1.5, COLORS["primary_dark"]),
        ("VALIGN", (0, 0), (-1, -1), "TOP"),
        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
    ]
    # Shade every even-indexed row below the header (rows 2, 4, ...).
    commands.extend(
        ("BACKGROUND", (0, row_idx), (-1, row_idx), COLORS["table_alt_row"])
        for row_idx in range(2, len(data), 2)
    )
    table.setStyle(TableStyle(commands))
    return table


def styled_table(header, rows, col_widths=None):
    """Return a styled table whose body cells all use the regular cell style."""
    return _build_table(header, rows, col_widths, STYLES["table_cell"])


def mono_table(header, rows, col_widths=None):
    """Table with monospace font in the first column (for paths/code)."""
    return _build_table(header, rows, col_widths, STYLES["table_cell_mono"])
# ============================================================================
# BOOKMARKED HEADING
# ============================================================================
class BookmarkedHeading(Paragraph):
    """Heading paragraph that adds a PDF bookmark and outline entry when drawn.

    The instance also exposes ``toc_level``, ``plain_text`` and ``bm_name``.
    """

    def __init__(self, text, style, level=0, bookmark_name=None):
        if bookmark_name:
            anchor = bookmark_name
        else:
            # Derive a key from the text: spaces and slashes become
            # underscores, truncated to 60 characters.
            anchor = text.replace(" ", "_").replace("/", "_")[:60]
        self.bm_name = anchor
        self.toc_level = level
        self.plain_text = text
        super().__init__(text, style)

    def draw(self):
        canvas = self.canv
        # Register the destination first, then the outline entry pointing to it.
        canvas.bookmarkPage(self.bm_name)
        canvas.addOutlineEntry(self.plain_text, self.bm_name, self.toc_level, 0)
        super().draw()
# ============================================================================
# DOCUMENT TEMPLATE
# ============================================================================
class ArchDocTemplate(BaseDocTemplate):
    """A4 document template with cover, TOC, chapter-opener and content page layouts."""

    def __init__(self, filename):
        super().__init__(
            filename,
            pagesize=A4,
            leftMargin=MARGIN_LEFT,
            rightMargin=MARGIN_RIGHT,
            topMargin=MARGIN_TOP,
            bottomMargin=MARGIN_BOTTOM,
            title="Semblance Technical Architecture",
            author="Semblance",
        )
        # One unpadded frame is shared by every page template.
        main_frame = Frame(
            MARGIN_LEFT, MARGIN_BOTTOM, CONTENT_WIDTH, FRAME_HEIGHT,
            id="main",
            leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0,
        )
        page_decorators = (
            ("cover", self._draw_cover),
            ("toc", self._on_toc),
            ("chapter", lambda c, d: None),  # chapter openers draw nothing extra
            ("content", self._on_content),
        )
        self.addPageTemplates([
            PageTemplate(id=template_id, frames=[main_frame], onPage=on_page)
            for template_id, on_page in page_decorators
        ])

    def afterFlowable(self, flowable):
        """Emit a TOCEntry notification for each BookmarkedHeading placed."""
        if not isinstance(flowable, BookmarkedHeading):
            return
        entry = (
            flowable.toc_level,
            flowable.plain_text,
            self.page,
            flowable.bm_name,
        )
        self.notify("TOCEntry", entry)

    @staticmethod
    def _draw_cover(canvas, doc):
        """Paint the cover: gradient, circle network, title block and version."""
        canvas.saveState()
        page_w, page_h = PAGE_WIDTH, PAGE_HEIGHT
        centre_x = page_w / 2

        # Gradient background (deep wine to rose), painted as horizontal
        # strips from the top of the page downwards.
        strip_count = 120
        strip_h = page_h / strip_count
        top_rgb = (107, 45, 63)
        bottom_rgb = (245, 238, 240)
        for idx in range(strip_count):
            blend = idx / strip_count
            red, green, blue = (
                (start + (end - start) * blend) / 255
                for start, end in zip(top_rgb, bottom_rgb)
            )
            canvas.setFillColorRGB(red, green, blue)
            # Each strip is 1pt taller than its slot.
            canvas.rect(
                0, page_h - (idx + 1) * strip_h, page_w, strip_h + 1,
                fill=1, stroke=0,
            )

        # Decorative network of circles (representing architecture nodes).
        canvas.setStrokeColorRGB(1, 1, 1, 0.15)
        canvas.setLineWidth(1.5)
        nodes = [
            (120, 640, 35), (420, 680, 30), (370, 560, 50),
            (100, 470, 22), (490, 510, 28), (250, 720, 20),
            (340, 440, 25), (200, 550, 40), (480, 620, 18),
        ]
        for node_x, node_y, radius in nodes:
            canvas.circle(node_x, node_y, radius, stroke=1, fill=0)
        # Pairs of node indices joined centre-to-centre.
        edges = [
            (0, 2), (1, 2), (0, 3), (2, 4), (5, 0),
            (6, 4), (7, 2), (8, 1), (7, 0),
        ]
        for src, dst in edges:
            src_x, src_y, _ = nodes[src]
            dst_x, dst_y, _ = nodes[dst]
            canvas.line(src_x, src_y, dst_x, dst_y)

        # Title
        canvas.setFillColor(white)
        canvas.setFont(FONTS["heading"], 44)
        canvas.drawCentredString(centre_x, 440, "Semblance")

        # Subtitle
        canvas.setFont(FONTS["body"], 20)
        canvas.drawCentredString(centre_x, 400, "Technical Architecture Document")

        # Divider line under the subtitle
        canvas.setStrokeColorRGB(1, 1, 1, 0.6)
        canvas.setLineWidth(2)
        canvas.line(centre_x - 80, 380, centre_x + 80, 380)

        # Description
        canvas.setFont(FONTS["body"], 12)
        canvas.setFillColorRGB(1, 1, 1, 0.85)
        canvas.drawCentredString(
            centre_x, 355, "AI-Powered Synthetic Focus Group Research Platform"
        )

        # Version and date
        canvas.setFont(FONTS["body"], 11)
        canvas.setFillColorRGB(1, 1, 1, 0.7)
        canvas.drawCentredString(centre_x, 140, "Version 1.0")
        canvas.drawCentredString(centre_x, 122, "February 2026")
        canvas.restoreState()

    @staticmethod
    def _on_toc(canvas, doc):
        """Draw a centered page number at the foot of a TOC page."""
        canvas.saveState()
        canvas.setFont(FONTS["body"], 9)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")
        canvas.restoreState()

    @staticmethod
    def _on_content(canvas, doc):
        """Draw the running header and the footer with the page number."""
        left_edge = MARGIN_LEFT
        right_edge = PAGE_WIDTH - MARGIN_RIGHT
        header_text_y = PAGE_HEIGHT - 44
        header_rule_y = PAGE_HEIGHT - 50

        canvas.saveState()

        # Header rule and text
        canvas.setStrokeColor(COLORS["primary"])
        canvas.setLineWidth(0.5)
        canvas.line(left_edge, header_rule_y, right_edge, header_rule_y)
        canvas.setFont(FONTS["body"], 8)
        canvas.setFillColor(COLORS["text_secondary"])
        canvas.drawString(
            left_edge, header_text_y, "Semblance \u2014 Technical Architecture"
        )
        canvas.drawRightString(right_edge, header_text_y, "v1.0")

        # Footer rule and page number
        canvas.line(left_edge, 55, right_edge, 55)
        canvas.setFont(FONTS["body"], 9)
        canvas.drawCentredString(PAGE_WIDTH / 2, 40, f"{doc.page}")

        canvas.restoreState()
# ============================================================================
# SHORTHAND HELPERS
# ============================================================================
S = STYLES


def _heading(text, level, bm):
    """Build a bookmarked heading at outline depth *level*, styled h<level + 1>."""
    return BookmarkedHeading(text, S[f"h{level + 1}"], level=level, bookmark_name=bm)


def h1(text, bm=None):
    """Top-level heading (outline level 0)."""
    return _heading(text, 0, bm)


def h2(text, bm=None):
    """Second-level heading (outline level 1)."""
    return _heading(text, 1, bm)


def h3(text, bm=None):
    """Third-level heading (outline level 2)."""
    return _heading(text, 2, bm)


def p(text):
    """Body paragraph."""
    body_style = S["body"]
    return Paragraph(text, body_style)


def pb(text):
    """Bold body paragraph."""
    bold_style = S["body_bold"]
    return Paragraph(text, bold_style)


def bullet(text):
    """First-level list item, prefixed with a bullet character."""
    item = f"\u2022 {text}"
    return Paragraph(item, S["bullet"])


def bullet2(text):
    """Second-level list item, prefixed with an en dash."""
    item = f"\u2013 {text}"
    return Paragraph(item, S["bullet2"])


def tip(text):
    """Callout box of type "tip"."""
    return CalloutBox(text, callout_type="tip")


def note(text):
    """Callout box of type "note"."""
    return CalloutBox(text, callout_type="note")


def warning(text):
    """Callout box of type "warning"."""
    return CalloutBox(text, callout_type="warning")


def sp(pts=8):
    """Vertical spacer *pts* points tall."""
    return Spacer(1, pts)
# ============================================================================
# MERMAID DIAGRAM DEFINITIONS
# ============================================================================
# Mermaid source for every diagram in the document, keyed by diagram id.
# render_mermaid_diagrams() writes each value to "<id>.mmd" and renders it to
# "<id>.png"; diagram() looks the rendered file up by the same id.
# The string contents are passed to the Mermaid CLI verbatim.
DIAGRAMS = {
# Flowchart: browser client, Quart backend, external services and MongoDB.
"system_architecture": """graph TB
subgraph Client["Browser"]
SPA["React SPA
TypeScript + Vite"]
SIO_C["Socket.IO Client"]
end
subgraph Backend["Quart Backend (ASGI)"]
API["REST API
7 Route Groups"]
SIO_S["Socket.IO Server"]
Services["Service Layer
19 Services"]
Prompts["Prompt Templates
20 Markdown Files"]
end
subgraph External["External Services"]
Gemini["Google Gemini
gemini-3-pro-preview"]
OpenAI["OpenAI
GPT-4.1 / GPT-5.2"]
Azure["Azure AD
Microsoft MSAL"]
end
MongoDB[("MongoDB
4 Collections")]
SPA -->|"REST API"| API
SIO_C <-->|"WebSocket"| SIO_S
API --> Services
SIO_S --> Services
Services --> Prompts
Services -->|"LLM Calls"| Gemini
Services -->|"LLM Calls"| OpenAI
Services -->|"CRUD"| MongoDB
SPA -->|"OAuth"| Azure
classDef client fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef backend fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef external fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef db fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
class SPA,SIO_C client
class API,SIO_S,Services,Prompts backend
class Gemini,OpenAI,Azure external
class MongoDB db
""",
# Flowchart: Nginx, static assets, the Hypercorn/Quart application server,
# MongoDB and the cloud APIs.
"deployment_architecture": """graph TB
User["User Browser"]
subgraph Production["Production Server (optical-dev.oliver.solutions)"]
Nginx["Nginx
Reverse Proxy"]
subgraph Static["Static Assets"]
Vite["Vite Build
/semblance/"]
end
subgraph App["Application Server"]
Hypercorn["Hypercorn ASGI
Port 5137"]
Quart["Quart App"]
SocketIO["python-socketio
AsyncServer"]
end
Mongo[("MongoDB")]
end
subgraph APIs["Cloud APIs"]
GeminiAPI["Google Gemini API"]
OpenAIAPI["OpenAI API"]
AzureAPI["Azure AD / MSAL"]
end
User -->|"HTTPS"| Nginx
Nginx -->|"/semblance/*"| Vite
Nginx -->|"/semblance_back/*"| Hypercorn
Hypercorn --> Quart
Hypercorn --> SocketIO
Quart --> Mongo
Quart -->|"API Keys"| GeminiAPI
Quart -->|"API Keys"| OpenAIAPI
User -->|"OAuth Popup"| AzureAPI
classDef user fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef infra fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef app fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef db fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef cloud fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
class User user
class Nginx,Vite infra
class Hypercorn,Quart,SocketIO app
class Mongo db
class GeminiAPI,OpenAIAPI,AzureAPI cloud
""",
# Flowchart: App.tsx, providers, page routes, session panels and persona
# components.
"frontend_components": """graph TD
App["App.tsx"]
Providers["Providers
QueryClient, BrowserRouter,
MSAL, Auth, WebSocket, Navigation"]
subgraph Pages["Page Routes"]
SU["SyntheticUsers"]
FGS["FocusGroupSession"]
FG["FocusGroups"]
Dash["Dashboard"]
end
subgraph Session["Session Panels (FocusGroupSession)"]
direction LR
DP["Discussion"]
Parts["Participants"]
Themes["Themes"]
Analytics["Analytics"]
Notes["Notes"]
end
subgraph PersonaUI["Persona Components (SyntheticUsers)"]
direction LR
AIR["AI Recruiter"]
UC["Manual Creator"]
PE["Persona Editor"]
end
App --> Providers --> Pages
FGS --> Session
SU --> PersonaUI
classDef appNode fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef provider fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef route fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef panel fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef persona fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
class App appNode
class Providers provider
class SU,FGS,FG,Dash route
class DP,Parts,Themes,Analytics,Notes panel
class AIR,UC,PE persona
""",
# Flowchart: API routes, core / AI / domain service groups and external
# systems.
"backend_services": """graph TD
Routes["API Routes
auth, personas, ai-personas,
focus-groups, focus-group-ai,
folders, tasks"]
subgraph Core["Core Services"]
direction LR
LLM["LLMService
Gemini + OpenAI"]
WSM["WebSocketManager
Room messaging"]
TM["TaskManager
Cancellable tasks"]
end
subgraph AI["AI / Conversation Services (5)"]
direction LR
AIR["AIRunner
Dedicated thread"]
ACC["Autonomous
Controller"]
CDS["Decision +
Context Services"]
end
subgraph Domain["Domain Services (11)"]
direction LR
PS["Persona Services
Generation, Export,
Modification"]
FGS["Focus Group Services
Responses, Themes,
Moderator, Summary"]
end
subgraph External["External Systems"]
direction LR
Gemini["Google Gemini"]
OAI["OpenAI"]
DB[("MongoDB")]
end
Routes --> Core
Routes --> AI
Routes --> Domain
AI --> LLM
AI --> WSM
Domain --> LLM
Domain --> WSM
AIR --> ACC --> CDS
LLM --> Gemini
LLM --> OAI
Core --> DB
classDef routes fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef core fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef ai fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef domain fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef ext fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
class Routes routes
class LLM,WSM,TM core
class AIR,ACC,CDS ai
class PS,FGS domain
class Gemini,OAI,DB ext
""",
# ER diagram: USER, PERSONA, FOCUS_GROUP and FOLDER with their relationships.
"entity_relationship": """erDiagram
USER {
ObjectId _id PK
string username UK
string email UK
string password_hash
string role
string auth_type
string microsoft_id
}
PERSONA {
ObjectId _id PK
string name
string age
string gender
string occupation
string location
string personality
number techSavviness
object oceanTraits
array goals
array frustrations
array motivations
string aiSynthesizedBio
array folder_ids FK
ObjectId created_by FK
datetime created_at
}
FOCUS_GROUP {
ObjectId _id PK
string name
string description
string status
string llm_model
string reasoning_effort
array participants
array messages
array themes
object discussion_guide
ObjectId created_by FK
datetime created_at
}
FOLDER {
ObjectId _id PK
string name
ObjectId parent_folder_id FK
number level
ObjectId created_by FK
datetime created_at
}
USER ||--o{ PERSONA : "creates"
USER ||--o{ FOCUS_GROUP : "manages"
USER ||--o{ FOLDER : "creates"
PERSONA }o--o{ FOCUS_GROUP : "participates in"
PERSONA }o--o{ FOLDER : "belongs to"
FOLDER ||--o{ FOLDER : "contains"
""",
# Sequence diagram: local JWT login, Microsoft OAuth (MSAL) login and
# WebSocket authentication.
"auth_flow": """sequenceDiagram
autonumber
participant U as User Browser
participant R as React SPA
participant A as Quart /api/auth
participant M as Azure AD
participant DB as MongoDB
Note over U,DB: Local JWT Authentication
U->>R: Enter credentials
R->>A: POST /auth/login
A->>DB: Find user by username
DB-->>A: User document
A->>A: Verify bcrypt hash
A->>A: Generate JWT (HS256, 24h)
A-->>R: {token, user}
R->>R: Store in localStorage
Note over U,DB: Microsoft OAuth (MSAL)
U->>R: Click "Sign in with Microsoft"
R->>M: MSAL popup login
M-->>R: idToken
R->>A: POST /auth/microsoft {idToken}
A->>A: Validate MSAL token
A->>DB: Find/create user
A->>A: Generate JWT (HS256, 24h)
A-->>R: {token, user}
R->>R: Store in localStorage
Note over U,DB: WebSocket Authentication
R->>A: Socket.IO connect {auth: token}
A->>A: Validate JWT
A-->>R: connected event
""",
# Sequence diagram: joining a focus group room, a manual-mode message and
# the autonomous-mode loop.
"websocket_flow": """sequenceDiagram
autonumber
participant C as React Client
participant WS as WebSocket Service
participant SIO as Socket.IO Server
participant Room as Focus Group Room
participant AI as AI Runner Thread
participant DB as MongoDB
C->>WS: joinFocusGroup(groupId)
WS->>SIO: emit("join_focus_group")
SIO->>Room: Add session to room
SIO-->>WS: "joined_focus_group"
WS->>C: window.dispatchEvent("ws:joined")
Note over C,DB: Manual Mode Message
C->>SIO: POST /generate-response
SIO->>AI: Generate persona response
AI->>DB: Load persona + context
AI->>AI: LLM generate response
AI->>DB: Save message
AI->>Room: emit("message_update")
Room-->>WS: "message_update"
WS->>C: window.dispatchEvent("ws:message_update")
Note over C,DB: Autonomous Mode
C->>SIO: POST /start-autonomous-mode
SIO->>AI: Start conversation loop
loop Every 3-10 seconds
AI->>AI: Decision engine
AI->>DB: Save message
AI->>Room: emit("message_update")
Room-->>WS: "message_update"
WS->>C: window.dispatchEvent
AI->>Room: emit("ai_status_update")
end
""",
# Flowchart: model-type dispatch to the Gemini / GPT-4.1 / GPT-5.2 paths,
# with retry and response parsing.
"llm_pipeline": """flowchart TD
Request["LLM Request
(prompt, model, params)"]
Decision{{"Model Type?"}}
subgraph Gemini["Google Gemini Path"]
GClient["Create Gemini Client"]
GConfig["GenerateContentConfig
temperature, max_tokens"]
GCall["genai.generate_content()"]
end
subgraph OpenAI4["OpenAI GPT-4.1 Path"]
O4Client["Create AsyncOpenAI Client"]
O4Call["chat.completions.create()
model=gpt-4.1"]
end
subgraph OpenAI5["OpenAI GPT-5.2 Path"]
O5Client["Create AsyncOpenAI Client"]
O5Call["responses.create()
reasoning=effort
verbosity=level"]
end
Retry{{"Success?"}}
Parse["Parse Response
Extract JSON if needed"]
Return["Return Result"]
RetryLogic["Retry with Backoff
1s, 2s, 4s (max 3)"]
Request --> Decision
Decision -->|"gemini-*"| GClient --> GConfig --> GCall --> Retry
Decision -->|"gpt-4.1"| O4Client --> O4Call --> Retry
Decision -->|"gpt-5.2"| O5Client --> O5Call --> Retry
Retry -->|"Yes"| Parse --> Return
Retry -->|"No"| RetryLogic --> Decision
classDef req fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef gemini fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef oai fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef oai5 fill:#F5EEF0,stroke:#9B4D63,stroke-width:2px,color:#4A1E2B
classDef logic fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
class Request req
class GClient,GConfig,GCall gemini
class O4Client,O4Call oai
class O5Client,O5Call oai5
class Decision,Retry,RetryLogic logic
class Parse,Return req
""",
# Flowchart: the two-stage persona generation pipeline, from audience brief
# to persona library.
"persona_generation": """flowchart LR
Brief["Audience Brief
+ Research Objective"]
Enhance["AI Brief
Enhancement"]
Stage1["Stage 1:
Generate Basic
Profiles"]
Review["User Reviews
Basic Profiles"]
Stage2["Stage 2:
Generate Detailed
Personas"]
Save["Save to
MongoDB"]
Library["View in
Persona Library"]
Brief --> Enhance --> Stage1 --> Review --> Stage2 --> Save --> Library
subgraph Stage1Detail["Stage 1 Output"]
S1["Name, Age, Gender
Occupation, Location
Personality Summary
Interests, Tech Savviness"]
end
subgraph Stage2Detail["Stage 2 Output"]
S2["OCEAN Traits (0-100)
Goals, Frustrations
Motivations, Scenarios
Think-Feel-Do
AI Synthesized Bio"]
end
Stage1 -.-> S1
Stage2 -.-> S2
classDef input fill:#F5EEF0,stroke:#6B2D3F,stroke-width:2px,color:#4A1E2B
classDef process fill:#DBEAFE,stroke:#3B82F6,stroke-width:2px,color:#1E3A5F
classDef review fill:#FEF3C7,stroke:#D97706,stroke-width:2px,color:#78350F
classDef output fill:#DCFCE7,stroke:#22C55E,stroke-width:2px,color:#14532D
classDef detail fill:#FAFAFA,stroke:#E8D9DE,stroke-width:1px,color:#2D1F24
class Brief,Enhance input
class Stage1,Stage2 process
class Review review
class Save,Library output
class S1,S2 detail
""",
# State diagram: focus group lifecycle from creation to completion or error.
"focus_group_states": """stateDiagram-v2
[*] --> new : Create Focus Group
new --> setup : Configure Settings
setup --> setup : Edit Guide / Add Participants
setup --> manual_mode : Start Manual Session
setup --> ai_mode : Start Autonomous Mode
manual_mode --> ai_mode : Switch to Autonomous
ai_mode --> manual_mode : Switch to Manual
manual_mode --> completed : End Session
ai_mode --> completed : End Session / Guide Complete
ai_mode --> error : AI Error / Timeout
completed --> [*]
error --> manual_mode : Resume Manually
error --> [*]
""",
# Sequence diagram: one iteration of the autonomous conversation loop.
"autonomous_conversation": """sequenceDiagram
autonumber
participant Controller as Autonomous Controller
participant LLM as LLM + Decision Services
participant DB as MongoDB
participant WS as WebSocket
Note over Controller,WS: One Conversation Loop Iteration
Controller->>DB: Load context (messages + participants)
DB-->>Controller: Conversation state
Controller->>LLM: decide_next_action(context)
LLM-->>Controller: {action, participant, reasoning}
alt respond / moderate / probe
Controller->>LLM: Generate message for action
LLM-->>Controller: Message text
Controller->>DB: Save message
Controller->>WS: emit message_update
else end_session
Controller->>DB: status = completed
Controller->>WS: emit ai_status_update
end
Controller->>Controller: Jitter wait (3-10s)
Controller->>Controller: Safety checks (action + silence limits)
Note over Controller: Loop until end_session or limit reached
""",
}
# ============================================================================
# MERMAID RENDERING
# ============================================================================
def render_mermaid_diagrams(output_dir):
    """Render all Mermaid diagrams as PNGs using mmdc CLI.

    Writes ``mermaid-config.json`` and one ``<id>.mmd`` file per DIAGRAMS
    entry into *output_dir* (created if missing), then invokes the Mermaid
    CLI through ``npx`` once per diagram. Failures are printed as warnings
    and skipped; they are never raised.

    Args:
        output_dir: Directory that receives the config, ``.mmd`` sources and
            rendered ``.png`` files.

    Returns:
        dict mapping diagram id to PNG path, for every diagram whose output
        file exists after its render attempt.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Custom Mermaid config — light backgrounds with dark text for PDF readability.
    # Each theme variable appears exactly once: a repeated key in a dict
    # literal silently keeps only the last value.
    config = {
        "theme": "base",
        "themeVariables": {
            "primaryColor": "#F5EEF0",
            "primaryTextColor": "#2D1F24",
            "primaryBorderColor": "#9B4D63",
            "lineColor": "#6B2D3F",
            "secondaryColor": "#EFF6FF",
            "secondaryTextColor": "#2D1F24",
            "secondaryBorderColor": "#3B82F6",
            "tertiaryColor": "#F0FDF4",
            "tertiaryTextColor": "#2D1F24",
            "tertiaryBorderColor": "#22C55E",
            "noteBkgColor": "#FFF7ED",
            "noteTextColor": "#2D1F24",
            "noteBorderColor": "#D97706",
            "actorBkg": "#F5EEF0",
            "actorBorder": "#9B4D63",
            "actorTextColor": "#2D1F24",
            "signalColor": "#6B2D3F",
            "signalTextColor": "#2D1F24",
            "activationBkgColor": "#F5EEF0",
            "activationBorderColor": "#9B4D63",
            "sequenceNumberColor": "#FFFFFF",
            "labelBoxBkgColor": "#F5EEF0",
            "labelBoxBorderColor": "#9B4D63",
            "labelTextColor": "#2D1F24",
            "loopTextColor": "#6B2D3F",
            "fontSize": "16px",
            "fontFamily": "Helvetica, Arial, sans-serif",
        },
    }
    config_path = os.path.join(output_dir, "mermaid-config.json")
    # Explicit UTF-8 so the written files do not depend on the platform's
    # default encoding.
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f)

    # S-M1: Pinned version to ensure reproducible diagram generation
    base_cmd = ["npx", "-y", "@mermaid-js/mermaid-cli@11.4.1", "mmdc"]
    render_opts = [
        "-c", config_path,
        "-w", "2400",
        "-s", "3",
        "-b", "transparent",
    ]

    rendered = {}
    for diagram_id, source in DIAGRAMS.items():
        input_path = os.path.join(output_dir, f"{diagram_id}.mmd")
        output_path = os.path.join(output_dir, f"{diagram_id}.png")
        with open(input_path, "w", encoding="utf-8") as f:
            f.write(source)

        cmd = [*base_cmd, "-i", input_path, "-o", output_path, *render_opts]
        print(f" Rendering {diagram_id}...")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        except subprocess.TimeoutExpired:
            print(f" WARNING: {diagram_id} timed out")
            continue
        except Exception as exc:
            # Best effort: e.g. npx not installed. Report and move on.
            print(f" WARNING: {diagram_id} failed: {exc}")
            continue

        # Success is judged by the presence of the output file.
        if os.path.exists(output_path):
            rendered[diagram_id] = output_path
        else:
            print(f" WARNING: {diagram_id} not rendered: {result.stderr[:200]}")
    return rendered
# ============================================================================
# DIAGRAM HELPER
# ============================================================================
def diagram(rendered, diagram_id, caption):
    """Build the flowables that place one Mermaid figure in the story.

    Args:
        rendered: Mapping queried with ``.get(diagram_id, "")``; a missing
            id yields an empty string as the image path.
        diagram_id: Key identifying the diagram inside ``rendered``.
        caption: Caption text handed to ``DiagramImage``.

    Returns:
        A three-item list: ``sp(6)``, the ``DiagramImage``, then ``sp(4)``.
    """
    flowables = [sp(6)]
    # An unknown id falls back to "" rather than raising KeyError.
    flowables.append(DiagramImage(rendered.get(diagram_id, ""), caption))
    flowables.append(sp(4))
    return flowables
# ============================================================================
# CHAPTERS
# ============================================================================
def build_chapter_1(rendered):
    """Assemble the flowables for Chapter 1 (Executive Overview).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "system_architecture" figure.

    Returns:
        A list of flowables in reading order: chapter title page, purpose,
        key capabilities, technology summary table, and the architecture
        overview with Figure 1.1.
    """
    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(1, "Executive Overview", "System Purpose and Capabilities"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Executive Overview", "ch1"),
    ]

    # --- Purpose ---
    story += [
        h2("Purpose", "ch1_purpose"),
        p(
            "Semblance is an AI-powered synthetic focus group research platform that enables researchers, "
            "product teams, and UX professionals to create detailed synthetic personas using large language "
            "models, organize them into focus groups, and conduct moderated or fully autonomous research "
            "sessions \u2014 all without recruiting real participants."
        ),
        p(
            "The platform supports multi-model AI integration (Google Gemini, OpenAI GPT-4.1 and GPT-5.2), "
            "real-time WebSocket communication for live session collaboration, and comprehensive analysis "
            "tools including sentiment analysis, theme extraction, and participation analytics."
        ),
    ]

    # --- Key capabilities (one bullet per entry) ---
    capabilities = (
        "AI Persona Generation \u2014 Two-stage pipeline: basic demographic profiles then full personality expansion (OCEAN traits, goals, frustrations, motivations, scenarios, AI-synthesized biography).",
        "Focus Group Simulation \u2014 Manual moderation or fully autonomous AI-driven conversations with real-time WebSocket updates.",
        "Multi-Model LLM Support \u2014 Unified service abstracting Google Gemini and OpenAI models with retry logic and model-specific parameter handling.",
        "Real-Time Collaboration \u2014 Socket.IO room-based messaging for live session observation with event-driven UI updates.",
        "Comprehensive Analysis \u2014 AI-powered theme extraction, sentiment analysis, participation balance scoring, and exportable reports.",
        "Enterprise Authentication \u2014 Dual auth: local JWT credentials and Microsoft Entra ID (MSAL) OAuth.",
    )
    story.append(h2("Key Capabilities", "ch1_capabilities"))
    story.extend(bullet(text) for text in capabilities)
    story.append(sp())

    # --- Technology summary ---
    # NOTE(review): the "Real-Time" row mentions a polling fallback, while the
    # reconnection table in build_chapter_7 states "WebSocket only (no polling
    # fallback)". Confirm which description matches the deployed client.
    tech_rows = [
        ["Frontend", "React 18, TypeScript, Vite, Tailwind CSS, shadcn-ui (Radix UI), React Router, TanStack Query, Socket.IO Client"],
        ["Backend", "Python, Quart (async Flask), Hypercorn ASGI, python-socketio, PyMongo"],
        ["Database", "MongoDB (4 collections: users, personas, focus_groups, folders)"],
        ["AI / LLM", "Google Gemini (gemini-3-pro-preview), OpenAI GPT-4.1, OpenAI GPT-5.2"],
        ["Authentication", "Custom JWT (HS256, 24h expiry), Microsoft MSAL (Entra ID)"],
        ["Real-Time", "Socket.IO (WebSocket with polling fallback)"],
    ]
    story += [
        h2("Technology Summary", "ch1_tech"),
        styled_table(
            ["Layer", "Technologies"],
            tech_rows,
            col_widths=[100, CONTENT_WIDTH - 108],
        ),
        sp(),
    ]

    # --- Architecture overview + Figure 1.1 ---
    story += [
        h2("System Architecture Overview", "ch1_arch"),
        p(
            "Semblance follows a three-tier architecture: a React single-page application communicates "
            "with a Python Quart backend through REST APIs and WebSocket connections. The backend "
            "orchestrates multiple LLM providers and persists all data in MongoDB."
        ),
        *diagram(rendered, "system_architecture",
                 "Figure 1.1 \u2014 High-Level System Architecture"),
    ]
    return story
def build_chapter_2(rendered):
    """Assemble the flowables for Chapter 2 (System Architecture).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "deployment_architecture" figure.

    Returns:
        A list of flowables in reading order: chapter title page, the
        three-tier description, deployment topology with Figure 2.1, the
        environment configuration table, and the application factory notes.
    """
    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(2, "System Architecture", "Deployment and Infrastructure"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("System Architecture", "ch2"),
    ]

    # --- Three tiers ---
    story += [
        h2("Three-Tier Architecture", "ch2_tiers"),
        p(
            "The application is organized into three distinct tiers, each independently deployable:"
        ),
        bullet(
            "Presentation Tier \u2014 React SPA built with Vite, served as static assets. "
            "Handles all UI rendering, client-side routing, and WebSocket event dispatching."
        ),
        bullet(
            "Application Tier \u2014 Quart (async Flask) application running under Hypercorn ASGI server. "
            "Hosts the REST API (7 blueprint groups), Socket.IO server, 19 business logic services, and "
            "a dedicated AI runner thread for autonomous conversations."
        ),
        bullet(
            "Data Tier \u2014 MongoDB document database storing users, personas, focus groups, and folders. "
            "Accessed via PyMongo (sync) in route handlers and Motor (async) in the AI runner thread."
        ),
        sp(),
    ]

    # --- Deployment topology + Figure 2.1 ---
    story += [
        h2("Deployment Topology", "ch2_deploy"),
        p(
            "In production, the application is deployed at optical-dev.oliver.solutions behind an "
            "Nginx reverse proxy that routes requests to either the static frontend assets or the backend "
            "application server."
        ),
        *diagram(rendered, "deployment_architecture",
                 "Figure 2.1 \u2014 Production Deployment Architecture"),
    ]

    # --- Environment configuration ---
    # The two value columns share the space left after the 110pt label column.
    value_col = (CONTENT_WIDTH - 118) / 2
    env_rows = [
        ["Base Path", "/", "/semblance/"],
        ["API Base URL", "/api", "https://optical-dev.oliver.solutions/semblance_back/api"],
        ["WebSocket Path", "/socket.io/", "/semblance_back/socket.io/"],
        ["Frontend Port", "5173 (Vite dev server)", "Static assets via Nginx"],
        ["Backend Port", "5137 (Hypercorn)", "5137 (proxied via Nginx)"],
        ["MSAL Redirect", "http://localhost:5173/", "https://optical-dev.oliver.solutions/semblance"],
    ]
    story += [
        h2("Environment Configuration", "ch2_env"),
        p(
            "The application supports dual environments through Vite environment files. "
            "Configuration switches base paths, API URLs, WebSocket paths, and authentication redirects."
        ),
        styled_table(
            ["Setting", "Development", "Production"],
            env_rows,
            col_widths=[110, value_col, value_col],
        ),
        sp(),
    ]

    # --- Application factory ---
    factory_settings = (
        "JWT Secret \u2014 From JWT_SECRET_KEY environment variable",
        "Token Expiry \u2014 86,400 seconds (24 hours)",
        "Max Upload \u2014 16 MB",
        "Request Timeout \u2014 300 seconds (5 minutes)",
        "CORS \u2014 allow_origin=\"*\" for all methods",
    )
    story += [
        h2("Application Factory Pattern", "ch2_factory"),
        p(
            "The backend uses an application factory pattern (create_app() in app/__init__.py) "
            "that initializes the Quart app, configures CORS, registers 7 route blueprints, sets up "
            "JWT authentication, initializes the WebSocket manager, and starts the AI runner service. "
            "Key configuration:"
        ),
    ]
    story.extend(bullet(setting) for setting in factory_settings)
    story.append(sp())
    return story
def build_chapter_3(rendered):
    """Assemble the flowables for Chapter 3 (Frontend Architecture).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "frontend_components" figure.

    Returns:
        A list of flowables in reading order: chapter title page, technology
        stack, provider hierarchy table with Figure 3.1, route table, state
        management strategy, and the component organization table.
    """
    # Title page on the "chapter" template, then switch to "content".
    e = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(3, "Frontend Architecture", "React SPA Structure and Patterns"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Frontend Architecture", "ch3"),
    ]

    # --- Technology stack ---
    e.append(h2("Technology Stack", "ch3_stack"))
    e.append(p(
        "The frontend is a React 18 single-page application built with TypeScript and Vite. "
        "UI components use shadcn-ui (Radix UI primitives) styled with Tailwind CSS. "
        "Data fetching uses TanStack Query with form handling via React Hook Form and Zod validation."
    ))
    e.append(sp())

    # --- Provider hierarchy + Figure 3.1 ---
    e.append(h2("Provider Hierarchy", "ch3_providers"))
    e.append(p(
        "App.tsx wraps the entire application in a nested provider hierarchy. Each provider "
        "adds a layer of functionality accessible throughout the component tree:"
    ))
    # Every other table in this document has column widths summing to
    # CONTENT_WIDTH - 8. The previous (CONTENT_WIDTH - 200) remainder made
    # this table 82pt narrower than its siblings; 118 = 110 (first column)
    # + 8 restores the shared total while keeping the 55/45 split.
    provider_rest = CONTENT_WIDTH - 118
    e.append(styled_table(
        ["Provider", "Purpose", "Key State"],
        [
            ["QueryClientProvider", "TanStack Query data fetching and caching", "Query cache, stale-while-revalidate"],
            ["BrowserRouter", "Client-side routing with dynamic base path", "Route location, navigation"],
            ["MsalProvider", "Microsoft Azure AD authentication", "MSAL instance, account info"],
            ["AuthProvider", "JWT token management and session persistence", "user, token, isAuthenticated"],
            ["WebSocketProvider", "Singleton Socket.IO connection management", "socketId, connection state"],
            ["NavigationProvider", "Navigation state and focus group context", "previousRoute, focusGroupId, folderId"],
        ],
        col_widths=[110, provider_rest * 0.55, provider_rest * 0.45],
    ))
    e.append(sp())
    e.extend(diagram(rendered, "frontend_components",
                     "Figure 3.1 \u2014 Frontend Component Hierarchy"))

    # --- Routes ---
    e.append(h2("Route Structure", "ch3_routes"))
    e.append(styled_table(
        ["Path", "Component", "Auth", "Description"],
        [
            ["/", "Index", "No", "Landing page with platform overview"],
            ["/login", "Login", "No", "Authentication (local + Microsoft OAuth)"],
            ["/synthetic-users", "SyntheticUsers", "Yes", "Persona library and management"],
            ["/synthetic-users/:id", "PersonaProfile", "Yes", "Individual persona detail view"],
            ["/focus-groups", "FocusGroups", "Yes", "Focus group listing and creation"],
            ["/focus-groups/:id", "FocusGroupSession", "Yes", "Live session interface (multi-panel)"],
            ["/dashboard", "Dashboard", "Yes", "Analytics and research metrics"],
        ],
        col_widths=[115, 100, 30, CONTENT_WIDTH - 253],
    ))
    e.append(sp())

    # --- State management ---
    e.append(h2("State Management Strategy", "ch3_state"))
    e.append(p(
        "The application uses a layered state management approach rather than a single global store:"
    ))
    e.append(bullet(
        "Global Persisted State \u2014 AuthContext (JWT + user in localStorage), "
        "NavigationContext (route history in localStorage), TanStack Query cache (server data)."
    ))
    e.append(bullet(
        "Component State \u2014 React hooks for UI state (tabs, modals, filters), "
        "React Hook Form for form state, temporary editing data."
    ))
    e.append(bullet(
        "WebSocket State \u2014 Connection status, real-time updates dispatched as "
        "window CustomEvents (ws:message_update, ws:ai_status_update, etc.)."
    ))
    e.append(sp())

    # --- Component organization ---
    e.append(h2("Component Organization", "ch3_components"))
    e.append(styled_table(
        ["Directory", "Contents"],
        [
            ["src/components/ui/", "Reusable shadcn-ui components (Button, Card, Dialog, Tabs, etc.) plus custom components (ProgressModal, MentionInput, SaveStatusIndicator)"],
            ["src/components/focus-group-session/", "25+ components for the session interface: DiscussionPanel, ParticipantPanel, ThemesPanel, AnalyticsPanel, ReasoningPanel, NotesPanel, AutonomousDashboard"],
            ["src/components/persona/", "Persona profile viewing and editing: PersonaProfile, PersonaEditor, PersonaPersonality, PersonaAttitudinalProfile, PersonaScenarios"],
            ["src/components/dashboard/", "Dashboard analytics: StatCard, OverviewTab, UsersTab, FocusGroupsTab"],
            ["src/components/auth/", "Authentication: MsalProvider (Azure AD setup)"],
            ["src/hooks/", "Custom hooks: useWebSocket, useCancellableGeneration, usePersonaFiltering, useFocusGroupAutoSave, useFolderManagement"],
            ["src/services/", "WebSocket singleton service with event dispatching via window CustomEvents"],
            ["src/types/", "TypeScript type definitions: Persona (70+ fields), CancellableTask, NavigationState"],
        ],
        col_widths=[160, CONTENT_WIDTH - 168],
    ))
    e.append(sp())
    return e
def build_chapter_4(rendered):
    """Assemble the flowables for Chapter 4 (Backend Architecture).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "backend_services" figure.

    Returns:
        A list of flowables in reading order: chapter title page, ASGI
        stack description, service layer overview with Figure 4.1, four
        service tables, and the prompt template table.
    """
    service_header = ["Service", "File", "Purpose"]

    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(4, "Backend Architecture", "Services, Routes, and Prompt Templates"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Backend Architecture", "ch4"),
    ]

    # --- ASGI stack ---
    story += [
        h2("ASGI Application Stack", "ch4_stack"),
        p(
            "The backend is a Quart application (async Flask) running under Hypercorn, an ASGI server. "
            "The ASGI stack layers python-socketio on top of the Quart app, enabling both HTTP and "
            "WebSocket communication through a single server process on port 5137."
        ),
        p(
            "A dedicated AI runner thread with its own asyncio event loop handles autonomous conversation "
            "execution, isolated from the main request-handling event loop. This avoids Motor (async MongoDB) "
            "event loop affinity issues and prevents long-running AI operations from blocking HTTP requests."
        ),
        sp(),
    ]

    # --- Service layer overview + Figure 4.1 ---
    story += [
        h2("Service Layer", "ch4_services"),
        p(
            "Business logic is organized into 19 service modules, each responsible for a specific domain. "
            "Services are stateless (except the AI Runner singleton) and communicate through function calls, "
            "the LLM service, and the WebSocket manager."
        ),
        *diagram(rendered, "backend_services",
                 "Figure 4.1 \u2014 Backend Service Architecture"),
    ]

    # --- Core services ---
    core_rows = [
        ["LLMService", "llm_service.py", "Multi-model abstraction (Gemini, GPT-4.1, GPT-5.2) with retry logic and JSON parsing"],
        ["WebSocketManager", "websocket_manager_async.py", "Room-based messaging, event emission, connection tracking"],
        ["TaskManager", "task_manager.py", "CancellableTask wrapper for long-running operations with per-user tracking"],
        ["PromptLoader", "utils/prompt_loader.py", "Loads and interpolates 20 markdown prompt templates"],
    ]
    story += [
        h3("Core Services", "ch4_core"),
        mono_table(
            service_header,
            core_rows,
            col_widths=[110, 120, CONTENT_WIDTH - 238],
        ),
        sp(),
    ]

    # --- AI / conversation services ---
    ai_rows = [
        ["AIRunnerService", "ai_runner_service.py", "Singleton: dedicated thread + event loop for autonomous conversations"],
        ["AutonomousConversation Controller", "autonomous_conversation_controller.py", "State machine orchestrating multi-persona conversation flow"],
        ["ConversationDecision Service", "conversation_decision_service.py", "LLM-driven decision engine: next speaker, action type, probing"],
        ["ConversationContext Service", "conversation_context_service.py", "Aggregates messages, participants, and state for LLM context"],
        ["ConversationState Manager", "conversation_state_manager.py", "Tracks participation metrics, sentiment, energy levels"],
    ]
    story += [
        h3("AI / Conversation Services", "ch4_ai"),
        mono_table(
            service_header,
            ai_rows,
            col_widths=[110, 140, CONTENT_WIDTH - 258],
        ),
        sp(),
    ]

    # --- Persona services ---
    persona_rows = [
        ["AIPersonaService", "ai_persona_service.py", "Two-stage persona generation with customer data integration"],
        ["PersonaModification Service", "persona_modification_service.py", "AI-assisted persona editing"],
        ["PersonaExportService", "persona_export_service.py", "Individual persona profile export"],
        ["BulkExportService", "bulk_persona_export_service.py", "Batch persona export (MD/JSON/CSV)"],
        ["CustomerDataService", "customer_data_service.py", "Upload and integrate research data into generation"],
    ]
    story += [
        h3("Persona Services", "ch4_persona"),
        mono_table(
            service_header,
            persona_rows,
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),
    ]

    # --- Focus group services ---
    focus_group_rows = [
        ["FocusGroupService", "focus_group_service.py", "CRUD operations and discussion guide generation"],
        ["FocusGroupResponse Service", "focus_group_response_service.py", "Generate persona responses with personality-driven prompts"],
        ["KeyThemeService", "key_theme_service.py", "AI-powered theme extraction from conversation messages"],
        ["AIModeratorService", "ai_moderator_service.py", "AI moderator intervention and discussion guidance"],
        ["FocusGroupSummary Service", "focus_group_summary_service.py", "Comprehensive session summary generation"],
        ["ImageDescription Service", "image_description_service.py", "Multimodal image description for uploaded assets"],
    ]
    story += [
        h3("Focus Group Services", "ch4_fg"),
        mono_table(
            service_header,
            focus_group_rows,
            col_widths=[110, 145, CONTENT_WIDTH - 263],
        ),
        sp(),
    ]

    # --- Prompt templates ---
    template_rows = [
        ["persona-basic-generation.md", "Stage 1 persona generation (demographics)"],
        ["persona-detailed-generation.md", "Stage 2 persona expansion (full profile)"],
        ["persona-system.md", "System prompt for persona-as-character responses"],
        ["focus-group-response.md", "In-session persona response generation"],
        ["conversation-decision-engine.md", "Autonomous mode: next action decision"],
        ["conversation-participant-selection.md", "Autonomous mode: speaker selection"],
        ["ai-moderator-system.md", "AI moderator system prompt"],
        ["probe-generation-prompt.md", "Probing question generation"],
        ["key-theme-extraction.md", "Theme extraction from conversation"],
        ["discussion-guide-generation.md", "Structured discussion guide creation"],
        ["focus-group-summary-generation.md", "Post-session summary generation"],
        ["audience-brief-enhancement.md", "Research brief AI enhancement"],
        ["image-description.md", "Multimodal image analysis"],
        ["persona-interaction-prompt.md", "Persona-to-persona interaction"],
        ["persona-to-persona-response.md", "Inter-persona conversation"],
        ["persona-modification.md", "AI-assisted persona editing"],
        ["persona-summary-generation.md", "Persona summary for display"],
        ["persona-download-summary.md", "Export summary format"],
        ["persona-profile-export.md", "Full profile export format"],
        ["key-theme-system.md", "Theme extraction system prompt"],
    ]
    story += [
        h2("Prompt Template System", "ch4_prompts"),
        p(
            "The backend uses 20 markdown prompt templates stored in /backend/prompts/. "
            "The PromptLoader utility loads these files and interpolates context variables (persona data, "
            "conversation history, discussion guide) before sending them to the LLM service."
        ),
        styled_table(
            ["Template", "Used By"],
            template_rows,
            col_widths=[175, CONTENT_WIDTH - 183],
        ),
        sp(),
    ]
    return story
def build_chapter_5(rendered):
    """Assemble the flowables for Chapter 5 (Data Model).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "entity_relationship" figure.

    Returns:
        A list of flowables in reading order: chapter title page, the
        collections overview with Figure 5.1, one field table per
        collection (User, Persona core, Persona psychographic, Focus Group,
        Folder), and the relationship bullets.
    """
    field_header = ["Field", "Type", "Description"]

    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(5, "Data Model", "MongoDB Collections and Relationships"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Data Model", "ch5"),
    ]

    # --- Overview + Figure 5.1 ---
    story += [
        h2("Collections Overview", "ch5_overview"),
        p(
            "Semblance stores data in MongoDB across four primary collections. Documents use MongoDB "
            "ObjectId references for relationships rather than foreign key constraints."
        ),
        *diagram(rendered, "entity_relationship",
                 "Figure 5.1 \u2014 Entity Relationship Diagram"),
    ]

    # --- User collection ---
    user_rows = [
        ["_id", "ObjectId", "Primary key"],
        ["username", "String (unique)", "Login identifier"],
        ["email", "String (unique)", "Email address"],
        ["password_hash", "String", "bcrypt-hashed password"],
        ["role", "String", "User role (default: \"user\")"],
        ["auth_type", "String", "\"local\" or \"microsoft\""],
        ["microsoft_id", "String", "Azure AD object ID (optional)"],
    ]
    story += [
        h2("User Collection", "ch5_user"),
        p(
            "Stores authentication credentials and profile information. Supports dual auth types."
        ),
        styled_table(
            field_header,
            user_rows,
            col_widths=[100, 100, CONTENT_WIDTH - 208],
        ),
        sp(),
    ]

    # --- Persona collection: core fields ---
    persona_core_rows = [
        ["_id", "ObjectId", "Primary key"],
        ["name", "String", "Persona display name"],
        ["age, gender, occupation", "String", "Demographics"],
        ["location, education", "String", "Geographic and educational background"],
        ["personality", "String", "Personality summary text"],
        ["techSavviness", "Number", "Technology comfort level (0\u2013100)"],
        ["created_by", "ObjectId", "Reference to User who created this persona"],
        ["folder_ids", "Array[ObjectId]", "Folders this persona belongs to (many-to-many)"],
    ]
    story += [
        h2("Persona Collection", "ch5_persona"),
        p(
            "The richest data model with 70+ fields covering demographics, OCEAN personality traits, "
            "behavioral attributes, motivations, scenarios, and AI-generated content."
        ),
        h3("Core Fields", "ch5_persona_core"),
        styled_table(
            field_header,
            persona_core_rows,
            col_widths=[115, 100, CONTENT_WIDTH - 223],
        ),
        sp(),
    ]

    # --- Persona collection: personality / psychographic fields ---
    persona_psych_rows = [
        ["oceanTraits", "Object", "OCEAN scores (0\u2013100): openness, conscientiousness, extraversion, agreeableness, neuroticism"],
        ["thinkFeelDo", "Object", "Arrays of thinks, feels, does statements"],
        ["goals", "Array[String]", "Life and professional goals"],
        ["frustrations", "Array[String]", "Pain points and frustrations"],
        ["motivations", "Array[String]", "Driving motivations"],
        ["selfDeterminationNeeds", "Object", "Autonomy, competence, relatedness assessments"],
        ["scenarios", "Array[String]", "Behavioral scenario descriptions"],
        ["aiSynthesizedBio", "String", "AI-generated narrative biography (2\u20133 lines)"],
    ]
    story += [
        h3("Personality & Psychographic Fields", "ch5_persona_ocean"),
        styled_table(
            field_header,
            persona_psych_rows,
            col_widths=[125, 85, CONTENT_WIDTH - 218],
        ),
        sp(),
    ]

    # --- Focus group collection ---
    focus_group_rows = [
        ["_id", "ObjectId", "Primary key"],
        ["name, description", "String", "Session title and research topic"],
        ["status", "String", "\"new\", \"manual_mode\", \"ai_mode\", or \"completed\""],
        ["llm_model", "String", "Selected LLM model (default: gemini-3-pro-preview)"],
        ["reasoning_effort", "String", "GPT-5.2 reasoning level (minimal/low/medium/high)"],
        ["participants", "Array[Object]", "Participant objects with persona_id references"],
        ["messages", "Array[Object]", "Conversation messages (sender, text, timestamp, sentiment)"],
        ["themes", "Array[Object]", "Extracted themes with supporting quotes"],
        ["discussion_guide", "Object", "Structured guide with sections and items"],
        ["created_by", "ObjectId", "Reference to User"],
        ["autonomous_started_at", "DateTime", "Timestamp of autonomous mode start"],
    ]
    story += [
        h2("Focus Group Collection", "ch5_fg"),
        p(
            "Stores session configuration, participant references, conversation messages, themes, and "
            "discussion guide. Messages and themes are embedded documents within the focus group."
        ),
        styled_table(
            field_header,
            focus_group_rows,
            col_widths=[120, 90, CONTENT_WIDTH - 218],
        ),
        sp(),
    ]

    # --- Folder collection ---
    folder_rows = [
        ["_id", "ObjectId", "Primary key"],
        ["name", "String", "Folder display name"],
        ["parent_folder_id", "ObjectId", "Parent folder reference (null for root)"],
        ["level", "Number", "Depth level (0 = root, 1 = child, max depth: 2)"],
        ["created_by", "ObjectId", "Reference to User"],
    ]
    story += [
        h2("Folder Collection", "ch5_folder"),
        p(
            "Hierarchical folder structure for organizing personas. Supports two-level nesting."
        ),
        styled_table(
            field_header,
            folder_rows,
            col_widths=[115, 80, CONTENT_WIDTH - 203],
        ),
        sp(),
    ]

    # --- Relationships ---
    relationships = (
        "User \u2192 Persona (1:N) \u2014 created_by field on Persona",
        "User \u2192 Focus Group (1:N) \u2014 created_by field on Focus Group",
        "User \u2192 Folder (1:N) \u2014 created_by field on Folder",
        "Persona \u2194 Focus Group (M:N) \u2014 participants array in Focus Group references persona IDs",
        "Persona \u2194 Folder (M:N) \u2014 folder_ids array on Persona references folder IDs",
        "Folder \u2192 Folder (1:N) \u2014 parent_folder_id for hierarchy",
    )
    story.append(h2("Relationships", "ch5_rels"))
    story.extend(bullet(line) for line in relationships)
    story.append(sp())
    return story
def build_chapter_6(rendered):
    """Assemble the flowables for Chapter 6 (Authentication & Authorization).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "auth_flow" figure.

    Returns:
        A list of flowables in reading order: chapter title page, the dual
        authentication description with Figure 6.1, the JWT lifecycle table,
        frontend token management, WebSocket authentication, and route
        protection.
    """
    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(6, "Authentication", "JWT, Microsoft OAuth, and WebSocket Auth"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Authentication & Authorization", "ch6"),
    ]

    # --- Dual authentication + Figure 6.1 ---
    story += [
        h2("Dual Authentication System", "ch6_dual"),
        p(
            "Semblance supports two authentication methods that produce identical JWT tokens, "
            "allowing the rest of the application to be auth-method-agnostic:"
        ),
        bullet(
            "Local JWT \u2014 Username/password authentication with bcrypt password hashing. "
            "Tokens are HS256-signed with a 24-hour expiry."
        ),
        bullet(
            "Microsoft OAuth (MSAL) \u2014 Azure AD authentication via MSAL popup flow. "
            "The frontend obtains an idToken from Azure, sends it to the backend for validation, "
            "and receives a Semblance JWT in return."
        ),
        sp(),
        *diagram(rendered, "auth_flow",
                 "Figure 6.1 \u2014 Authentication Flow (Local JWT and Microsoft OAuth)"),
    ]

    # --- JWT lifecycle ---
    jwt_rows = [
        ["Algorithm", "HS256"],
        ["Expiration", "24 hours"],
        ["Claims", "sub (user_id as string)"],
        ["Storage", "localStorage (auth_token key)"],
        ["Header Format", "Authorization: Bearer {token}"],
        ["Validation", "Checked on every API request via interceptor; validated server-side via @jwt_required decorator"],
    ]
    story += [
        h2("JWT Token Lifecycle", "ch6_jwt"),
        styled_table(
            ["Property", "Value"],
            jwt_rows,
            col_widths=[120, CONTENT_WIDTH - 128],
        ),
        sp(),
    ]

    # --- Frontend token management ---
    interceptor_notes = (
        "Request interceptor \u2014 Extracts JWT from localStorage, validates expiration by decoding the payload, attaches as Bearer token header.",
        "Response interceptor \u2014 Catches 401 responses, dispatches auth_error_event, clears localStorage, redirects to /login.",
        "Session restoration \u2014 On app mount, checks for existing token in localStorage, validates via GET /auth/me, restores session if valid.",
    )
    story += [
        h2("Frontend Token Management", "ch6_frontend"),
        p(
            "The Axios API client includes request and response interceptors for automatic token management:"
        ),
    ]
    story.extend(bullet(text) for text in interceptor_notes)
    story.append(sp())

    # --- WebSocket authentication ---
    handshake_steps = (
        "Client sends: auth: { token: jwt_token } in Socket.IO connection options",
        "Server extracts and validates the JWT on the connect event",
        "On success: emits connected event with session info",
        "On failure: emits auth_error event and disconnects",
    )
    story += [
        h2("WebSocket Authentication", "ch6_ws"),
        p(
            "Socket.IO connections authenticate via JWT token passed in the auth parameter during "
            "the connection handshake. The server validates the token before accepting the connection:"
        ),
    ]
    story.extend(bullet(step) for step in handshake_steps)
    story.append(sp())

    # --- Route protection ---
    story += [
        h2("Route Protection", "ch6_protection"),
        p(
            "Backend routes are protected with the @jwt_required() decorator (custom Quart-compatible "
            "implementation using PyJWT directly, not Flask-JWT-Extended). The decorator validates the "
            "Authorization header and makes the user identity available via get_jwt_identity()."
        ),
        p(
            "Frontend routes use the ProtectedRoute wrapper component that checks AuthContext "
            "isAuthenticated state and redirects unauthenticated users to /login."
        ),
        sp(),
    ]
    return story
def build_chapter_7(rendered):
    """Assemble the flowables for Chapter 7 (Real-Time Communication).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "websocket_flow" figure.

    Returns:
        A list of flowables in reading order: chapter title page, Socket.IO
        architecture, room-based messaging with Figure 7.1, frontend event
        dispatching, the two event catalog tables, and the reconnection
        settings table.
    """
    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(7, "Real-Time Communication", "WebSocket Architecture and Events"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Real-Time Communication", "ch7"),
    ]

    # --- Socket.IO architecture ---
    story += [
        h2("Socket.IO Architecture", "ch7_arch"),
        p(
            "The application uses Socket.IO for bidirectional real-time communication between the "
            "React frontend and Quart backend. The backend uses python-socketio's AsyncServer "
            "(native ASGI compatibility), while the frontend uses the socket.io-client library "
            "wrapped in a singleton service."
        ),
        sp(),
    ]

    # --- Rooms + Figure 7.1 ---
    story += [
        h2("Room-Based Messaging", "ch7_rooms"),
        p(
            "Each focus group session has a dedicated Socket.IO room. When a user opens a session, "
            "the client emits a join_focus_group event, and the server adds the connection to "
            "the room. All subsequent events (messages, status updates, theme discoveries) are broadcast "
            "to the room, enabling multiple observers to watch a live session."
        ),
        sp(),
        *diagram(rendered, "websocket_flow",
                 "Figure 7.1 \u2014 WebSocket Communication Flow"),
    ]

    # --- Frontend event dispatching ---
    story += [
        h2("Frontend Event Dispatching", "ch7_dispatch"),
        p(
            "The frontend uses a hybrid approach: the WebSocket singleton service binds specific listeners "
            "for known events, then re-dispatches all events as window CustomEvents with a ws: prefix. "
            "This decouples React components from the WebSocket implementation:"
        ),
        note(
            "Pattern: Socket.IO event \"message_update\" \u2192 window.dispatchEvent(new CustomEvent(\"ws:message_update\", {detail: payload})). "
            "Components listen via window.addEventListener without needing direct socket references."
        ),
        sp(),
    ]

    # --- Event catalog: client -> server ---
    client_events = [
        ["connect", "auth: {token}", "Authenticate WebSocket connection"],
        ["join_focus_group", "{focus_group_id}", "Join session room"],
        ["leave_focus_group", "{focus_group_id}", "Leave session room"],
        ["cancel_task", "{task_id}", "Cancel running AI task"],
    ]
    story += [
        h2("WebSocket Event Catalog", "ch7_events"),
        h3("Client \u2192 Server", "ch7_c2s"),
        styled_table(
            ["Event", "Payload", "Purpose"],
            client_events,
            col_widths=[120, 120, CONTENT_WIDTH - 248],
        ),
        sp(),
    ]

    # --- Event catalog: server -> client ---
    server_events = [
        ["connected", "Connection success confirmation with session info"],
        ["auth_error", "Authentication failure notification"],
        ["joined_focus_group", "Room join confirmation"],
        ["message_update", "New message in conversation (includes sender, text, timestamp)"],
        ["ai_status_update", "AI mode status change (running/paused/completed/error)"],
        ["moderator_status_update", "Moderator action notification"],
        ["theme_update", "Key theme discovered or updated"],
        ["focus_group_update", "Focus group properties changed"],
        ["mode_event_update", "Session mode switch (manual \u2194 autonomous)"],
        ["analytics_update", "Conversation analytics data"],
        ["conversation_state_update", "Conversation state change"],
        ["task_started / task_completed", "Long-running task lifecycle events"],
        ["task_cancelled / task_failed", "Task termination events"],
        ["bulk_export_progress", "Export operation progress percentage"],
    ]
    story += [
        h3("Server \u2192 Client", "ch7_s2c"),
        styled_table(
            ["Event", "Purpose"],
            server_events,
            col_widths=[155, CONTENT_WIDTH - 163],
        ),
        sp(),
    ]

    # --- Reconnection ---
    # NOTE(review): the "Transport" row says there is no polling fallback,
    # while the technology summary in build_chapter_1 describes Socket.IO as
    # "WebSocket with polling fallback". Confirm which one is accurate.
    reconnect_settings = [
        ["Transport", "WebSocket only (no polling fallback)"],
        ["Reconnection", "Enabled (automatic)"],
        ["Connection Timeout", "60 seconds"],
        ["Ping Interval", "45 seconds"],
        ["Ping Timeout", "120 seconds"],
    ]
    story += [
        h2("Reconnection Strategy", "ch7_reconnect"),
        p(
            "The Socket.IO client is configured with automatic reconnection. On reconnect, the service "
            "rebinds all event listeners (to survive reconnection cycles) and auto-rejoins the previous "
            "focus group room. Token refresh is attempted before reconnection to handle expired sessions."
        ),
        styled_table(
            ["Setting", "Value"],
            reconnect_settings,
            col_widths=[140, CONTENT_WIDTH - 148],
        ),
        sp(),
    ]
    return story
def build_chapter_8(rendered):
    """Assemble the flowables for Chapter 8 (AI/LLM Integration).

    Args:
        rendered: Rendered-diagram mapping forwarded to ``diagram()`` to
            place the "llm_pipeline" figure.

    Returns:
        A list of flowables in reading order: chapter title page, the
        multi-model service table with Figure 8.1, retry and error handling,
        the AI runner service and its lifecycle, and task management.
    """
    # Title page on the "chapter" template, then switch to "content".
    story = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(8, "AI/LLM Integration", "Multi-Model Service and Task Management"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("AI/LLM Integration", "ch8"),
    ]

    # --- Multi-model service + Figure 8.1 ---
    model_rows = [
        ["gemini-3-pro-preview", "Google", "genai.Client.generate_content()", "temperature, max_tokens"],
        ["gpt-4.1", "OpenAI", "chat.completions.create()", "temperature, max_tokens"],
        ["gpt-5.2", "OpenAI", "responses.create()", "reasoning_effort (minimal/low/medium/high), verbosity (low/medium/high)"],
    ]
    story += [
        h2("Multi-Model LLM Service", "ch8_llm"),
        p(
            "The LLMService provides a unified interface for all AI operations, abstracting away "
            "provider differences between Google Gemini and OpenAI. Each call creates a fresh client "
            "instance to avoid event loop affinity issues in the async ASGI environment."
        ),
        styled_table(
            ["Model", "Provider", "API", "Special Parameters"],
            model_rows,
            col_widths=[100, 55, 135, CONTENT_WIDTH - 298],
        ),
        sp(),
        *diagram(rendered, "llm_pipeline",
                 "Figure 8.1 \u2014 LLM Request Pipeline"),
    ]

    # --- Retry and error handling ---
    story += [
        h2("Retry and Error Handling", "ch8_retry"),
        p(
            "All LLM calls are wrapped in a retry mechanism with exponential backoff. On failure, "
            "the service retries up to 3 times with delays of 1s, 2s, and 4s. The service also "
            "handles JSON parsing of responses, stripping markdown code blocks when needed."
        ),
        sp(),
    ]

    # --- AI runner service ---
    isolation_benefits = (
        "Autonomous conversations don't block HTTP request handling",
        "Motor (async MongoDB driver) runs on a consistent event loop",
        "Thread-safe task registry enables concurrent conversation management",
    )
    lifecycle_steps = (
        "init_ai_runner() \u2014 Called on app startup, creates dedicated thread and event loop",
        "submit_conversation() \u2014 Schedules conversation coroutine on the AI event loop",
        "stop_conversation() \u2014 Cancels a specific conversation by focus group ID",
        "Shutdown \u2014 Graceful cleanup: cancels all running tasks, stops event loop, joins thread",
    )
    story += [
        h2("AI Runner Service", "ch8_runner"),
        p(
            "The AIRunnerService is a singleton that manages a dedicated background thread with "
            "its own asyncio event loop. This isolation ensures:"
        ),
    ]
    story.extend(bullet(text) for text in isolation_benefits)
    story.append(sp())
    story.append(p("Lifecycle:"))
    story.extend(bullet(step) for step in lifecycle_steps)
    story.append(sp())

    # --- Task management ---
    story += [
        h2("Task Management", "ch8_tasks"),
        p(
            "Long-running operations (persona generation, theme extraction, summary generation) are wrapped "
            "in CancellableTask objects tracked by the TaskManager. Each task has a unique ID, "
            "is associated with a user, and can be cancelled via the REST API or WebSocket."
        ),
        p(
            "On the frontend, the useCancellableGeneration hook manages task lifecycle state "
            "and listens for WebSocket task events (task_started, task_completed, task_failed, task_cancelled)."
        ),
        sp(),
    ]
    return story
def build_chapter_9(rendered):
    """Chapter 9: Core Feature Flows.

    Assembles the chapter as four consecutive groups of flowables (chapter
    opener, persona generation, focus group lifecycle, autonomous
    conversation) and returns them joined into a single list.

    ``rendered`` is forwarded unchanged to ``diagram()`` for figures 9.1-9.3.
    """
    # The two text columns of the state table share the leftover width equally.
    state_text_width = (CONTENT_WIDTH - 93) * 0.5

    # --- Chapter opener ---
    opener = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(9, "Core Feature Flows", "Personas, Focus Groups, and Autonomous AI"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("Core Feature Flows", "ch9"),
    ]

    # --- Persona Generation Pipeline ---
    persona_section = [
        h2("Persona Generation Pipeline", "ch9_persona"),
        p(
            "Persona generation uses a two-stage pipeline that progressively builds detail. "
            "The first stage generates basic demographic profiles from an audience brief, allowing "
            "the user to review before the second stage expands each into a fully detailed persona."
        ),
        *diagram(rendered, "persona_generation",
                 "Figure 9.1 \u2014 Two-Stage Persona Generation Pipeline"),
        p(
            "Stage 1 \u2014 Basic Profiles: The LLM receives the audience brief (optionally AI-enhanced) "
            "and generates N basic profiles with name, age, gender, occupation, location, personality summary, "
            "interests, and tech savviness. Optional customer data files are included for grounding."
        ),
        p(
            "Stage 2 \u2014 Detailed Personas: Each basic profile is expanded individually. The LLM adds "
            "OCEAN personality traits (scored 0\u2013100), goals, frustrations, motivations, behavioral scenarios, "
            "Think-Feel-Do mappings, self-determination needs, and an AI-synthesized biography. Each completed "
            "persona is saved to MongoDB immediately."
        ),
        sp(),
    ]

    # --- Focus Group Lifecycle ---
    lifecycle_section = [
        h2("Focus Group Session Lifecycle", "ch9_lifecycle"),
        p(
            "Focus groups progress through a state machine with four primary states. Users can switch "
            "between manual and autonomous modes during a live session."
        ),
        *diagram(rendered, "focus_group_states",
                 "Figure 9.2 \u2014 Focus Group State Machine"),
        styled_table(
            ["State", "Description", "Transitions"],
            [
                ["new", "Freshly created, no session started", "Configure \u2192 setup"],
                ["setup", "Configuring guide, participants, settings", "Start Manual or Start Autonomous"],
                ["manual_mode", "User-controlled moderation", "Switch to AI, End Session"],
                ["ai_mode", "Autonomous AI-driven conversation", "Switch to Manual, End Session, Guide Complete"],
                ["completed", "Session finished", "Terminal state"],
                ["error", "AI error or timeout", "Resume Manually, Terminal"],
            ],
            col_widths=[85, state_text_width, state_text_width],
        ),
        sp(),
    ]

    # --- Autonomous Conversation System ---
    autonomous_section = [
        h2("Autonomous Conversation System", "ch9_auto"),
        p(
            "The autonomous conversation controller orchestrates multi-persona discussions without human "
            "intervention. It runs in a dedicated thread (via AIRunnerService) and uses the LLM-powered "
            "decision engine to determine each action."
        ),
        *diagram(rendered, "autonomous_conversation",
                 "Figure 9.3 \u2014 Autonomous Conversation Loop (One Iteration)"),
        h3("Decision Engine Actions", "ch9_decisions"),
        styled_table(
            ["Action", "Description"],
            [
                ["participant_respond", "Selected persona provides a contextual response"],
                ["moderator_speak", "AI moderator advances discussion or redirects"],
                ["participant_interaction", "Two personas engage in direct dialogue"],
                ["probe_trigger", "Probing question to deepen exploration"],
                ["end_session", "Conclude the conversation (guide complete or limits reached)"],
            ],
            col_widths=[130, CONTENT_WIDTH - 138],
        ),
        sp(),
        h3("Safety Limits", "ch9_safety"),
        styled_table(
            ["Limit", "Value", "Purpose"],
            [
                ["Max Actions", "500", "Prevents runaway conversations"],
                ["Max Consecutive Silence", "3", "Ends session if no meaningful responses"],
                ["Response Timeout", "30 seconds", "Prevents hanging on unresponsive LLM"],
                ["Inter-Action Delay", "3\u201310 seconds (random)", "Simulates natural conversation pace"],
                ["Dominance Threshold", "40%", "Flags when a participant dominates discussion"],
                ["Reasoning History", "Last 20 decisions", "Prevents repetitive decision patterns"],
            ],
            col_widths=[120, 80, CONTENT_WIDTH - 208],
        ),
        sp(),
    ]

    return opener + persona_section + lifecycle_section + autonomous_section
def build_chapter_10(rendered):
    """Chapter 10: API Reference.

    Emits the chapter opener and intro paragraph, then one section per route
    group. Each section is a heading, a four-column endpoint table and a
    spacer, in that order.

    ``rendered`` is accepted so this builder has the same signature as the
    other chapter builders; this chapter contains no diagrams and never
    reads it.
    """
    # One entry per route group:
    # (route prefix shown as heading, bookmark key, endpoint rows, column widths)
    route_groups = [
        (
            "/api/auth", "ch10_auth",
            [
                ["POST", "/register", "No", "Register new user account"],
                ["POST", "/login", "No", "Login with username/password, returns JWT"],
                ["POST", "/microsoft", "No", "Microsoft MSAL authentication, returns JWT"],
                ["GET", "/me", "Yes", "Validate token and return user profile"],
            ],
            [45, 100, 30, CONTENT_WIDTH - 183],
        ),
        (
            "/api/personas", "ch10_personas",
            [
                ["GET", "/", "Yes", "Get current user's personas"],
                ["GET", "/all", "Yes", "Get all personas"],
                ["GET", "/:id", "Yes", "Get specific persona by ID"],
                ["POST", "/", "Yes", "Create new persona"],
                ["POST", "/batch", "Yes", "Create multiple personas"],
                ["PUT", "/:id", "Yes", "Update persona"],
                ["DELETE", "/:id", "Yes", "Delete persona"],
                ["POST", "/:id/export-profile", "Yes", "Export persona as formatted profile"],
                ["POST", "/bulk-export", "Yes", "Export multiple personas (MD/JSON/CSV)"],
                ["POST", "/:id/modify-with-ai", "Yes", "AI-assisted persona modification"],
            ],
            [45, 130, 30, CONTENT_WIDTH - 213],
        ),
        (
            "/api/ai-personas", "ch10_ai",
            [
                ["POST", "/generate-basic-profiles", "Yes", "Stage 1: Generate basic demographic profiles"],
                ["POST", "/generate-personas", "Yes", "Stage 2: Expand profiles to full personas"],
                ["POST", "/enhance-audience-brief", "Yes", "AI-enhance the audience brief"],
                ["POST", "/upload-customer-data", "Yes", "Upload research documents for grounding"],
                ["DELETE", "/cleanup-customer-data/:id", "Yes", "Clean up uploaded customer data"],
                ["POST", "/batch-generate-summaries", "Yes", "Generate summaries for multiple personas"],
            ],
            [50, 160, 30, CONTENT_WIDTH - 248],
        ),
        (
            "/api/focus-groups", "ch10_fg",
            [
                ["GET", "/", "Yes", "Get user's focus groups"],
                ["GET", "/:id", "Yes", "Get specific focus group"],
                ["POST", "/", "Yes", "Create focus group"],
                ["PUT", "/:id", "Yes", "Update focus group"],
                ["DELETE", "/:id", "Yes", "Delete focus group"],
                ["POST", "/:id/participants", "Yes", "Add participant to group"],
                ["DELETE", "/:id/participants/:pid", "Yes", "Remove participant"],
                ["POST", "/:id/messages", "Yes", "Send message in session"],
                ["GET", "/:id/messages", "Yes", "Get session messages"],
                ["POST", "/:id/assets", "Yes", "Upload creative assets"],
                ["POST", "/:id/generate-discussion-guide", "Yes", "Generate AI discussion guide"],
                ["POST", "/:id/notes", "Yes", "Create session note"],
            ],
            [50, 170, 30, CONTENT_WIDTH - 258],
        ),
        (
            "/api/focus-group-ai", "ch10_fgai",
            [
                ["POST", "/generate-response", "Yes", "Generate persona response in session"],
                ["POST", "/generate-key-themes", "Yes", "Extract themes from conversation"],
                ["POST", "/autonomous/start/:id", "Yes", "Start autonomous conversation"],
                ["POST", "/autonomous/stop/:id", "Yes", "Stop autonomous conversation"],
                ["GET", "/autonomous/status/:id", "Yes", "Get autonomous mode status"],
                ["POST", "/moderator/advance/:id", "Yes", "Advance moderator to next topic"],
                ["POST", "/moderator/end-session/:id", "Yes", "End session via moderator"],
                ["GET", "/conversation/state/:id", "Yes", "Get conversation state"],
                ["GET", "/conversation/analytics/:id", "Yes", "Get conversation analytics"],
                ["POST", "/conversation/intervene/:id", "Yes", "Manual intervention in autonomous mode"],
            ],
            [45, 155, 30, CONTENT_WIDTH - 238],
        ),
        (
            "/api/folders", "ch10_folders",
            [
                ["GET", "/", "Yes", "Get folder hierarchy tree"],
                ["GET", "/:id", "Yes", "Get specific folder"],
                ["POST", "/", "Yes", "Create folder"],
                ["PUT", "/:id", "Yes", "Update folder"],
                ["DELETE", "/:id", "Yes", "Delete folder"],
                ["POST", "/:id/personas", "Yes", "Add persona to folder"],
                ["DELETE", "/:id/personas/:pid", "Yes", "Remove persona from folder"],
                ["POST", "/:id/personas/batch", "Yes", "Add multiple personas to folder"],
            ],
            [50, 135, 30, CONTENT_WIDTH - 223],
        ),
        (
            "/api/tasks", "ch10_tasks",
            [
                ["DELETE", "/:task_id", "Yes", "Cancel a running task"],
                ["GET", "/user/:user_id", "Yes", "Get user's active tasks"],
            ],
            [50, 120, 30, CONTENT_WIDTH - 208],
        ),
    ]

    flowables = [
        NextPageTemplate("chapter"),
        PageBreak(),
        ChapterTitlePage(10, "API Reference", "REST Endpoint Catalog"),
        NextPageTemplate("content"),
        PageBreak(),
        h1("API Reference", "ch10"),
        p(
            "The backend exposes 7 route groups via Flask/Quart blueprints. All endpoints except "
            "authentication routes require a valid JWT token in the Authorization header."
        ),
    ]

    for route_prefix, bookmark_key, endpoint_rows, widths in route_groups:
        flowables.append(h2(route_prefix, bookmark_key))
        # A fresh header list per table, so no list object is shared between calls.
        flowables.append(styled_table(
            ["Method", "Path", "Auth", "Purpose"],
            endpoint_rows,
            col_widths=widths,
        ))
        flowables.append(sp())

    return flowables
def build_chapter_11(rendered):
    """Chapter 11: Data Flow.

    Returns the chapter's flowables in reading order: chapter opener, the
    end-to-end request flow, the autonomous-conversation data flow, the key
    architectural patterns, and a closing note.

    ``rendered`` is accepted so this builder has the same signature as the
    other chapter builders; this chapter contains no diagrams and never
    reads it.
    """
    e = []
    e.append(NextPageTemplate("chapter"))
    e.append(PageBreak())
    e.append(ChapterTitlePage(11, "Data Flow", "End-to-End Request and Conversation Patterns"))
    e.append(NextPageTemplate("content"))
    e.append(PageBreak())
    e.append(h1("Data Flow", "ch11"))

    # --- End-to-End Request Flow ---
    e.append(h2("End-to-End Request Flow", "ch11_e2e"))
    e.append(p(
        "A typical user interaction follows this path through the system:"
    ))
    request_steps = (
        "1. User Action \u2014 Click, form submit, or navigation event in the React SPA",
        "2. API Request \u2014 Axios sends REST request with JWT Bearer token via the API client",
        "3. Route Handler \u2014 Quart blueprint validates JWT, extracts user identity, calls service layer",
        "4. Service Processing \u2014 Business logic executes: LLM calls, database operations, prompt templating",
        "5. Data Persistence \u2014 MongoDB operations via PyMongo (or Motor in AI thread)",
        "6. WebSocket Broadcast \u2014 For real-time operations, events are emitted to the focus group room",
        "7. Response \u2014 JSON response returned to frontend; WebSocket events dispatched as window events",
        "8. UI Update \u2014 React components re-render via TanStack Query cache invalidation or WebSocket event handlers",
    )
    e.extend(bullet(step) for step in request_steps)
    e.append(sp())

    # --- AI Conversation Data Flow ---
    e.append(h2("AI Conversation Data Flow", "ch11_ai"))
    e.append(p(
        "During an autonomous conversation, data flows through a specialized pipeline:"
    ))
    conversation_steps = (
        "1. User starts autonomous mode \u2014 POST /autonomous/start/:id",
        "2. AI Runner \u2014 Submits conversation coroutine to dedicated thread",
        "3. Conversation Loop \u2014 Controller runs continuously until end condition",
        "4. Context Assembly \u2014 ConversationContextService loads messages, participants, guide from MongoDB",
        "5. Decision \u2014 ConversationDecisionService sends context + prompt to LLM, gets structured action",
        "6. Execution \u2014 Controller executes action (generate response, moderate, probe)",
        "7. Persistence \u2014 Message saved to MongoDB focus group document",
        "8. Broadcast \u2014 WebSocketManager emits message_update and status events to room",
        "9. Frontend \u2014 Window events trigger React component updates in real time",
        "10. Loop \u2014 3\u201310 second delay, then repeat from step 4",
    )
    e.extend(bullet(step) for step in conversation_steps)
    e.append(sp())

    # --- Key Architectural Patterns ---
    e.append(h2("Key Architectural Patterns", "ch11_patterns"))
    e.append(h3("Singleton WebSocket Service", "ch11_ws"))
    e.append(p(
        "The frontend creates a single Socket.IO instance at module level, shared across all components. "
        "This prevents multiple socket connections and ensures consistent event routing. Events are "
        "re-dispatched as window CustomEvents, decoupling React from the WebSocket implementation."
    ))
    e.append(h3("Dedicated AI Thread", "ch11_thread"))
    e.append(p(
        "Autonomous conversations run in a dedicated Python thread with its own asyncio event loop. "
        "This prevents long-running AI operations from blocking HTTP request handling and provides "
        "a stable event loop for Motor (async MongoDB driver)."
    ))
    # The heading names the pattern the paragraph below actually describes
    # (event-driven DB writes + WebSocket broadcasts). The bookmark key is
    # deliberately left unchanged in case anything links to it.
    e.append(h3("Event-Driven Session Updates", "ch11_twopass"))
    e.append(p(
        "Focus group sessions use an event-driven architecture where database writes and WebSocket "
        "broadcasts happen atomically. The WebSocket manager maintains room membership, ensuring "
        "only connected observers receive updates."
    ))
    e.append(h3("Prompt Template Engine", "ch11_prompts"))
    e.append(p(
        "All LLM prompts are externalized as markdown files in /backend/prompts/. The PromptLoader "
        "reads templates at runtime and interpolates context variables. This separation enables "
        "prompt iteration without code changes."
    ))
    e.append(sp())

    # Closing note pointing readers at the source tree.
    e.append(note(
        "This document was auto-generated from the Semblance codebase. For the most current details, "
        "consult the source code directly. Service files are in /backend/app/services/, route files "
        "in /backend/app/routes/, and frontend components in /src/components/."
    ))
    return e
# ============================================================================
# MAIN BUILD
# ============================================================================
def build_architecture_doc(output_path):
    """Build the Semblance architecture PDF and write it to ``output_path``.

    Steps, in order:
      1. Create a temporary directory and render the Mermaid diagrams into it.
      2. Assemble the story: cover page, table of contents, chapters 1-11.
      3. Render the PDF with ``multiBuild`` (several passes, so the table of
         contents can pick up page numbers).
      4. Remove the temporary diagram directory.
      5. Print the absolute output path and file size.

    The temporary directory is removed even when rendering or PDF generation
    raises, so a failed run does not leave diagram files behind. Errors
    during that removal are ignored (best-effort cleanup).

    Args:
        output_path: Destination path of the generated PDF.
    """
    print("=" * 60)
    print("Building Semblance Technical Architecture Document")
    print("=" * 60)

    # Create temp directory for Mermaid diagrams
    diagram_dir = tempfile.mkdtemp(prefix="semblance_diagrams_")
    try:
        print(f"\n Diagram directory: {diagram_dir}")

        # Step 1: Render Mermaid diagrams
        print("\n Rendering Mermaid diagrams...")
        rendered = render_mermaid_diagrams(diagram_dir)
        print(f" Rendered {len(rendered)}/{len(DIAGRAMS)} diagrams\n")

        # Step 2: Build document
        doc = ArchDocTemplate(output_path)
        story = []

        # Cover page: select the "cover" template and add a minimal flowable.
        # (Presumably the template itself draws the cover artwork -- confirm
        # against ArchDocTemplate.)
        print(" Cover page...")
        story.append(NextPageTemplate("cover"))
        story.append(Spacer(1, 1))

        # Table of Contents
        print(" Table of Contents...")
        story.append(NextPageTemplate("toc"))
        story.append(PageBreak())
        story.append(BookmarkedHeading("Table of Contents", STYLES["h1"], level=0, bookmark_name="toc"))
        story.append(Spacer(1, 10))
        toc = TableOfContents()
        toc.levelStyles = [STYLES["toc0"], STYLES["toc1"], STYLES["toc2"]]
        story.append(toc)

        # Chapters: (progress label, builder); each builder receives the
        # rendered diagrams and returns a list of flowables.
        chapters = [
            ("Chapter 1: Executive Overview", build_chapter_1),
            ("Chapter 2: System Architecture", build_chapter_2),
            ("Chapter 3: Frontend Architecture", build_chapter_3),
            ("Chapter 4: Backend Architecture", build_chapter_4),
            ("Chapter 5: Data Model", build_chapter_5),
            ("Chapter 6: Authentication & Authorization", build_chapter_6),
            ("Chapter 7: Real-Time Communication", build_chapter_7),
            ("Chapter 8: AI/LLM Integration", build_chapter_8),
            ("Chapter 9: Core Feature Flows", build_chapter_9),
            ("Chapter 10: API Reference", build_chapter_10),
            ("Chapter 11: Data Flow", build_chapter_11),
        ]
        for name, builder in chapters:
            print(f" {name}...")
            story.extend(builder(rendered))

        # Build PDF (multi-pass for TOC)
        print("\n Rendering PDF (multi-pass for TOC)...")
        doc.multiBuild(story)
    finally:
        # Cleanup diagram temp files, on success and on failure alike.
        shutil.rmtree(diagram_dir, ignore_errors=True)

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\nDone! {os.path.abspath(output_path)} ({size_kb:.0f} KB)")
if __name__ == "__main__":
    # Script entry point: an optional first CLI argument overrides the output path.
    if len(sys.argv) > 1:
        output = sys.argv[1]
    else:
        # Default location: three directory levels above this file.
        default_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        output = os.path.join(default_dir, "semblance_architecture.pdf")
    build_architecture_doc(output)