fix(report): PDF formatting — no empty lines, strip markdown, fix deprecated fpdf2 API

- sanitize_llm(): strips **bold**, ## headers, converts - bullets → •, collapses 3+ newlines
- body(): splits LLM text into paragraphs at \n\n and joins the lines within each paragraph
  into a single line, so multi_cell never creates blank gaps from double newlines
- quote_block(): collapse newlines in quote text to single space
- multi_cell(0, ...) → multi_cell(EPW, ...) for theme titles
- cell(0, 5, ..., ln=True) → cell(EPW, 5, ..., new_x=LMARGIN, new_y=NEXT)
- Participation analytics: ln=0/True → new_x/new_y API, track x_after_name correctly

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-25 12:50:02 +01:00
parent 6d2f59fe74
commit b74bd7cf99

View file

@ -1233,67 +1233,89 @@ async def download_full_report(focus_group_id):
WHITE = (255, 255, 255)
LIGHT_BG = (245, 245, 250)
EPW = pdf.epw # effective page width (no margins)
import re as _re
def sanitize_llm(text):
    """Strip markdown syntax from LLM text and collapse excessive blank lines.

    Heading markers (``#`` to ``######``) are removed, list markers
    (``-``, ``*``, ``+``) at the start of a line are replaced by a unicode
    bullet, bold/italic asterisks are stripped, and runs of three or more
    newlines are reduced to exactly two.

    Args:
        text: Raw text. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    if not text:
        return ""
    # Remove markdown heading markers.
    text = _re.sub(r'^#{1,6}\s+', '', text, flags=_re.MULTILINE)
    # Convert list markers to a unicode bullet. This runs before emphasis
    # stripping so that a leading "* " marker is not consumed as an opening
    # italic marker. Only spaces/tabs are matched around the marker: "\s"
    # also matches newlines and would swallow the blank line that separates
    # a paragraph from the list that follows it.
    text = _re.sub(r'^[ \t]*[-*+][ \t]+', '\u2022 ', text, flags=_re.MULTILINE)
    # Strip bold/italic markers, keeping the emphasised text.
    text = _re.sub(r'\*{1,3}([^*\n]+)\*{1,3}', r'\1', text)
    # Collapse 3+ newlines to a single paragraph break.
    text = _re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def set_r(size=10, bold=False):
    """Select the "R" font family at ``size``, in bold when ``bold`` is truthy."""
    style = "B" if bold else ""
    pdf.set_font("R", style, size)
def h1(text):
    """Render ``text`` as a top-level heading (size 16, bold, BRAND_ORANGE).

    A vertical gap of 4 is added before the heading and 2 after it; the
    text colour is reset to DARK once the heading has been written.
    """
    pdf.ln(4)
    pdf.set_text_color(*BRAND_ORANGE)
    set_r(16, bold=True)
    pdf.multi_cell(0, 8, text)
    # Restore the default text colour before adding the trailing gap.
    pdf.set_text_color(*DARK)
    pdf.ln(2)
def h2(text):
    """Render ``text`` as a section heading with a rule underneath.

    The heading is written in size 12 bold BRAND_ORANGE across the
    effective page width, followed by a 0.3-wide BRAND_ORANGE line spanning
    from the left margin to left margin + EPW. The text colour is reset to
    DARK afterwards.

    Args:
        text: Heading text.
    """
    pdf.ln(6)
    set_r(12, bold=True)
    pdf.set_text_color(*BRAND_ORANGE)
    # Written exactly once, with an explicit width and cursor placement
    # (same new_x/new_y convention as the cell() calls in this report).
    pdf.multi_cell(EPW, 7, text, new_x="LMARGIN", new_y="NEXT")
    pdf.set_text_color(*DARK)
    pdf.set_draw_color(*BRAND_ORANGE)
    pdf.set_line_width(0.3)
    # Anchor the rule to the left margin instead of the current x, which
    # depends on where the preceding multi_cell left the cursor.
    rule_y = pdf.get_y()
    pdf.line(pdf.l_margin, rule_y, pdf.l_margin + EPW, rule_y)
    pdf.ln(3)
def body(text, color=None):
    """Render LLM-produced body text as a sequence of paragraphs.

    The text is cleaned with ``sanitize_llm`` and split on blank lines.
    Line breaks inside a paragraph are replaced by spaces so each paragraph
    is handed to ``multi_cell`` as one string, and a vertical gap of 2 is
    inserted between consecutive paragraphs. The text colour is reset to
    DARK when done.

    Args:
        text: Raw body text; ``None`` or empty text renders nothing.
        color: Optional RGB tuple for the text; DARK is used when falsy.
    """
    set_r(10)
    pdf.set_text_color(*(color or DARK))
    # Guard against a missing value (e.g. an explicit null description)
    # before handing the text to the regex-based sanitiser.
    cleaned = sanitize_llm(text or "")
    paragraphs = [p.strip() for p in cleaned.split('\n\n') if p.strip()]
    for index, para in enumerate(paragraphs):
        if index:
            # Gap between paragraphs only, never after the last one.
            pdf.ln(2)
        # Return the cursor to the left margin after every paragraph so the
        # next drawing call (here or in the caller) does not start at the
        # right edge, which is where multi_cell leaves x by default.
        pdf.multi_cell(
            EPW, 5.5, para.replace('\n', ' '),
            new_x="LMARGIN", new_y="NEXT",
        )
    pdf.set_text_color(*DARK)
def meta_row(label, value):
    """Render a metadata entry as a small label line above its value.

    The label is written upper-cased in size 8 bold MUTED; the value
    follows on the next line in size 10 DARK, and a vertical gap of 2 is
    added after the entry.

    Args:
        label: Caption text; upper-cased before rendering.
        value: Text rendered below the label.
    """
    # Bold muted label above normal value.
    set_r(8, bold=True)
    pdf.set_text_color(*MUTED)
    # Send the cursor back to the left margin so the value below starts
    # there instead of wherever the label's multi_cell left x.
    pdf.multi_cell(EPW, 5, label.upper(), new_x="LMARGIN", new_y="NEXT")
    set_r(10)
    pdf.set_text_color(*DARK)
    pdf.multi_cell(EPW, 5.5, value, new_x="LMARGIN", new_y="NEXT")
    pdf.ln(2)
def quote_block(text):
    """Render a quote indented by 5 with a vertical accent bar on its left.

    Newlines in the quote are collapsed into single spaces. The left margin
    is temporarily moved to ``l_margin + 5`` while the text is written and
    is always restored afterwards, even if rendering raises. A vertical gap
    of 1 follows the quote.

    Args:
        text: Quote text (a string).
    """
    pdf.set_fill_color(*LIGHT_BG)
    pdf.set_draw_color(*BRAND_ORANGE)
    pdf.set_line_width(0.5)
    lm = pdf.l_margin
    start_y = pdf.get_y()
    start_page = pdf.page_no()
    set_r(9)
    pdf.set_text_color(*DARK)
    # Clean quote text: collapse any newlines into spaces.
    clean = _re.sub(r'\n+', ' ', text.strip())
    pdf.set_left_margin(lm + 5)
    try:
        pdf.set_x(lm + 5)
        pdf.multi_cell(EPW - 5, 5, clean)
        # If the quote moved onto a new page, the saved start_y belongs to
        # the previous page; begin the bar at the top margin of the current
        # page instead of drawing a stray line between unrelated offsets.
        bar_top = start_y if pdf.page_no() == start_page else pdf.t_margin
        pdf.line(lm, bar_top, lm, pdf.get_y())
    finally:
        # Never leave the document indented if rendering failed.
        pdf.set_left_margin(lm)
    pdf.set_x(lm)
    pdf.ln(1)
# Cover page
pdf.add_page()
EPW = pdf.epw
pdf.set_fill_color(*DARK)
pdf.rect(0, 0, 210, 60, "F")
pdf.set_y(15)
set_r(20, bold=True)
pdf.set_text_color(*BRAND_ORANGE)
pdf.cell(0, 12, L["title"], ln=True, align="C")
pdf.cell(EPW, 12, L["title"], new_x="LMARGIN", new_y="NEXT", align="C")
set_r(13)
pdf.set_text_color(*WHITE)
pdf.cell(0, 8, fg_name, ln=True, align="C")
pdf.cell(EPW, 8, fg_name, new_x="LMARGIN", new_y="NEXT", align="C")
pdf.set_text_color(*DARK)
pdf.set_y(70)
@ -1320,13 +1342,13 @@ async def download_full_report(focus_group_id):
pdf.ln(3)
set_r(11, bold=True)
pdf.set_text_color(*DARK)
pdf.multi_cell(0, 6, f"{i}. {t.get('title', '')}")
pdf.multi_cell(EPW, 6, f"{i}. {t.get('title', '')}")
body(t.get("description", ""), color=MUTED)
quotes = t.get("quotes", [])
if quotes:
set_r(9, bold=True)
pdf.set_text_color(*MUTED)
pdf.cell(0, 5, L["quotes"] + ":", ln=True)
pdf.cell(EPW, 5, L["quotes"] + ":", new_x="LMARGIN", new_y="NEXT")
pdf.set_text_color(*DARK)
for q in quotes[:5]:
if isinstance(q, str):
@ -1347,12 +1369,13 @@ async def download_full_report(focus_group_id):
bar_w = int(pct * 1.2)
set_r(9)
pdf.set_text_color(*DARK)
pdf.cell(50, 5, name, ln=0)
pdf.cell(50, 5, name, new_x="RIGHT", new_y="TOP")
pdf.set_fill_color(*BRAND_ORANGE)
y_bar = pdf.get_y() + 1
pdf.rect(pdf.get_x(), y_bar, bar_w, 3, "F")
pdf.set_x(pdf.get_x() + bar_w + 2)
pdf.cell(0, 5, f"{count} ({pct}%)", ln=True)
x_after_name = pdf.get_x()
pdf.rect(x_after_name, y_bar, bar_w, 3, "F")
pdf.set_x(x_after_name + bar_w + 2)
pdf.cell(EPW - 50 - bar_w - 2, 5, f"{count} ({pct}%)", new_x="LMARGIN", new_y="NEXT")
pdf.ln(2)
# Filename: ASCII-only slug