diff --git a/backend/Dockerfile b/backend/Dockerfile index 04fcc518..696a73f5 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,6 +5,7 @@ WORKDIR /app # Install system deps for bcrypt, pymongo, etc. RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ + fonts-dejavu-core \ && rm -rf /var/lib/apt/lists/* # Install Python dependencies first (layer caching) diff --git a/backend/app/routes/focus_groups.py b/backend/app/routes/focus_groups.py index 4871feb7..169e82d3 100755 --- a/backend/app/routes/focus_groups.py +++ b/backend/app/routes/focus_groups.py @@ -1093,9 +1093,48 @@ async def download_discussion_guide(focus_group_id): @focus_groups_bp.route('//report/download', methods=['GET']) @jwt_required() async def download_full_report(focus_group_id): - """Generate and download a full research report as markdown.""" + """Generate and download a full research report as PDF.""" from app.services.llm_service import LLMService, LLMServiceError - from app.utils.prompt_loader import load_prompt # noqa: PLC0415 + from app.utils.prompt_loader import load_prompt + import io + from fpdf import FPDF + + lang = request.args.get("lang", "ru") + + LABELS = { + "ru": { + "title": "Отчёт по фокус-группе", + "date": "Дата", + "participants": "Участники", + "messages": "Сообщений обменяно", + "brief": "Бриф исследования", + "exec_summary": "Резюме", + "key_decisions": "Ключевые выводы и решения", + "key_themes": "Ключевые темы", + "quotes": "Опорные цитаты", + "participation": "Аналитика участия", + "transcript": "Полная стенограмма", + "no_themes": "Темы ещё не сгенерированы.", + "language_name": "русском", + }, + "en": { + "title": "Focus Group Research Report", + "date": "Date", + "participants": "Participants", + "messages": "Messages exchanged", + "brief": "Research Brief", + "exec_summary": "Executive Summary", + "key_decisions": "Key Decisions & Conclusions", + "key_themes": "Key Themes", + "quotes": "Supporting Quotes", + "participation": 
"Participation Analytics", + "transcript": "Full Transcript", + "no_themes": "No themes have been generated yet.", + "language_name": "English", + }, + } + L = LABELS.get(lang, LABELS["en"]) + language_name = {"ru": "Russian", "en": "English"}.get(lang, "English") try: user_id = get_jwt_identity() @@ -1117,103 +1156,226 @@ async def download_full_report(focus_group_id): p = await Persona.find_by_id(pid) if p: personas.append(p) - participant_names = ", ".join(p.get("name", "Unknown") for p in personas) or "No participants" + participant_names = ", ".join(p.get("name", "Unknown") for p in personas) or "—" - # Messages - messages = await FocusGroup.get_messages(focus_group_id) + # Participation stats + messages_raw = await FocusGroup.get_messages(focus_group_id) + message_counts = {} chat_lines = [] - for m in messages: - sender = m.get("senderName") or m.get("senderId", "Unknown") + for m in messages_raw: + sender_id = m.get("senderId", "") + sender_name = m.get("senderName") or sender_id text = m.get("text", "").strip() - if text: - chat_lines.append(f"**{sender}:** {text}") - transcript_excerpt = "\n\n".join(chat_lines[:30]) or "No messages recorded." - full_transcript = "\n\n".join(chat_lines) or "No messages recorded." 
+ if not text: + continue + # Find persona name + for p in personas: + if str(p.get("_id", "")) == sender_id or str(p.get("id", "")) == sender_id: + sender_name = p.get("name", sender_name) + break + chat_lines.append(f"{sender_name}: {text}") + if sender_id not in ("moderator", "facilitator"): + message_counts[sender_name] = message_counts.get(sender_name, 0) + 1 + + transcript_excerpt = "\n\n".join(chat_lines[:30]) or "—" # Themes themes = await FocusGroup.get_generated_themes(focus_group_id) generated_themes = [t for t in themes if t.get("source") == "generated"] themes_summary_lines = [] for t in generated_themes: - themes_summary_lines.append(f"**{t.get('title', 'Theme')}:** {t.get('description', '')}") - themes_summary = "\n".join(themes_summary_lines) or "No themes generated yet." + themes_summary_lines.append(f"{t.get('title', '')}: {t.get('description', '')}") + themes_summary = "\n".join(themes_summary_lines) or "—" - # Executive summary via LLM - prompt = load_prompt("report-executive-summary", variables={ - "topic": fg_topic, - "participant_count": len(personas), - "participant_names": participant_names, - "message_count": len(chat_lines), - "themes_summary": themes_summary, - "transcript_excerpt": transcript_excerpt, - }) + # LLM: executive summary try: - executive_summary = await LLMService.generate_content(prompt, model_name="mini") + exec_prompt = load_prompt("report-executive-summary", variables={ + "topic": fg_topic, + "participant_count": len(personas), + "participant_names": participant_names, + "message_count": len(chat_lines), + "themes_summary": themes_summary, + "transcript_excerpt": transcript_excerpt, + "language": language_name, + }) + executive_summary = await LLMService.generate_content(exec_prompt, model_name="mini") except LLMServiceError: - executive_summary = "Executive summary unavailable — LLM service error." 
+ executive_summary = "—" - # Build markdown report - sanitized = "".join(c for c in fg_name if c.isalnum() or c in " -_").strip().replace(" ", "-").lower() - report_lines = [ - f"# Research Report: {fg_name}", - f"", - f"**Date:** {date_str} ", - f"**Participants:** {participant_names} ", - f"**Messages exchanged:** {len(chat_lines)} ", - f"", - f"---", - f"", - f"## Executive Summary", - f"", - executive_summary, - f"", - f"---", - f"", - f"## Key Themes", - f"", - ] + # LLM: key decisions + try: + dec_prompt = load_prompt("report-key-decisions", variables={ + "topic": fg_topic, + "participant_count": len(personas), + "participant_names": participant_names, + "research_brief": fg_topic, + "themes_summary": themes_summary, + "transcript_excerpt": transcript_excerpt, + "language": language_name, + }) + key_decisions = await LLMService.generate_content(dec_prompt, model_name="mini") + except LLMServiceError: + key_decisions = "—" + + # ── Build PDF ────────────────────────────────────────────────────────── + FONT_DIR = "/usr/share/fonts/truetype/dejavu" + FONT_REGULAR = f"{FONT_DIR}/DejaVuSans.ttf" + FONT_BOLD = f"{FONT_DIR}/DejaVuSans-Bold.ttf" + # fpdf2 leaves x at the right edge after multi_cell() by default, so every cell()/multi_cell() below sets new_x/new_y explicitly. + pdf = FPDF() + pdf.set_auto_page_break(auto=True, margin=20) + pdf.add_font("R", "", FONT_REGULAR) + pdf.add_font("R", "B", FONT_BOLD) + + BRAND_ORANGE = (222, 143, 63) + DARK = (30, 30, 40) + MUTED = (110, 110, 130) + WHITE = (255, 255, 255) + LIGHT_BG = (245, 245, 250) + + def set_r(size=10, bold=False): + pdf.set_font("R", "B" if bold else "", size) + + def h1(text): + pdf.ln(4) + set_r(16, bold=True) + pdf.set_text_color(*BRAND_ORANGE) + pdf.multi_cell(0, 8, text, new_x="LMARGIN", new_y="NEXT") + pdf.set_text_color(*DARK) + pdf.ln(2) + + def h2(text): + pdf.ln(6) + set_r(12, bold=True) + pdf.set_text_color(*BRAND_ORANGE) + pdf.multi_cell(0, 7, text, new_x="LMARGIN", new_y="NEXT") + pdf.set_text_color(*DARK) + pdf.set_draw_color(*BRAND_ORANGE) + pdf.set_line_width(0.3) + pdf.line(pdf.get_x(), pdf.get_y(), pdf.get_x() + 170, pdf.get_y()) + pdf.ln(3) + + def body(text,
color=None): + set_r(10) + pdf.set_text_color(*(color or DARK)) + pdf.multi_cell(0, 5.5, text, new_x="LMARGIN", new_y="NEXT") + pdf.set_text_color(*DARK) + + def meta_row(label, value): + set_r(9, bold=True) + pdf.set_text_color(*MUTED) + pdf.cell(40, 6, label + ":", new_x="RIGHT", new_y="TOP") + set_r(9) + pdf.set_text_color(*DARK) + pdf.multi_cell(0, 6, value, new_x="LMARGIN", new_y="NEXT") + + def quote_block(text): + pdf.set_fill_color(*LIGHT_BG) + pdf.set_draw_color(*BRAND_ORANGE) + pdf.set_line_width(0.5) + x = pdf.get_x() + y = pdf.get_y() + set_r(9) + pdf.set_text_color(*DARK) + pdf.set_x(x + 4) + pdf.multi_cell(0, 5, text, fill=False, new_x="LMARGIN", new_y="NEXT") + line_h = pdf.get_y() - y + pdf.line(x, y, x, y + line_h) + pdf.ln(1) + + # Cover page + pdf.add_page() + pdf.set_fill_color(*DARK) + pdf.rect(0, 0, 210, 60, "F") + pdf.set_y(15) + set_r(20, bold=True) + pdf.set_text_color(*BRAND_ORANGE) + pdf.cell(0, 12, L["title"], new_x="LMARGIN", new_y="NEXT", align="C") + set_r(13) + pdf.set_text_color(*WHITE) + pdf.cell(0, 8, fg_name, new_x="LMARGIN", new_y="NEXT", align="C") + pdf.set_text_color(*DARK) + + pdf.set_y(70) + meta_row(L["date"], date_str) + meta_row(L["participants"], participant_names) + meta_row(L["messages"], str(len(chat_lines))) + if fg_topic and fg_topic != "Not specified": + pdf.ln(2) + h2(L["brief"]) + body(fg_topic) + + # Executive summary + h2(L["exec_summary"]) + body(executive_summary) + + # Key decisions + h2(L["key_decisions"]) + body(key_decisions) + + # Key themes + h2(L["key_themes"]) if generated_themes: for i, t in enumerate(generated_themes, 1): - report_lines.append(f"### {i}. {t.get('title', 'Theme')}") - report_lines.append(f"") - report_lines.append(t.get("description", "")) - report_lines.append(f"") + pdf.ln(3) + set_r(11, bold=True) + pdf.set_text_color(*DARK) + pdf.multi_cell(0, 6, f"{i}.
{t.get('title', '')}", new_x="LMARGIN", new_y="NEXT") + body(t.get("description", ""), color=MUTED) quotes = t.get("quotes", []) if quotes: - report_lines.append("**Supporting Quotes:**") - report_lines.append("") - for q in quotes: + set_r(9, bold=True) + pdf.set_text_color(*MUTED) + pdf.cell(0, 5, L["quotes"] + ":", new_x="LMARGIN", new_y="NEXT") + pdf.set_text_color(*DARK) + for q in quotes[:5]: if isinstance(q, str): - report_lines.append(f"> {q}") + qtext = q else: - speaker = q.get("speaker", "") - text = q.get("text", "") - report_lines.append(f"> **{speaker}:** {text}" if speaker else f"> {text}") - report_lines.append("") - report_lines.append("---") - report_lines.append("") + sp = q.get("speaker", "") + qtext = f"{sp}: {q.get('text', '')}" if sp else q.get("text", "") + quote_block(qtext) else: - report_lines.append("*No themes have been generated for this session yet.*") - report_lines.append("") - report_lines.append("---") - report_lines.append("") + body(L["no_themes"], color=MUTED) - report_lines += [ - f"## Full Transcript", - f"", - full_transcript, - f"", - ] + # Participation analytics + if message_counts: + h2(L["participation"]) + total = sum(message_counts.values()) or 1 + for name, count in sorted(message_counts.items(), key=lambda x: -x[1]): + pct = int(count / total * 100) + bar_w = int(pct * 1.2) + set_r(9) + pdf.set_text_color(*DARK) + pdf.cell(50, 5, name, new_x="RIGHT", new_y="TOP") + pdf.set_fill_color(*BRAND_ORANGE) + y_bar = pdf.get_y() + 1 + pdf.rect(pdf.get_x(), y_bar, bar_w, 3, "F") + pdf.set_x(pdf.get_x() + bar_w + 2) + pdf.cell(0, 5, f"{count} ({pct}%)", new_x="LMARGIN", new_y="NEXT") + pdf.ln(2) - content = "\n".join(report_lines) - filename = f"report-{sanitized}-{date_str}.md" + # Filename: ASCII-only slug + import unicodedata + import re + nfkd = unicodedata.normalize("NFKD", fg_name) + ascii_name = nfkd.encode("ascii", "ignore").decode("ascii") + slug = re.sub(r"[^a-z0-9]+", "-", ascii_name.lower()).strip("-") or "report" + filename = f"report-{slug}-{date_str}.pdf" + + buf = io.BytesIO() + pdf.output(buf) +
buf.seek(0) + + from urllib.parse import quote as url_quote + safe_filename = url_quote(f"report-{fg_name}-{date_str}.pdf".encode("utf-8")) return Response( - content, - mimetype="text/markdown", + buf.read(), + mimetype="application/pdf", headers={ - "Content-Disposition": f'attachment; filename="{filename}"', - "Content-Type": "text/markdown; charset=utf-8", + "Content-Disposition": f"attachment; filename=\"{filename}\"; filename*=UTF-8''{safe_filename}", + "Content-Type": "application/pdf", }, ) diff --git a/backend/prompts/report-executive-summary.md b/backend/prompts/report-executive-summary.md index fcd299da..960c20b8 100644 --- a/backend/prompts/report-executive-summary.md +++ b/backend/prompts/report-executive-summary.md @@ -1,5 +1,7 @@ You are a qualitative research analyst. Write a concise executive summary for a focus group research report. +Respond entirely in {language}. Do not mix languages. + ## Research Session **Topic:** {topic} **Participants:** {participant_count} AI personas — {participant_names} diff --git a/backend/prompts/report-key-decisions.md b/backend/prompts/report-key-decisions.md new file mode 100644 index 00000000..7c6f562d --- /dev/null +++ b/backend/prompts/report-key-decisions.md @@ -0,0 +1,25 @@ +You are a senior qualitative research analyst preparing a stakeholder report. + +## Research Session +**Topic:** {topic} +**Participants:** {participant_count} personas — {participant_names} + +## Research Brief +{research_brief} + +## Key Themes +{themes_summary} + +## Transcript Excerpt +{transcript_excerpt} + +## Instructions +Respond entirely in {language}. Do not mix languages. + +Write a structured section called "Key Decisions & Conclusions" containing: + +1. **3–5 key findings** directly relevant to the research brief — what the group revealed about the product/service/topic +2. **Strategic recommendations** — 2–3 actionable next steps for the client based on the discussion +3. 
**Open questions** — 1–2 areas that need further research + +Be specific and evidence-based. Each finding should reference actual participant quotes or themes. Write in clear, professional prose suitable for a business stakeholder. Do not include a heading — start directly with the findings. diff --git a/backend/requirements.txt b/backend/requirements.txt index 89661951..74f0c08b 100755 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -32,3 +32,6 @@ python-dotenv==1.1.1 # Token estimation (used by backfill_usage.py script) tiktoken>=0.9.0 + +# PDF report generation +fpdf2>=2.8.0 diff --git a/src/components/ThemeHighlighter.tsx b/src/components/ThemeHighlighter.tsx index 5872432d..841571ee 100755 --- a/src/components/ThemeHighlighter.tsx +++ b/src/components/ThemeHighlighter.tsx @@ -16,14 +16,14 @@ interface ThemeHighlighterProps { onQuoteClick?: (quote: string | QuoteData, messageId?: string) => void; } -const ThemeHighlighter = ({ - themes, - messages, +const ThemeHighlighter = ({ + themes, + messages, personas = [], onThemeDelete, onQuoteClick }: ThemeHighlighterProps) => { - + const handleThemeDelete = (e: React.MouseEvent, themeId: string) => { e.stopPropagation(); if (onThemeDelete) { @@ -32,62 +32,40 @@ const ThemeHighlighter = ({ } }; - // Helper function to get persona name by ID const getPersona = (id: string) => { return personas.find(p => p.id === id || p._id === id); }; - // Helper function to parse attributed quotes const parseAttributedQuote = (quote: string) => { - // First, check if quote has message ID format and strip it let cleanQuote = quote; const msgIdMatch = quote.match(/^\[MSG_ID:[^\]]+\]\s*(.*)$/); - if (msgIdMatch) { - cleanQuote = msgIdMatch[1]; - } - - // Check if quote has attribution format [Name]: text + if (msgIdMatch) cleanQuote = msgIdMatch[1]; + const attributionMatch = cleanQuote.match(/^\[([^\]]+)\]:\s*(.*)$/); - if (attributionMatch) { - return { - persona: attributionMatch[1], - text: attributionMatch[2] - }; - 
} - - // Check if quote has simple attribution format Name: text (without brackets) + if (attributionMatch) return { persona: attributionMatch[1], text: attributionMatch[2] }; + const simpleAttributionMatch = cleanQuote.match(/^([^:]+):\s*(.*)$/); if (simpleAttributionMatch && simpleAttributionMatch[1].trim() !== cleanQuote.trim()) { - return { - persona: simpleAttributionMatch[1].trim(), - text: simpleAttributionMatch[2] - }; + return { persona: simpleAttributionMatch[1].trim(), text: simpleAttributionMatch[2] }; } - - // Fallback for quotes without attribution - return { - persona: null, - text: cleanQuote - }; + return { persona: null, text: cleanQuote }; }; - - - // Split themes into highlighted and generated - const highlightedThemes = themes.filter(theme => - 'source' in theme ? theme.source === 'highlight' : true // For backward compatibility + + const highlightedThemes = themes.filter(theme => + 'source' in theme ? theme.source === 'highlight' : true ) as HighlightedTheme[]; - - const generatedThemes = themes.filter(theme => + + const generatedThemes = themes.filter(theme => 'source' in theme && theme.source === 'generated' ) as GeneratedTheme[]; - + return (

Key Themes

- +
{generatedThemes.length > 0 && (
@@ -97,39 +75,38 @@ const ThemeHighlighter = ({
{generatedThemes.map((theme) => ( - {onThemeDelete && ( )} {theme.title} -

{theme.description}

+

{theme.description}

{theme.quotes && theme.quotes.length > 0 && (
-

Supporting Quotes:

+

Supporting Quotes:

{theme.quotes.map((quote, index) => { - // Handle both string and QuoteData formats const isQuoteData = typeof quote === 'object' && quote !== null; const quoteText = isQuoteData ? quote.text : quote; const speaker = isQuoteData ? quote.speaker : parseAttributedQuote(quote).persona; const messageId = isQuoteData ? quote.message_id : undefined; const originalQuote = isQuoteData ? quote.original : quote; - + return ( -
{ e.stopPropagation(); if (onQuoteClick) { @@ -139,15 +116,13 @@ const ThemeHighlighter = ({ title={messageId ? `Message ID: ${messageId}` : 'Click to find original message'} > {speaker && ( - + {speaker}: )} "{quoteText}" {messageId && ( - - ✓ - + )}
); @@ -161,7 +136,7 @@ const ThemeHighlighter = ({
)} - + {highlightedThemes.length > 0 && (
@@ -170,34 +145,21 @@ const ThemeHighlighter = ({
{highlightedThemes.map((theme) => { - // Find the associated message to get full text and speaker info - const associatedMessage = theme.messages.length > 0 + const associatedMessage = theme.messages.length > 0 ? messages.find(msg => msg.id === theme.messages[0]) : null; - - // Get full text (up to 200 characters) instead of truncated version const fullText = associatedMessage?.text || theme.text; - const displayText = fullText.length > 200 - ? fullText.substring(0, 200) + '...' - : fullText; - - // Get speaker information + const displayText = fullText.length > 200 ? fullText.substring(0, 200) + '...' : fullText; const senderId = associatedMessage?.senderId; let speakerName = ''; - if (senderId === 'moderator') { - speakerName = 'AI Moderator'; - } else if (senderId === 'facilitator') { - speakerName = 'Human Facilitator'; - } else if (senderId) { - // Get the actual participant name from personas - const persona = getPersona(senderId); - speakerName = persona?.name || 'Unknown Participant'; - } + if (senderId === 'moderator') speakerName = 'AI Moderator'; + else if (senderId === 'facilitator') speakerName = 'Human Facilitator'; + else if (senderId) speakerName = getPersona(senderId)?.name || 'Unknown Participant'; return ( - { e.stopPropagation(); if (onQuoteClick && associatedMessage) { @@ -208,26 +170,24 @@ const ThemeHighlighter = ({ > {onThemeDelete && ( )} - + {speakerName && ( - - {speakerName} - + {speakerName} )} -

+

"{displayText}"

-
+
Click to view in discussion
@@ -238,18 +198,18 @@ const ThemeHighlighter = ({
)} - + {themes.length === 0 && ( -
- -

No themes have been identified yet.

-

+

+ +

No themes have been identified yet.

+

Highlight important messages in the discussion or generate themes automatically.

)}
- +
); }; diff --git a/src/components/focus-group-session/AnalyticsPanel.tsx b/src/components/focus-group-session/AnalyticsPanel.tsx index 426f195f..1e5105c0 100755 --- a/src/components/focus-group-session/AnalyticsPanel.tsx +++ b/src/components/focus-group-session/AnalyticsPanel.tsx @@ -279,9 +279,9 @@ const AnalyticsPanel = ({ messages, themes, personas = [] }: AnalyticsPanelProps const color = sentimentData.find(s => s.name === dominant)?.color || '#93c5fd'; return ( -
+
- + {data.name}
@@ -298,7 +298,7 @@ const AnalyticsPanel = ({ messages, themes, personas = [] }: AnalyticsPanelProps

Focus Group Balance Assessment

-
+
{balanceAssessment.isBalanced ? 'Balanced Focus Group' : 'Potential Balance Issues'}

{balanceAssessment.reason}

diff --git a/src/components/focus-group-session/ThemesPanel.tsx b/src/components/focus-group-session/ThemesPanel.tsx index e19458e1..bb7ae8b4 100755 --- a/src/components/focus-group-session/ThemesPanel.tsx +++ b/src/components/focus-group-session/ThemesPanel.tsx @@ -7,6 +7,7 @@ import { Persona } from '@/types/persona'; import { toast } from 'sonner'; import { focusGroupsApi } from '@/lib/api'; import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; interface ThemesPanelProps { themes: Theme[]; @@ -30,11 +31,13 @@ const ThemesPanel = ({ onGenerateKeyThemes }: ThemesPanelProps) => { const [exportingReport, setExportingReport] = useState(false); + const { i18n } = useTranslation(); const exportFullReport = async () => { setExportingReport(true); try { - const result = await focusGroupsApi.downloadFullReport(focusGroupId); + const lang = i18n.language?.split('-')[0] || 'ru'; + const result = await focusGroupsApi.downloadFullReport(focusGroupId, lang); toast.success("Report downloaded", { description: result.filename }); } catch { toast.error("Failed to download report", { description: "Please try again." 
}); diff --git a/src/lib/api.ts b/src/lib/api.ts index 147c4768..8236060a 100755 --- a/src/lib/api.ts +++ b/src/lib/api.ts @@ -516,19 +516,22 @@ export const focusGroupsApi = { } }, - downloadFullReport: async (focusGroupId: string) => { + downloadFullReport: async (focusGroupId: string, lang = 'ru') => { try { - const response = await api.get(`/focus-groups/${focusGroupId}/report/download`, { + const response = await api.get(`/focus-groups/${focusGroupId}/report/download?lang=${lang}`, { responseType: 'blob', - timeout: 60000, + timeout: 120000, }); const contentDisposition = response.headers['content-disposition']; - let filename = 'report.md'; + let filename = 'report.pdf'; if (contentDisposition) { - const match = contentDisposition.match(/filename="([^"]+)"/); - if (match) filename = match[1]; + // prefer filename* (UTF-8) over filename + const utf8Match = contentDisposition.match(/filename\*=UTF-8''([^;]+)/i); + const asciiMatch = contentDisposition.match(/filename="([^"]+)"/); + if (utf8Match) filename = decodeURIComponent(utf8Match[1]); + else if (asciiMatch) filename = asciiMatch[1]; } - const blob = new Blob([response.data], { type: 'text/markdown' }); + const blob = new Blob([response.data], { type: 'application/pdf' }); const url = URL.createObjectURL(blob); const anchor = document.createElement('a'); anchor.href = url;