Report quality overhaul: 11 feedback items

1. Remove Desk Research (Stage 7 skipped, sources removed from report)
2. Fix comments scraping: increase cap to 2000, handle alt field names
3. Dynamic stats bar: hide zero-value stats instead of showing "0 Comments"
4. Prompt improvements: enforce timeliness, comment-based insights, creator spotlight algorithm (2-10 videos, exclude >50% dominance)
5. Date filtering: pass date params to Apify actors (oldestCreateTime, onlyPostsNewerThan, uploadDate) + log filter counts
6. Pullquotes: 3-4 generated editorial dividers between sections
7. Thumbnails: download top 50 coverUrl as base64, store on EnrichedVideo
8. Visual Language section: 5 batches of 10 through Claude Vision, synthesized into 5-6 visual codes with thumbnail cards
9. Sticky navigation bar with anchor links to all sections
10. New types: VisualCode, thumbnailUrl on Video, thumbnailBase64 on EnrichedVideo, pullquotes/visualCodes on ReportJSON

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
DJP 2026-04-08 09:52:08 -04:00
parent 3dcdf0cc69
commit f2d6f56831
8 changed files with 302 additions and 70 deletions

View file

@ -205,5 +205,5 @@ export async function runActor<T = unknown>(
export function getLimits() {
return IS_TEST
? { resultsPerPage: 100, resultsLimit: 100, maxResults: 100, maxComments: 100, transcriptBatch: 10, profileLimit: 100 }
: { resultsPerPage: 200, resultsLimit: 100, maxResults: 100, maxComments: 1000, transcriptBatch: 25, profileLimit: 200 };
: { resultsPerPage: 200, resultsLimit: 100, maxResults: 100, maxComments: 2000, transcriptBatch: 25, profileLimit: 200 };
}

View file

@ -283,3 +283,38 @@ export async function callClaudeJSON<T>(prompt: string, model?: string, options?
}
throw new Error('Unreachable');
}
/**
 * Send a vision request to Claude: every entry of `imageBase64s` becomes an
 * image content block, followed by one text block carrying `textPrompt`.
 *
 * Each image is expected to look like `data:<mime>;base64,<payload>`. The MIME
 * type is taken from the part before the first comma and falls back to
 * `image/jpeg` when no `data:<mime>` prefix is found there.
 *
 * @param imageBase64s - Images to attach, in the order they are sent.
 * @param textPrompt - Instruction text appended after the images.
 * @param model - Model id; `DEFAULT_MODEL` is used when omitted or empty.
 * @returns The extracted response text plus usage, which is also reported
 *   under the `vision_analysis` label.
 */
export async function callClaudeVision(
  imageBase64s: string[],
  textPrompt: string,
  model?: string,
): Promise<ClaudeResult> {
  const modelId = model || DEFAULT_MODEL;

  const imageBlocks = imageBase64s.map((dataUri) => {
    // Split "data:image/jpeg;base64,<payload>" at the first comma.
    const splitAt = dataUri.indexOf(',');
    const header = dataUri.slice(0, splitAt);
    const payload = dataUri.slice(splitAt + 1);
    const mimeMatch = header.match(/data:([^;]+)/);
    const block = {
      type: 'image',
      source: { type: 'base64', media_type: mimeMatch?.[1] || 'image/jpeg', data: payload },
    };
    return block as unknown as ApiContentBlock;
  });

  // Images first, prompt text last.
  const content: ApiContentBlock[] = [...imageBlocks, { type: 'text', text: textPrompt }];
  const messages: ApiMessage[] = [{ role: 'user', content }];
  const response = await callApi(messages, modelId, { maxTokens: 4096 });

  const { input_tokens: inputTokens, output_tokens: outputTokens } = response.usage;
  const usage: ClaudeUsage = {
    inputTokens,
    outputTokens,
    costUsd: calculateCost(modelId, inputTokens, outputTokens),
    model: modelId,
  };
  reportUsage(usage, 'vision_analysis');
  return { text: extractText(response), usage };
}

View file

@ -1,5 +1,5 @@
// ─── HTML Report Generator ───
import { ReportJSON, ClientBrief, Trend, TrendVideo, ContentOpportunity } from './types-v2.js';
import { ReportJSON, ClientBrief, Trend, TrendVideo, ContentOpportunity, VisualCode } from './types-v2.js';
interface ReportStats {
videosScraped: number;
@ -16,9 +16,14 @@ export function buildMarkdown(report: ReportJSON, brief: ClientBrief, stats: Rep
lines.push(`# Social Listening Report — ${brief.clientName}`);
lines.push(`**${brief.category}** — ${formatDateRange(brief.dateRange)}`);
lines.push('');
lines.push(`| Videos Scraped | Comments Analysed | Transcripts | Desk Sources |`);
lines.push(`|---|---|---|---|`);
lines.push(`| ${stats.videosScraped} | ${stats.commentsAnalysed} | ${stats.transcriptsDownloaded} | ${stats.deskSources} |`);
const mdStats = [
{ label: 'Videos Scraped', value: stats.videosScraped },
{ label: 'Comments Analysed', value: stats.commentsAnalysed },
{ label: 'Transcripts', value: stats.transcriptsDownloaded },
].filter(s => s.value > 0);
lines.push(`| ${mdStats.map(s => s.label).join(' | ')} |`);
lines.push(`| ${mdStats.map(() => '---').join(' | ')} |`);
lines.push(`| ${mdStats.map(s => s.value).join(' | ')} |`);
lines.push('');
lines.push('## Executive Summary');
@ -72,11 +77,6 @@ export function buildMarkdown(report: ReportJSON, brief: ClientBrief, stats: Rep
lines.push('');
}
lines.push('## Desk Research Sources');
for (const s of report.deskSources) {
lines.push(`- [${s.title}](${s.url}) — ${s.summary}`);
}
return lines.join('\n');
}
@ -193,9 +193,37 @@ function deriveFormatCards(trends: Trend[]): { icon: string; name: string; desc:
return formats.slice(0, 6);
}
export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats: ReportStats): string {
/**
 * Render the "Visual Language" section of the HTML report: one card per
 * visual code, each with a vertical label, an optional thumbnail, the
 * description, the frequency note and an optional example-creator line.
 *
 * @param visualCodes - Visual codes to render; an empty or missing list yields `''`
 *   so the section is omitted entirely.
 * @param thumbnailMap - Optional lookup from video URL to an image source
 *   (base64 data URI); a card only gets a thumbnail when its
 *   `exampleVideoUrl` is a key of this map.
 * @returns An HTML fragment starting with the `#visual-language` section header.
 */
function renderVisualLanguageSection(visualCodes: VisualCode[], thumbnailMap?: Record<string, string>): string {
  if (!visualCodes?.length) return '';

  const cards = visualCodes.map((vc) => {
    const thumb = thumbnailMap && vc.exampleVideoUrl ? thumbnailMap[vc.exampleVideoUrl] : undefined;
    // The data URI's MIME prefix originates from a remote Content-Type header,
    // so it is escaped like every other attribute value.
    const thumbHtml = thumb
      ? `<div class="vc-thumb"><img src="${esc(thumb)}" alt="${esc(vc.name)}" style="width:180px;height:180px;object-fit:cover;border-radius:8px"></div>`
      : '';
    // Author and play count need a visible separator; without one they render
    // fused together (e.g. "anna12 plays").
    const exampleHtml = vc.exampleAuthor
      ? `<div class="vc-example">${esc(vc.exampleAuthor)} &mdash; ${(vc.examplePlays ?? 0).toLocaleString()} plays</div>`
      : '';
    return `<div class="vc-card">
<div class="vc-label">${esc(vc.name)}</div>
${thumbHtml}
<div class="vc-desc">
<p>${esc(vc.description)}</p>
<div class="vc-freq">${esc(vc.frequency)}</div>
${exampleHtml}
</div>
</div>`;
  }).join('\n');

  return `
<!-- VISUAL LANGUAGE -->
<div class="section-header" id="visual-language">Visual Language</div>
<div class="vc-row">${cards}</div>`;
}
export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats: ReportStats, thumbnailMap?: Record<string, string>): string {
const hasTikTok = report.trends.some(t => t.topVideoUrl?.includes('tiktok.com') || t.supportingVideos?.some(sv => sv.platform === 'tiktok'));
const hasInstagram = report.trends.some(t => t.topVideoUrl?.includes('instagram.com') || t.supportingVideos?.some(sv => sv.platform === 'instagram'));
const visualLanguageHtml = renderVisualLanguageSection(report.visualCodes || [], thumbnailMap);
const trendsHtml = report.trends.map((t, i) => {
const variationsHtml = t.variations.map(v => `<li>${esc(v)}</li>`).join('\n');
@ -250,9 +278,11 @@ export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats
</div>`;
}).join('\n');
// Pullquote after first half of trends
const pullquoteIndex = Math.floor(report.trends.length / 2);
const pullquoteText = report.trends[pullquoteIndex]?.humanTruth || report.executiveSummary.split('.')[0];
// Pullquotes — use generated ones if available, fallback to trend humanTruth
const pullquotes = report.pullquotes?.length
? report.pullquotes
: [report.trends[Math.floor(report.trends.length / 2)]?.humanTruth || report.executiveSummary.split('.')[0]];
const pq = (i: number) => pullquotes[i] ? `<div class="pullquote">${esc(pullquotes[i])}</div>` : '';
const insightsHtml = report.audienceInsights.map(ins => `
<div class="insight-card">
@ -312,10 +342,6 @@ export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats
</div>`;
}).join('\n');
const sourcesHtml = report.deskSources.map(s =>
`<li><a href="${esc(s.url)}" target="_blank">${esc(s.title)}</a> — ${esc(s.summary.slice(0, 120))}</li>`
).join('\n');
return `<!DOCTYPE html>
<html lang="en">
<head>
@ -397,6 +423,17 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
.supporting-author { font-size: 13px; font-weight: 700; color: #1a1a1a; margin-bottom: 4px; }
.supporting-desc { font-size: 12px; color: #666; line-height: 1.4; margin-bottom: 6px; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
.supporting-plays { font-size: 11px; font-weight: 600; color: #f5a623; }
.vc-row { display: flex; flex-direction: column; gap: 16px; margin: 28px 0; }
.vc-card { display: flex; gap: 20px; background: #fff; border: 1px solid #e8e8e8; border-radius: 12px; overflow: hidden; align-items: stretch; }
.vc-label { writing-mode: vertical-rl; text-orientation: mixed; background: #1a1a1a; color: #fff; font-size: 12px; font-weight: 700; letter-spacing: 1px; text-transform: uppercase; padding: 20px 14px; display: flex; align-items: center; justify-content: center; min-width: 50px; }
.vc-thumb { flex-shrink: 0; display: flex; align-items: center; padding: 16px 0; }
.vc-desc { padding: 20px; flex: 1; display: flex; flex-direction: column; justify-content: center; }
.vc-desc p { color: #444; margin-bottom: 8px; font-size: 15px; }
.vc-freq { font-size: 12px; color: #888; font-weight: 600; }
.vc-example { font-size: 12px; color: #f5a623; font-weight: 600; margin-top: 4px; }
.sticky-nav { position: sticky; top: 0; z-index: 100; background: rgba(255,255,255,0.95); backdrop-filter: blur(8px); border-bottom: 1px solid #e8e8e8; padding: 12px 0; display: flex; gap: 24px; justify-content: center; flex-wrap: wrap; font-size: 12px; font-weight: 600; text-transform: uppercase; letter-spacing: 1px; }
.sticky-nav a { color: #666; text-decoration: none; transition: color 0.2s; }
.sticky-nav a:hover { color: #1a1a1a; }
.footer { text-align: center; padding: 48px 0; color: #888; font-size: 12px; }
@media (max-width: 768px) {
.container { padding: 24px 16px; }
@ -407,6 +444,15 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
</style>
</head>
<body>
<nav class="sticky-nav">
<a href="#exec-summary">Summary</a>
<a href="#trends">Trends</a>
${report.visualCodes?.length ? '<a href="#visual-language">Visual Language</a>' : ''}
<a href="#insights">Insights</a>
<a href="#formats">Formats</a>
<a href="#opportunities">Opportunities</a>
<a href="#spotlight">Spotlight</a>
</nav>
<div class="container">
<div class="report-header">
@ -415,49 +461,48 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
<div class="subtitle">${esc(brief.category)} &mdash; ${formatDateRange(brief.dateRange)}</div>
</div>
<div class="stat-row">
<div class="stat-box"><div class="stat-number">${stats.videosScraped}</div><div class="stat-label">Videos Scraped</div></div>
<div class="stat-box"><div class="stat-number">${stats.commentsAnalysed}</div><div class="stat-label">Comments Analysed</div></div>
<div class="stat-box"><div class="stat-number">${stats.transcriptsDownloaded}</div><div class="stat-label">Transcripts Downloaded</div></div>
<div class="stat-box"><div class="stat-number">${stats.deskSources}</div><div class="stat-label">Desk Sources</div></div>
<div class="stat-row" style="grid-template-columns:repeat(${[stats.videosScraped, stats.commentsAnalysed, stats.transcriptsDownloaded].filter(v => v > 0).length}, 1fr)">
${stats.videosScraped > 0 ? `<div class="stat-box"><div class="stat-number">${stats.videosScraped}</div><div class="stat-label">Videos Scraped</div></div>` : ''}
${stats.commentsAnalysed > 0 ? `<div class="stat-box"><div class="stat-number">${stats.commentsAnalysed}</div><div class="stat-label">Comments Analysed</div></div>` : ''}
${stats.transcriptsDownloaded > 0 ? `<div class="stat-box"><div class="stat-number">${stats.transcriptsDownloaded}</div><div class="stat-label">Transcripts Downloaded</div></div>` : ''}
</div>
<hr>
<!-- EXECUTIVE SUMMARY -->
<div style="background:#fff;border:1px solid #e8e8e8;border-radius:16px;padding:32px;margin-bottom:40px;white-space:pre-line">${esc(report.executiveSummary)}</div>
<div id="exec-summary" style="background:#fff;border:1px solid #e8e8e8;border-radius:16px;padding:32px;margin-bottom:40px;white-space:pre-line">${esc(report.executiveSummary)}</div>
<!-- SECTION 01: CATEGORY TRENDS -->
<div class="section-header">01 &mdash; Category Trends</div>
<div class="section-header" id="trends">01 &mdash; Category Trends</div>
${trendsHtml}
<div class="pullquote">${esc(pullquoteText)}</div>
${visualLanguageHtml}
${pq(0)}
<!-- SECTION 02: AUDIENCE INSIGHTS -->
<div class="section-header">02 &mdash; Audience Insights</div>
<div class="section-header" id="insights">02 &mdash; Audience Insights</div>
<div class="insight-grid">
${insightsHtml}
</div>
${pq(1)}
<!-- CREATIVE FORMATS -->
<div class="section-header">The Formats That Drive Engagement</div>
<div class="section-header" id="formats">The Formats That Drive Engagement</div>
<div class="format-grid">
${formatsHtml}
</div>
<!-- SECTION 03: CONTENT OPPORTUNITIES -->
<div class="section-header">03 &mdash; Content Opportunities</div>
<div class="section-header" id="opportunities">03 &mdash; Content Opportunities</div>
${oppsHtml}
<!-- SECTION 04: CREATOR SPOTLIGHT -->
<div class="section-header">04 &mdash; Creator Spotlight</div>
${creatorsHtml}
${pq(2)}
<!-- DESK RESEARCH SOURCES -->
<div class="section-header">Desk Research Sources</div>
<ul class="source-list">
${sourcesHtml}
</ul>
<!-- SECTION 04: CREATOR SPOTLIGHT -->
<div class="section-header" id="spotlight">04 &mdash; Creator Spotlight</div>
${creatorsHtml}
<div class="footer">
<div class="qa-badge">QA REVIEWED &mdash; Community Manager + Brand Strategist</div>

View file

@ -15,7 +15,6 @@ import { runStage3 } from './stages/stage3-discovery-scrape.js';
import { runStage4 } from './stages/stage4-data-review.js';
import { runStage5 } from './stages/stage5-enrichment-scrape.js';
import { runStage6 } from './stages/stage6-pre-report-review.js';
import { runStage7 } from './stages/stage7-desk-search.js';
import { runStage8 } from './stages/stage8-report.js';
export type ProgressCallback = (
@ -151,18 +150,15 @@ export async function runPipeline(
state.stage6 = await runStage6(state.stage5.data, state.stage4.data, brief);
emit(6, 'Pre-Report Review', 'done', `${state.stage6.data.deskSearchQueries.length} desk queries`);
// ─── Stage 7: Desk Research ───
currentStage = 7; currentStageName = 'Desk Research';
// ─── Stage 7: Skipped (Desk Research removed) ───
emit(7, 'Desk Research', 'start');
state.stage7 = await runStage7(state.stage6.data, brief);
emit(7, 'Desk Research', 'done', `${state.stage7.data.length} sources`);
emit(7, 'Desk Research', 'done', 'Skipped');
// ─── Stage 8: Report Generation ───
currentStage = 8; currentStageName = 'Report Generation';
emit(8, 'Report Generation', 'start');
state.stage8 = await runStage8(
state.stage5.data,
state.stage7.data,
state.stage2.data,
state.stage4.data,
brief,

View file

@ -21,6 +21,7 @@ function normaliseTikTok(raw: RawTikTokItem): Video | null {
saveCount: raw.collectCount || 0,
duration: raw.videoMeta?.duration,
hashtags: raw.hashtags?.map(h => h.name) || [],
thumbnailUrl: raw.videoMeta?.coverUrl,
};
}
@ -41,6 +42,7 @@ function normaliseInstagram(raw: RawInstagramItem): Video | null {
saveCount: 0,
duration: raw.duration,
hashtags: raw.hashtags || [],
thumbnailUrl: raw.displayUrl,
};
}
@ -59,6 +61,7 @@ function normaliseYouTube(raw: RawYouTubeItem): Video | null {
commentCount: raw.commentsCount || 0,
shareCount: 0,
saveCount: 0,
thumbnailUrl: raw.thumbnailUrl,
};
}
@ -121,7 +124,7 @@ async function scrapeTikTok(brief: ClientBrief): Promise<Video[]> {
const tag = rawHashtag.replace(/^#/, '');
const items = await safeRunActor<RawTikTokItem>(
ACTORS.TIKTOK_SCRAPER,
{ hashtags: [tag], resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: false },
{ hashtags: [tag], resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: false, oldestCreateTime: brief.dateRange.from },
`TikTok hashtag: ${tag}`,
);
for (const item of items) { const v = normaliseTikTok(item); if (v) videos.push(v); }
@ -148,7 +151,7 @@ async function scrapeInstagram(brief: ClientBrief): Promise<Video[]> {
const tag = rawHashtag.replace(/^#/, '');
const items = await safeRunActor<RawInstagramItem>(
ACTORS.INSTAGRAM_HASHTAG,
{ hashtags: [tag], resultsLimit: limits.resultsLimit },
{ hashtags: [tag], resultsLimit: limits.resultsLimit, onlyPostsNewerThan: brief.dateRange.from },
`Instagram hashtag: ${tag}`,
);
for (const item of items) { const v = normaliseInstagram(item); if (v) videos.push(v); }
@ -175,7 +178,7 @@ async function scrapeYouTube(brief: ClientBrief): Promise<Video[]> {
for (const query of queries) {
const items = await safeRunActor<RawYouTubeItem>(
ACTORS.YOUTUBE_SEARCH,
{ searchQuery: query, maxResults: limits.maxResults },
{ searchQuery: query, maxResults: limits.maxResults, uploadDate: 'month' },
`YouTube search: ${query}`,
);
for (const item of items) { const v = normaliseYouTube(item); if (v) videos.push(v); }
@ -213,7 +216,9 @@ export async function runStage3(brief: ClientBrief): Promise<StageResult<Discove
}
// Filter last 30 days
const preFilterCount = allVideos.length;
allVideos = filterVideosLast30Days(allVideos, brief.dateRange);
console.log(`[Stage 3] Date filter: ${brief.dateRange.from} to ${brief.dateRange.to} — kept ${allVideos.length} of ${preFilterCount} videos`);
// Update byPlatform with filtered videos
for (const platform of brief.platforms) {

View file

@ -1,8 +1,8 @@
// ─── Stage 5: Enrichment Scrape (Transcripts + Comments) ───
// ─── Stage 5: Enrichment Scrape (Transcripts + Comments + Thumbnails) ───
import { ClientBrief, TopVideosSelection, EnrichmentData, EnrichedVideo, Video, StageResult } from '../types-v2.js';
import { runActor, ACTORS, getLimits } from '../apify.js';
const MAX_COMMENTS_PER_PLATFORM = 1000;
const MAX_COMMENTS_PER_PLATFORM = 2000;
interface TranscriptResult {
url?: string;
@ -13,8 +13,10 @@ interface TranscriptResult {
/**
 * One raw comment item as read by fetchTikTokComments. Every field is
 * optional and alternate field names are declared; an item is only used when
 * both a video URL and a comment text can be resolved from it.
 */
interface CommentResult {
// URL of the video the comment belongs to; checked first when grouping comments
videoUrl?: string;
// Alternate field for the video URL, used when `videoUrl` is empty
postUrl?: string;
// Comment text; checked first
text?: string;
// Alternate field for the comment text, used when `text` is empty
comment?: string;
// Second alternate field for the comment text, used when `text` and `comment` are empty
commentText?: string;
}
/** Safely run a single actor — logs and continues on failure */
@ -99,8 +101,8 @@ async function fetchTikTokComments(urls: string[]): Promise<Map<string, string[]
);
for (const item of items) {
const url = item.videoUrl;
const text = item.text || item.comment;
const url = item.videoUrl || item.postUrl;
const text = item.text || item.comment || item.commentText;
if (url && text) {
const existing = map.get(url) || [];
existing.push(text);
@ -110,6 +112,38 @@ async function fetchTikTokComments(urls: string[]): Promise<Map<string, string[]
return map;
}
// ─── Thumbnail Download ───
/**
 * Download cover images for the most-played videos and return them as
 * `data:<mime>;base64,<payload>` URIs keyed by video URL.
 *
 * Videos without a `thumbnailUrl` are ignored; the rest are ordered by
 * `playCount` (highest first) and capped at `maxCount`. Downloads are
 * best-effort: a failed, timed-out, non-2xx or non-image response only skips
 * that thumbnail. Entries are inserted in play-count order.
 *
 * @param videos - Candidate videos.
 * @param maxCount - Maximum number of thumbnails to attempt (default 50).
 * @returns Map from video URL to base64 data URI for every successful download.
 */
async function fetchThumbnailsAsBase64(
  videos: Video[],
  maxCount: number = 50,
): Promise<Map<string, string>> {
  // Upper bound per request so a single stalled connection cannot hang the stage.
  const timeoutMs = 15_000;
  const map = new Map<string, string>();
  const candidates = videos
    .filter(v => v.thumbnailUrl)
    .sort((a, b) => (b.playCount || 0) - (a.playCount || 0))
    .slice(0, maxCount);
  console.log(`[Stage 5] Downloading ${candidates.length} thumbnails...`);
  let downloaded = 0;
  for (const v of candidates) {
    const thumbnailUrl = v.thumbnailUrl;
    if (!thumbnailUrl) continue;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(thumbnailUrl, { signal: controller.signal });
      if (!res.ok) continue;
      // Keep only the MIME essence ("image/jpeg; charset=binary" -> "image/jpeg")
      // so the resulting data URI is well-formed.
      const header = res.headers.get('content-type') || 'image/jpeg';
      const [essence = ''] = header.split(';');
      const mimeType = essence.trim().toLowerCase();
      // A 200 response that is not an image (e.g. an HTML interstitial) is not a thumbnail.
      if (!mimeType.startsWith('image/')) continue;
      const buffer = await res.arrayBuffer();
      map.set(v.url, `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`);
      downloaded++;
    } catch (err) {
      // Non-fatal — skip failed thumbnails, but leave a trace for debugging.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[Stage 5] Thumbnail download failed for ${v.url}: ${reason}`);
    } finally {
      clearTimeout(timer);
    }
  }
  console.log(`[Stage 5] Downloaded ${downloaded} / ${candidates.length} thumbnails`);
  return map;
}
export async function runStage5(
selection: TopVideosSelection,
brief: ClientBrief,
@ -128,6 +162,9 @@ export async function runStage5(
const youtubeTranscripts = await fetchYouTubeTranscripts(youtubeUrls);
const tiktokComments = await fetchTikTokComments(tiktokUrls);
// Download thumbnails (plain HTTP, no Apify cost)
const thumbnailMap = await fetchThumbnailsAsBase64(selection.videos, 50);
// Merge all transcript maps
const allTranscripts = new Map<string, string>();
for (const [k, v] of tiktokTranscripts) allTranscripts.set(k, v);
@ -139,14 +176,20 @@ export async function runStage5(
...v,
transcript: allTranscripts.get(v.url) || null,
comments: tiktokComments.get(v.url) || [],
thumbnailBase64: thumbnailMap.get(v.url),
}));
const transcriptCount = enriched.filter(v => v.transcript).length;
const commentCount = enriched.reduce((sum, v) => sum + v.comments.length, 0);
// Convert thumbnailMap to plain object for serialization
const thumbnailObj: Record<string, string> = {};
for (const [k, v] of thumbnailMap) thumbnailObj[k] = v;
console.log(`[Stage 5] Enrichment complete:`);
console.log(` Transcripts: ${transcriptCount} / ${enriched.length}`);
console.log(` Comments: ${commentCount}`);
console.log(` Thumbnails: ${thumbnailMap.size}`);
return {
stage: 5,
@ -155,6 +198,7 @@ export async function runStage5(
videos: enriched,
transcriptCount,
commentCount,
thumbnailMap: thumbnailObj,
},
duration: Date.now() - start,
};

View file

@ -1,15 +1,102 @@
// ─── Stage 8: Final Report Generation (Opus) ───
import {
ClientBrief, EnrichmentData, DeskResearchSource, AgentReview,
TopVideosSelection, FinalReport, ReportJSON, StageResult,
ClientBrief, EnrichmentData, AgentReview,
TopVideosSelection, FinalReport, ReportJSON, VisualCode, StageResult,
} from '../types-v2.js';
import { callClaudeJSON } from '../claude-cli.js';
import { callClaudeJSON, callClaudeVision } from '../claude-cli.js';
import { buildMarkdown } from '../html-report.js';
import { generateHtmlReport } from '../html-report.js';
// ─── Visual Language Analysis ───
/**
 * Derive the category's "visual codes" from the downloaded thumbnails.
 *
 * Takes up to the first 50 entries of `enrichment.thumbnailMap`, sends them
 * to the vision model in batches of 10, then asks for a synthesis of the
 * per-batch findings into a JSON array of visual codes. Codes that come back
 * without an `exampleVideoUrl` are given one of the analysed videos as an
 * illustrative example. Examples are assigned round-robin so the cards do not
 * all show the same video; they are not matched to the pattern itself.
 *
 * The analysis is best-effort: a failed batch is skipped, and a failed or
 * malformed synthesis yields an empty list instead of throwing.
 *
 * @param enrichment - Stage 5 output; `thumbnailMap` and `videos` are read.
 * @returns The synthesised visual codes, or `[]` when fewer than 5 thumbnails
 *   are available or no usable result was produced.
 */
async function analyseVisualLanguage(
  enrichment: EnrichmentData,
): Promise<VisualCode[]> {
  const visionModel = 'claude-sonnet-4-6';
  const batchSize = 10;
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const entries = Object.entries(enrichment.thumbnailMap || {});
  if (entries.length < 5) {
    console.log(`[Stage 8] Skipping visual analysis — only ${entries.length} thumbnails available`);
    return [];
  }
  console.log(`[Stage 8] Analysing visual language from ${entries.length} thumbnails...`);

  // Build lookup: url -> video info
  const videoLookup = new Map(enrichment.videos.map(v => [v.url, v]));

  // Take top 50, split into batches of 10
  const top50 = entries.slice(0, 50);
  const batchResults: string[] = [];
  for (let i = 0; i < top50.length; i += batchSize) {
    const images = top50.slice(i, i + batchSize).map(([, b64]) => b64);
    const batchNum = Math.floor(i / batchSize) + 1;
    const prompt = `You are analysing ${images.length} video thumbnails from a social media category. For each thumbnail, describe:
1. Colour palette and dominant colours
2. Composition (close-up face, full body, flat lay, text-heavy, etc.)
3. Text overlays (if any) font style, positioning
4. Facial expressions and body language
5. Setting/environment
6. Any recurring visual motifs
Then identify 2-3 visual PATTERNS you see across multiple thumbnails in this batch. Be specific and concrete.`;
    try {
      const result = await callClaudeVision(images, prompt, visionModel);
      batchResults.push(result.text);
      console.log(`[Stage 8] Visual batch ${batchNum} complete`);
    } catch (err) {
      // One failed batch should not discard the findings of the others.
      console.warn(`[Stage 8] Visual batch ${batchNum} failed: ${describeError(err)}`);
    }
  }
  if (!batchResults.length) return [];

  // Synthesis: merge batch results into visual codes
  const synthesisPrompt = `You analysed video thumbnails from a social media category in batches. Here are the batch-by-batch findings:
${batchResults.map((r, i) => `--- BATCH ${i + 1} ---\n${r}`).join('\n\n')}
Synthesise these observations into exactly 5-6 VISUAL CODES recurring visual patterns that define this category's visual language. Each visual code should be a specific, named pattern (e.g. "The Bare-Face Close-Up", "Pastel Flat Lay", "Text-First Controversy Hook").
Return JSON array:
[
{
"name": "Visual Code Name",
"description": "2-3 sentences describing the visual pattern — what it looks like, why creators use it, what emotion it conveys",
"frequency": "Seen in X of Y thumbnails analysed"
}
]`;
  try {
    const codes = await callClaudeJSON<VisualCode[]>(synthesisPrompt, visionModel);
    if (!Array.isArray(codes)) {
      console.warn('[Stage 8] Visual synthesis failed: response was not a JSON array');
      return [];
    }
    // Attach an example video to each code that lacks one. Rotating through
    // the analysed thumbnails keeps the cards from all showing the same video.
    codes.forEach((code, idx) => {
      if (code.exampleVideoUrl) return;
      const entry = top50[idx % top50.length];
      if (!entry) return;
      const [url] = entry;
      const video = videoLookup.get(url);
      code.exampleVideoUrl = url;
      code.exampleAuthor = video?.author || '';
      code.examplePlays = video?.playCount || 0;
    });
    console.log(`[Stage 8] Visual analysis complete: ${codes.length} visual codes`);
    return codes;
  } catch (err) {
    console.warn(`[Stage 8] Visual synthesis failed: ${describeError(err)}`);
    return [];
  }
}
function buildReportPrompt(
enrichment: EnrichmentData,
deskSources: DeskResearchSource[],
agentReviews: AgentReview[],
selection: TopVideosSelection,
brief: ClientBrief,
@ -32,9 +119,6 @@ Comments: ${comments}`;
// Agent hypotheses
const hypotheses = selection.hypotheses.join('\n- ');
// Desk sources summary
const sourcesSummary = deskSources.map(s => `- ${s.title}: ${s.summary}`).join('\n');
return `You are generating a social listening report for ${brief.clientName} in the ${brief.category} category.
DATE RANGE: ${brief.dateRange.from} to ${brief.dateRange.to}
@ -49,20 +133,24 @@ ${urlIndex}
STRATEGIST HYPOTHESES:
- ${hypotheses}
DESK RESEARCH SOURCES:
${sourcesSummary}
HARD RULES:
- Every topVideoUrl MUST be an exact URL from the VIDEO URL INDEX above
- Every topVideoPlays MUST exactly match the plays number from the index
- Never describe influencer content as organic unless proven default assumption for branded creator content = paid
- Each trend/insight/opportunity must be GENUINELY DISTINCT no duplication disguised with different words
- Creator Spotlight requires 2+ videos with strong engagement, not single viral hits
- Trends must be timely (last 30 days), not evergreen observations
- TIMELINESS IS CRITICAL: Every trend must be anchored to specific videos from the last 30 days. Do NOT include evergreen observations like "authenticity matters" or "short-form video is growing". If a trend could have been written 6 months ago, it is NOT a trend it is a category norm. Focus on what is NEW, surprising, or accelerating in the data window ${brief.dateRange.from} to ${brief.dateRange.to}. Name specific creators, specific videos, specific moments.
- AUDIENCE INSIGHTS must prioritize comment text over video metadata. Mine the Comments fields for actual audience language confessions, questions, debates, purchase-intent signals, requests. Each exampleQuote MUST be a real comment from the corpus, not a caption or description. If comments are available, insights should read like community analysis, not metadata summaries.
- Each trend MUST include 2-3 supportingVideos from the VIDEO URL INDEX these will be embedded in the report
- supportingVideos should include the platform field matching [tiktok|instagram|youtube] from the index
- 7-12 trends, exactly 6 audience insights, 7 content opportunities, 1-2 creator spotlights
CREATOR SPOTLIGHT SELECTION:
- Only consider creators with 2-10 videos in the corpus
- EXCLUDE any creator whose videos make up more than 50% of the total dataset that is category domination, not a discovery
- Score each eligible creator: score = avg_likes_per_video × num_videos × engagement_rate (where engagement_rate = (likes + comments + shares) / plays)
- Select the top 1-2 creators by this score
- The spotlight should surface mid-tier creators who consistently resonate, not mega-influencers who are already obvious
Return this EXACT JSON structure:
{
"executiveSummary": "3-4 paragraph narrative overview of the category landscape",
@ -107,13 +195,13 @@ Return this EXACT JSON structure:
"keyVideos": [{"url": "EXACT url", "description": "Brief desc", "plays": 12345}],
"growthSignal": "Trajectory indicator"
}
]
],
"pullquotes": ["3-4 sharp, quotable one-liners that summarize key findings. Editorial in tone — pithy, insight-driven sentences a reader would want to screenshot. These will be displayed as visual dividers between report sections."]
}`;
}
export async function runStage8(
enrichment: EnrichmentData,
deskSources: DeskResearchSource[],
agentReviews: AgentReview[],
selection: TopVideosSelection,
brief: ClientBrief,
@ -121,25 +209,28 @@ export async function runStage8(
const start = Date.now();
console.log('[Stage 8] Generating final report via Claude Opus...');
const prompt = buildReportPrompt(enrichment, deskSources, agentReviews, selection, brief);
// Run visual language analysis (before main report)
const visualCodes = await analyseVisualLanguage(enrichment);
const prompt = buildReportPrompt(enrichment, agentReviews, selection, brief);
const reportJSON = await callClaudeJSON<ReportJSON>(prompt, 'claude-opus-4-6', {
timeout: 600_000, // 10 min
});
// Attach desk sources
reportJSON.deskSources = deskSources;
reportJSON.deskSources = [];
reportJSON.visualCodes = visualCodes;
const stats = {
videosScraped: enrichment.videos.length,
commentsAnalysed: enrichment.commentCount,
transcriptsDownloaded: enrichment.transcriptCount,
deskSources: deskSources.length,
deskSources: 0,
};
// Build outputs
const markdown = buildMarkdown(reportJSON, brief, stats);
const html = generateHtmlReport(reportJSON, brief, stats);
const html = generateHtmlReport(reportJSON, brief, stats, enrichment.thumbnailMap);
const finalReport: FinalReport = {
...reportJSON,

View file

@ -34,11 +34,13 @@ export interface Video {
duration?: number;
hashtags?: string[];
engagementScore?: number;
thumbnailUrl?: string;
}
/** A Video augmented with the per-video results of the Stage 5 enrichment scrape. */
export interface EnrichedVideo extends Video {
// Transcript text looked up by video URL; null when no transcript was found for it
transcript: string | null;
// Comment texts collected for this video URL; empty array when none were found
comments: string[];
// Thumbnail as a `data:<content-type>;base64,...` URI; undefined when no thumbnail was downloaded for this video
thumbnailBase64?: string;
}
export interface AgentReview {
@ -76,6 +78,7 @@ export interface EnrichmentData {
videos: EnrichedVideo[];
transcriptCount: number;
commentCount: number;
thumbnailMap?: Record<string, string>;
}
export interface PreReportReview {
@ -136,6 +139,15 @@ export interface CreatorSpotlight {
growthSignal: string;
}
/**
 * A named, recurring visual pattern produced by the Stage 8 thumbnail
 * analysis and rendered as one card in the report's Visual Language section.
 */
export interface VisualCode {
// Short pattern name, e.g. "Pastel Flat Lay"
name: string;
// Description of the pattern; the synthesis prompt asks for 2-3 sentences
description: string;
// Free-text frequency note; the synthesis prompt asks for "Seen in X of Y thumbnails analysed"
frequency: string;
// The three example* fields are not part of the JSON shape requested from the
// synthesis prompt; analyseVisualLanguage fills them in afterwards when
// exampleVideoUrl is missing.
// URL of an example video; also the key used to look up the card's thumbnail
exampleVideoUrl: string;
// Author of the example video; the example line is omitted from the card when empty
exampleAuthor: string;
// Play count of the example video
examplePlays: number;
}
export interface ReportJSON {
executiveSummary: string;
trends: Trend[];
@ -143,6 +155,8 @@ export interface ReportJSON {
contentOpportunities: ContentOpportunity[];
creatorSpotlight: CreatorSpotlight[];
deskSources: DeskResearchSource[];
pullquotes?: string[];
visualCodes?: VisualCode[];
}
export interface FinalReport extends ReportJSON {
@ -190,7 +204,7 @@ export interface RawTikTokItem {
commentCount?: number;
shareCount?: number;
collectCount?: number;
videoMeta?: { duration?: number };
videoMeta?: { duration?: number; coverUrl?: string };
hashtags?: { name: string }[];
}
@ -207,6 +221,7 @@ export interface RawInstagramItem {
commentsCount?: number;
duration?: number;
hashtags?: string[];
displayUrl?: string;
}
export interface RawYouTubeItem {
@ -218,4 +233,5 @@ export interface RawYouTubeItem {
viewCount?: number;
likes?: number;
commentsCount?: number;
thumbnailUrl?: string;
}