Report quality overhaul: 11 feedback items
1. Remove Desk Research (Stage 7 skipped, sources removed from report)
2. Fix comments scraping: increase cap to 2000, handle alt field names
3. Dynamic stats bar: hide zero-value stats instead of showing "0 Comments"
4. Prompt improvements: enforce timeliness, comment-based insights, creator spotlight algorithm (2-10 videos, exclude >50% dominance)
5. Date filtering: pass date params to Apify actors (oldestCreateTime, onlyPostsNewerThan, uploadDate) + log filter counts
6. Pullquotes: 3-4 generated editorial dividers between sections
7. Thumbnails: download top 50 coverUrl as base64, store on EnrichedVideo
8. Visual Language section: 5 batches of 10 through Claude Vision, synthesized into 5-6 visual codes with thumbnail cards
9. Sticky navigation bar with anchor links to all sections
10. New types: VisualCode, thumbnailUrl on Video, thumbnailBase64 on EnrichedVideo, pullquotes/visualCodes on ReportJSON

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3dcdf0cc69
commit
f2d6f56831
8 changed files with 302 additions and 70 deletions
|
|
@ -205,5 +205,5 @@ export async function runActor<T = unknown>(
|
|||
export function getLimits() {
|
||||
return IS_TEST
|
||||
? { resultsPerPage: 100, resultsLimit: 100, maxResults: 100, maxComments: 100, transcriptBatch: 10, profileLimit: 100 }
|
||||
: { resultsPerPage: 200, resultsLimit: 100, maxResults: 100, maxComments: 1000, transcriptBatch: 25, profileLimit: 200 };
|
||||
: { resultsPerPage: 200, resultsLimit: 100, maxResults: 100, maxComments: 2000, transcriptBatch: 25, profileLimit: 200 };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -283,3 +283,38 @@ export async function callClaudeJSON<T>(prompt: string, model?: string, options?
|
|||
}
|
||||
throw new Error('Unreachable');
|
||||
}
|
||||
|
||||
/**
 * Call Claude with images (vision) — accepts base64 data URIs + a text prompt.
 *
 * Each entry of `imageBase64s` is expected in `data:<media-type>;base64,<payload>`
 * form. An entry without a comma is treated as a bare base64 payload and labelled
 * `image/jpeg`. Entries with an empty payload, or with a media type outside the
 * set the Messages API documents for base64 images, are skipped with a warning
 * instead of being sent.
 *
 * @param imageBase64s Images to attach; they are placed before the text block, in order.
 * @param textPrompt   Text block appended after the images.
 * @param model        Model id; falls back to `DEFAULT_MODEL` when omitted or empty.
 * @returns The response text and the token usage/cost, which is also passed to
 *          `reportUsage` under the `vision_analysis` label.
 */
export async function callClaudeVision(
  imageBase64s: string[],
  textPrompt: string,
  model?: string,
): Promise<ClaudeResult> {
  const m = model || DEFAULT_MODEL;
  const supportedMediaTypes = new Set(['image/jpeg', 'image/png', 'image/gif', 'image/webp']);
  const content: ApiContentBlock[] = [];

  let position = 0;
  for (const b64 of imageBase64s) {
    position++;

    // Split "data:image/jpeg;base64,<payload>" at the first comma. Without a comma
    // there is no header to parse, so the whole string is the payload.
    const commaIdx = b64.indexOf(',');
    const meta = commaIdx === -1 ? '' : b64.slice(0, commaIdx);
    const data = (commaIdx === -1 ? b64 : b64.slice(commaIdx + 1)).trim();

    const declared = meta.match(/data:([^;,]+)/)?.[1]?.trim().toLowerCase() || 'image/jpeg';
    // "image/jpg" is a common non-standard spelling of "image/jpeg".
    const mediaType = declared === 'image/jpg' ? 'image/jpeg' : declared;

    if (!data || !supportedMediaTypes.has(mediaType)) {
      const reason = data ? `unsupported media type "${mediaType}"` : 'empty payload';
      console.warn(`[callClaudeVision] Skipping image ${position} of ${imageBase64s.length}: ${reason}`);
      continue;
    }

    // NOTE(review): the double cast suggests ApiContentBlock has no image variant;
    // widening that type would let this literal be checked properly — confirm.
    content.push({
      type: 'image',
      source: { type: 'base64', media_type: mediaType, data },
    } as unknown as ApiContentBlock);
  }

  content.push({ type: 'text', text: textPrompt });

  const messages: ApiMessage[] = [{ role: 'user', content }];
  const response = await callApi(messages, m, { maxTokens: 4096 });

  const usage: ClaudeUsage = {
    inputTokens: response.usage.input_tokens,
    outputTokens: response.usage.output_tokens,
    costUsd: calculateCost(m, response.usage.input_tokens, response.usage.output_tokens),
    model: m,
  };
  reportUsage(usage, 'vision_analysis');

  return { text: extractText(response), usage };
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// ─── HTML Report Generator ───
|
||||
import { ReportJSON, ClientBrief, Trend, TrendVideo, ContentOpportunity } from './types-v2.js';
|
||||
import { ReportJSON, ClientBrief, Trend, TrendVideo, ContentOpportunity, VisualCode } from './types-v2.js';
|
||||
|
||||
interface ReportStats {
|
||||
videosScraped: number;
|
||||
|
|
@ -16,9 +16,14 @@ export function buildMarkdown(report: ReportJSON, brief: ClientBrief, stats: Rep
|
|||
lines.push(`# Social Listening Report — ${brief.clientName}`);
|
||||
lines.push(`**${brief.category}** — ${formatDateRange(brief.dateRange)}`);
|
||||
lines.push('');
|
||||
lines.push(`| Videos Scraped | Comments Analysed | Transcripts | Desk Sources |`);
|
||||
lines.push(`|---|---|---|---|`);
|
||||
lines.push(`| ${stats.videosScraped} | ${stats.commentsAnalysed} | ${stats.transcriptsDownloaded} | ${stats.deskSources} |`);
|
||||
const mdStats = [
|
||||
{ label: 'Videos Scraped', value: stats.videosScraped },
|
||||
{ label: 'Comments Analysed', value: stats.commentsAnalysed },
|
||||
{ label: 'Transcripts', value: stats.transcriptsDownloaded },
|
||||
].filter(s => s.value > 0);
|
||||
lines.push(`| ${mdStats.map(s => s.label).join(' | ')} |`);
|
||||
lines.push(`| ${mdStats.map(() => '---').join(' | ')} |`);
|
||||
lines.push(`| ${mdStats.map(s => s.value).join(' | ')} |`);
|
||||
lines.push('');
|
||||
|
||||
lines.push('## Executive Summary');
|
||||
|
|
@ -72,11 +77,6 @@ export function buildMarkdown(report: ReportJSON, brief: ClientBrief, stats: Rep
|
|||
lines.push('');
|
||||
}
|
||||
|
||||
lines.push('## Desk Research Sources');
|
||||
for (const s of report.deskSources) {
|
||||
lines.push(`- [${s.title}](${s.url}) — ${s.summary}`);
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
|
|
@ -193,9 +193,37 @@ function deriveFormatCards(trends: Trend[]): { icon: string; name: string; desc:
|
|||
return formats.slice(0, 6);
|
||||
}
|
||||
|
||||
export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats: ReportStats): string {
|
||||
/**
 * Render the "Visual Language" section: one card per visual code, each with a
 * vertical label, an optional thumbnail and a description column.
 *
 * @param visualCodes  Codes to render; an empty or missing list yields `''` so the
 *                     caller can interpolate the result unconditionally.
 * @param thumbnailMap Optional lookup from video URL to an image source string
 *                     (the caller passes base64 data URIs). A card shows a
 *                     thumbnail only when its `exampleVideoUrl` is an own key of this map.
 * @returns An HTML fragment, or `''` when there is nothing to show.
 */
function renderVisualLanguageSection(visualCodes: VisualCode[], thumbnailMap?: Record<string, string>): string {
  if (!visualCodes?.length) return '';

  const cards = visualCodes.map(vc => {
    // Own-property check so a URL such as "constructor" can never resolve to an
    // inherited Object.prototype member.
    const thumb =
      thumbnailMap && vc.exampleVideoUrl && Object.prototype.hasOwnProperty.call(thumbnailMap, vc.exampleVideoUrl)
        ? thumbnailMap[vc.exampleVideoUrl]
        : undefined;

    // The data URI embeds a content-type taken from a remote server's response
    // header, so it is escaped like any other untrusted attribute value.
    // Presumably esc() is an HTML-entity escaper, which leaves base64 untouched — confirm.
    const thumbHtml = typeof thumb === 'string' && thumb
      ? `<div class="vc-thumb"><img src="${esc(thumb)}" alt="${esc(vc.name ?? '')}" style="width:180px;height:180px;object-fit:cover;border-radius:8px"></div>`
      : '';

    // The text fields originate from model-generated JSON, so they are defaulted
    // in case one is missing at runtime despite the declared type.
    return `<div class="vc-card">
<div class="vc-label">${esc(vc.name ?? '')}</div>
${thumbHtml}
<div class="vc-desc">
<p>${esc(vc.description ?? '')}</p>
<div class="vc-freq">${esc(vc.frequency ?? '')}</div>
${vc.exampleAuthor ? `<div class="vc-example">${esc(vc.exampleAuthor)} — ${(vc.examplePlays || 0).toLocaleString()} plays</div>` : ''}
</div>
</div>`;
  }).join('\n');

  return `
<!-- VISUAL LANGUAGE -->
<div class="section-header" id="visual-language">Visual Language</div>
<div class="vc-row">${cards}</div>`;
}
|
||||
|
||||
export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats: ReportStats, thumbnailMap?: Record<string, string>): string {
|
||||
const hasTikTok = report.trends.some(t => t.topVideoUrl?.includes('tiktok.com') || t.supportingVideos?.some(sv => sv.platform === 'tiktok'));
|
||||
const hasInstagram = report.trends.some(t => t.topVideoUrl?.includes('instagram.com') || t.supportingVideos?.some(sv => sv.platform === 'instagram'));
|
||||
const visualLanguageHtml = renderVisualLanguageSection(report.visualCodes || [], thumbnailMap);
|
||||
|
||||
const trendsHtml = report.trends.map((t, i) => {
|
||||
const variationsHtml = t.variations.map(v => `<li>${esc(v)}</li>`).join('\n');
|
||||
|
|
@ -250,9 +278,11 @@ export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats
|
|||
</div>`;
|
||||
}).join('\n');
|
||||
|
||||
// Pullquote after first half of trends
|
||||
const pullquoteIndex = Math.floor(report.trends.length / 2);
|
||||
const pullquoteText = report.trends[pullquoteIndex]?.humanTruth || report.executiveSummary.split('.')[0];
|
||||
// Pullquotes — use generated ones if available, fallback to trend humanTruth
|
||||
const pullquotes = report.pullquotes?.length
|
||||
? report.pullquotes
|
||||
: [report.trends[Math.floor(report.trends.length / 2)]?.humanTruth || report.executiveSummary.split('.')[0]];
|
||||
const pq = (i: number) => pullquotes[i] ? `<div class="pullquote">${esc(pullquotes[i])}</div>` : '';
|
||||
|
||||
const insightsHtml = report.audienceInsights.map(ins => `
|
||||
<div class="insight-card">
|
||||
|
|
@ -312,10 +342,6 @@ export function generateHtmlReport(report: ReportJSON, brief: ClientBrief, stats
|
|||
</div>`;
|
||||
}).join('\n');
|
||||
|
||||
const sourcesHtml = report.deskSources.map(s =>
|
||||
`<li><a href="${esc(s.url)}" target="_blank">${esc(s.title)}</a> — ${esc(s.summary.slice(0, 120))}</li>`
|
||||
).join('\n');
|
||||
|
||||
return `<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
|
|
@ -397,6 +423,17 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
|
|||
.supporting-author { font-size: 13px; font-weight: 700; color: #1a1a1a; margin-bottom: 4px; }
|
||||
.supporting-desc { font-size: 12px; color: #666; line-height: 1.4; margin-bottom: 6px; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; }
|
||||
.supporting-plays { font-size: 11px; font-weight: 600; color: #f5a623; }
|
||||
.vc-row { display: flex; flex-direction: column; gap: 16px; margin: 28px 0; }
|
||||
.vc-card { display: flex; gap: 20px; background: #fff; border: 1px solid #e8e8e8; border-radius: 12px; overflow: hidden; align-items: stretch; }
|
||||
.vc-label { writing-mode: vertical-rl; text-orientation: mixed; background: #1a1a1a; color: #fff; font-size: 12px; font-weight: 700; letter-spacing: 1px; text-transform: uppercase; padding: 20px 14px; display: flex; align-items: center; justify-content: center; min-width: 50px; }
|
||||
.vc-thumb { flex-shrink: 0; display: flex; align-items: center; padding: 16px 0; }
|
||||
.vc-desc { padding: 20px; flex: 1; display: flex; flex-direction: column; justify-content: center; }
|
||||
.vc-desc p { color: #444; margin-bottom: 8px; font-size: 15px; }
|
||||
.vc-freq { font-size: 12px; color: #888; font-weight: 600; }
|
||||
.vc-example { font-size: 12px; color: #f5a623; font-weight: 600; margin-top: 4px; }
|
||||
.sticky-nav { position: sticky; top: 0; z-index: 100; background: rgba(255,255,255,0.95); backdrop-filter: blur(8px); border-bottom: 1px solid #e8e8e8; padding: 12px 0; display: flex; gap: 24px; justify-content: center; flex-wrap: wrap; font-size: 12px; font-weight: 600; text-transform: uppercase; letter-spacing: 1px; }
|
||||
.sticky-nav a { color: #666; text-decoration: none; transition: color 0.2s; }
|
||||
.sticky-nav a:hover { color: #1a1a1a; }
|
||||
.footer { text-align: center; padding: 48px 0; color: #888; font-size: 12px; }
|
||||
@media (max-width: 768px) {
|
||||
.container { padding: 24px 16px; }
|
||||
|
|
@ -407,6 +444,15 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
|
|||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<nav class="sticky-nav">
|
||||
<a href="#exec-summary">Summary</a>
|
||||
<a href="#trends">Trends</a>
|
||||
${report.visualCodes?.length ? '<a href="#visual-language">Visual Language</a>' : ''}
|
||||
<a href="#insights">Insights</a>
|
||||
<a href="#formats">Formats</a>
|
||||
<a href="#opportunities">Opportunities</a>
|
||||
<a href="#spotlight">Spotlight</a>
|
||||
</nav>
|
||||
<div class="container">
|
||||
|
||||
<div class="report-header">
|
||||
|
|
@ -415,49 +461,48 @@ hr { border: none; border-top: 2px solid #1a1a1a; margin: 48px 0; }
|
|||
<div class="subtitle">${esc(brief.category)} — ${formatDateRange(brief.dateRange)}</div>
|
||||
</div>
|
||||
|
||||
<div class="stat-row">
|
||||
<div class="stat-box"><div class="stat-number">${stats.videosScraped}</div><div class="stat-label">Videos Scraped</div></div>
|
||||
<div class="stat-box"><div class="stat-number">${stats.commentsAnalysed}</div><div class="stat-label">Comments Analysed</div></div>
|
||||
<div class="stat-box"><div class="stat-number">${stats.transcriptsDownloaded}</div><div class="stat-label">Transcripts Downloaded</div></div>
|
||||
<div class="stat-box"><div class="stat-number">${stats.deskSources}</div><div class="stat-label">Desk Sources</div></div>
|
||||
<div class="stat-row" style="grid-template-columns:repeat(${[stats.videosScraped, stats.commentsAnalysed, stats.transcriptsDownloaded].filter(v => v > 0).length}, 1fr)">
|
||||
${stats.videosScraped > 0 ? `<div class="stat-box"><div class="stat-number">${stats.videosScraped}</div><div class="stat-label">Videos Scraped</div></div>` : ''}
|
||||
${stats.commentsAnalysed > 0 ? `<div class="stat-box"><div class="stat-number">${stats.commentsAnalysed}</div><div class="stat-label">Comments Analysed</div></div>` : ''}
|
||||
${stats.transcriptsDownloaded > 0 ? `<div class="stat-box"><div class="stat-number">${stats.transcriptsDownloaded}</div><div class="stat-label">Transcripts Downloaded</div></div>` : ''}
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
<!-- EXECUTIVE SUMMARY -->
|
||||
<div style="background:#fff;border:1px solid #e8e8e8;border-radius:16px;padding:32px;margin-bottom:40px;white-space:pre-line">${esc(report.executiveSummary)}</div>
|
||||
<div id="exec-summary" style="background:#fff;border:1px solid #e8e8e8;border-radius:16px;padding:32px;margin-bottom:40px;white-space:pre-line">${esc(report.executiveSummary)}</div>
|
||||
|
||||
<!-- SECTION 01: CATEGORY TRENDS -->
|
||||
<div class="section-header">01 — Category Trends</div>
|
||||
<div class="section-header" id="trends">01 — Category Trends</div>
|
||||
${trendsHtml}
|
||||
|
||||
<div class="pullquote">${esc(pullquoteText)}</div>
|
||||
${visualLanguageHtml}
|
||||
|
||||
${pq(0)}
|
||||
|
||||
<!-- SECTION 02: AUDIENCE INSIGHTS -->
|
||||
<div class="section-header">02 — Audience Insights</div>
|
||||
<div class="section-header" id="insights">02 — Audience Insights</div>
|
||||
<div class="insight-grid">
|
||||
${insightsHtml}
|
||||
</div>
|
||||
|
||||
${pq(1)}
|
||||
|
||||
<!-- CREATIVE FORMATS -->
|
||||
<div class="section-header">The Formats That Drive Engagement</div>
|
||||
<div class="section-header" id="formats">The Formats That Drive Engagement</div>
|
||||
<div class="format-grid">
|
||||
${formatsHtml}
|
||||
</div>
|
||||
|
||||
<!-- SECTION 03: CONTENT OPPORTUNITIES -->
|
||||
<div class="section-header">03 — Content Opportunities</div>
|
||||
<div class="section-header" id="opportunities">03 — Content Opportunities</div>
|
||||
${oppsHtml}
|
||||
|
||||
<!-- SECTION 04: CREATOR SPOTLIGHT -->
|
||||
<div class="section-header">04 — Creator Spotlight</div>
|
||||
${creatorsHtml}
|
||||
${pq(2)}
|
||||
|
||||
<!-- DESK RESEARCH SOURCES -->
|
||||
<div class="section-header">Desk Research Sources</div>
|
||||
<ul class="source-list">
|
||||
${sourcesHtml}
|
||||
</ul>
|
||||
<!-- SECTION 04: CREATOR SPOTLIGHT -->
|
||||
<div class="section-header" id="spotlight">04 — Creator Spotlight</div>
|
||||
${creatorsHtml}
|
||||
|
||||
<div class="footer">
|
||||
<div class="qa-badge">QA REVIEWED — Community Manager + Brand Strategist</div>
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ import { runStage3 } from './stages/stage3-discovery-scrape.js';
|
|||
import { runStage4 } from './stages/stage4-data-review.js';
|
||||
import { runStage5 } from './stages/stage5-enrichment-scrape.js';
|
||||
import { runStage6 } from './stages/stage6-pre-report-review.js';
|
||||
import { runStage7 } from './stages/stage7-desk-search.js';
|
||||
import { runStage8 } from './stages/stage8-report.js';
|
||||
|
||||
export type ProgressCallback = (
|
||||
|
|
@ -151,18 +150,15 @@ export async function runPipeline(
|
|||
state.stage6 = await runStage6(state.stage5.data, state.stage4.data, brief);
|
||||
emit(6, 'Pre-Report Review', 'done', `${state.stage6.data.deskSearchQueries.length} desk queries`);
|
||||
|
||||
// ─── Stage 7: Desk Research ───
|
||||
currentStage = 7; currentStageName = 'Desk Research';
|
||||
// ─── Stage 7: Skipped (Desk Research removed) ───
|
||||
emit(7, 'Desk Research', 'start');
|
||||
state.stage7 = await runStage7(state.stage6.data, brief);
|
||||
emit(7, 'Desk Research', 'done', `${state.stage7.data.length} sources`);
|
||||
emit(7, 'Desk Research', 'done', 'Skipped');
|
||||
|
||||
// ─── Stage 8: Report Generation ───
|
||||
currentStage = 8; currentStageName = 'Report Generation';
|
||||
emit(8, 'Report Generation', 'start');
|
||||
state.stage8 = await runStage8(
|
||||
state.stage5.data,
|
||||
state.stage7.data,
|
||||
state.stage2.data,
|
||||
state.stage4.data,
|
||||
brief,
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ function normaliseTikTok(raw: RawTikTokItem): Video | null {
|
|||
saveCount: raw.collectCount || 0,
|
||||
duration: raw.videoMeta?.duration,
|
||||
hashtags: raw.hashtags?.map(h => h.name) || [],
|
||||
thumbnailUrl: raw.videoMeta?.coverUrl,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -41,6 +42,7 @@ function normaliseInstagram(raw: RawInstagramItem): Video | null {
|
|||
saveCount: 0,
|
||||
duration: raw.duration,
|
||||
hashtags: raw.hashtags || [],
|
||||
thumbnailUrl: raw.displayUrl,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -59,6 +61,7 @@ function normaliseYouTube(raw: RawYouTubeItem): Video | null {
|
|||
commentCount: raw.commentsCount || 0,
|
||||
shareCount: 0,
|
||||
saveCount: 0,
|
||||
thumbnailUrl: raw.thumbnailUrl,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +124,7 @@ async function scrapeTikTok(brief: ClientBrief): Promise<Video[]> {
|
|||
const tag = rawHashtag.replace(/^#/, '');
|
||||
const items = await safeRunActor<RawTikTokItem>(
|
||||
ACTORS.TIKTOK_SCRAPER,
|
||||
{ hashtags: [tag], resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: false },
|
||||
{ hashtags: [tag], resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: false, oldestCreateTime: brief.dateRange.from },
|
||||
`TikTok hashtag: ${tag}`,
|
||||
);
|
||||
for (const item of items) { const v = normaliseTikTok(item); if (v) videos.push(v); }
|
||||
|
|
@ -148,7 +151,7 @@ async function scrapeInstagram(brief: ClientBrief): Promise<Video[]> {
|
|||
const tag = rawHashtag.replace(/^#/, '');
|
||||
const items = await safeRunActor<RawInstagramItem>(
|
||||
ACTORS.INSTAGRAM_HASHTAG,
|
||||
{ hashtags: [tag], resultsLimit: limits.resultsLimit },
|
||||
{ hashtags: [tag], resultsLimit: limits.resultsLimit, onlyPostsNewerThan: brief.dateRange.from },
|
||||
`Instagram hashtag: ${tag}`,
|
||||
);
|
||||
for (const item of items) { const v = normaliseInstagram(item); if (v) videos.push(v); }
|
||||
|
|
@ -175,7 +178,7 @@ async function scrapeYouTube(brief: ClientBrief): Promise<Video[]> {
|
|||
for (const query of queries) {
|
||||
const items = await safeRunActor<RawYouTubeItem>(
|
||||
ACTORS.YOUTUBE_SEARCH,
|
||||
{ searchQuery: query, maxResults: limits.maxResults },
|
||||
{ searchQuery: query, maxResults: limits.maxResults, uploadDate: 'month' },
|
||||
`YouTube search: ${query}`,
|
||||
);
|
||||
for (const item of items) { const v = normaliseYouTube(item); if (v) videos.push(v); }
|
||||
|
|
@ -213,7 +216,9 @@ export async function runStage3(brief: ClientBrief): Promise<StageResult<Discove
|
|||
}
|
||||
|
||||
// Filter last 30 days
|
||||
const preFilterCount = allVideos.length;
|
||||
allVideos = filterVideosLast30Days(allVideos, brief.dateRange);
|
||||
console.log(`[Stage 3] Date filter: ${brief.dateRange.from} to ${brief.dateRange.to} — kept ${allVideos.length} of ${preFilterCount} videos`);
|
||||
|
||||
// Update byPlatform with filtered videos
|
||||
for (const platform of brief.platforms) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
// ─── Stage 5: Enrichment Scrape (Transcripts + Comments) ───
|
||||
// ─── Stage 5: Enrichment Scrape (Transcripts + Comments + Thumbnails) ───
|
||||
import { ClientBrief, TopVideosSelection, EnrichmentData, EnrichedVideo, Video, StageResult } from '../types-v2.js';
|
||||
import { runActor, ACTORS, getLimits } from '../apify.js';
|
||||
|
||||
const MAX_COMMENTS_PER_PLATFORM = 1000;
|
||||
const MAX_COMMENTS_PER_PLATFORM = 2000;
|
||||
|
||||
interface TranscriptResult {
|
||||
url?: string;
|
||||
|
|
@ -13,8 +13,10 @@ interface TranscriptResult {
|
|||
|
||||
interface CommentResult {
|
||||
videoUrl?: string;
|
||||
postUrl?: string;
|
||||
text?: string;
|
||||
comment?: string;
|
||||
commentText?: string;
|
||||
}
|
||||
|
||||
/** Safely run a single actor — logs and continues on failure */
|
||||
|
|
@ -99,8 +101,8 @@ async function fetchTikTokComments(urls: string[]): Promise<Map<string, string[]
|
|||
);
|
||||
|
||||
for (const item of items) {
|
||||
const url = item.videoUrl;
|
||||
const text = item.text || item.comment;
|
||||
const url = item.videoUrl || item.postUrl;
|
||||
const text = item.text || item.comment || item.commentText;
|
||||
if (url && text) {
|
||||
const existing = map.get(url) || [];
|
||||
existing.push(text);
|
||||
|
|
@ -110,6 +112,38 @@ async function fetchTikTokComments(urls: string[]): Promise<Map<string, string[]
|
|||
return map;
|
||||
}
|
||||
|
||||
// ─── Thumbnail Download ───
|
||||
|
||||
/**
 * Download thumbnails for the most-played videos and return them as base64 data URIs.
 *
 * Videos without a `thumbnailUrl` are ignored; the rest are ranked by `playCount`
 * (descending) and the first `maxCount` are fetched one at a time. Every failure
 * is non-fatal: a video whose thumbnail cannot be fetched is simply absent from
 * the result.
 *
 * A thumbnail is skipped when the request errors or exceeds the timeout, the
 * response status is not OK, the body is empty, or the response declares a
 * non-image content type (for example an HTML error page served with 200).
 *
 * @param videos   Candidate videos.
 * @param maxCount Maximum number of thumbnails to attempt (default 50).
 * @returns Map from video `url` to a `data:<type>;base64,<payload>` string.
 */
async function fetchThumbnailsAsBase64(
  videos: Video[],
  maxCount: number = 50,
): Promise<Map<string, string>> {
  // Upper bound for one download (headers + body) so a hung connection cannot stall the stage.
  const timeoutMs = 10_000;

  const map = new Map<string, string>();
  const candidates = videos
    .filter(v => v.thumbnailUrl)
    .sort((a, b) => (b.playCount || 0) - (a.playCount || 0))
    .slice(0, maxCount);

  console.log(`[Stage 5] Downloading ${candidates.length} thumbnails...`);
  let downloaded = 0;

  for (const v of candidates) {
    const thumbnailUrl = v.thumbnailUrl;
    if (!thumbnailUrl) continue;

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(thumbnailUrl, { signal: controller.signal });
      if (!res.ok) continue;

      // Drop header parameters ("image/jpeg; charset=binary") so the data URI stays clean.
      const declaredType = (res.headers.get('content-type') ?? '').split(';')[0]?.trim().toLowerCase() ?? '';
      // A missing or generic binary type keeps the previous default of image/jpeg.
      const contentType = !declaredType || declaredType.endsWith('/octet-stream') ? 'image/jpeg' : declaredType;
      if (!contentType.startsWith('image/')) continue;

      const buffer = await res.arrayBuffer();
      if (buffer.byteLength === 0) continue;

      map.set(v.url, `data:${contentType};base64,${Buffer.from(buffer).toString('base64')}`);
      downloaded++;
    } catch {
      // Non-fatal — skip failed or timed-out thumbnails
    } finally {
      clearTimeout(timer);
    }
  }

  console.log(`[Stage 5] Downloaded ${downloaded} / ${candidates.length} thumbnails`);
  return map;
}
|
||||
|
||||
export async function runStage5(
|
||||
selection: TopVideosSelection,
|
||||
brief: ClientBrief,
|
||||
|
|
@ -128,6 +162,9 @@ export async function runStage5(
|
|||
const youtubeTranscripts = await fetchYouTubeTranscripts(youtubeUrls);
|
||||
const tiktokComments = await fetchTikTokComments(tiktokUrls);
|
||||
|
||||
// Download thumbnails (plain HTTP, no Apify cost)
|
||||
const thumbnailMap = await fetchThumbnailsAsBase64(selection.videos, 50);
|
||||
|
||||
// Merge all transcript maps
|
||||
const allTranscripts = new Map<string, string>();
|
||||
for (const [k, v] of tiktokTranscripts) allTranscripts.set(k, v);
|
||||
|
|
@ -139,14 +176,20 @@ export async function runStage5(
|
|||
...v,
|
||||
transcript: allTranscripts.get(v.url) || null,
|
||||
comments: tiktokComments.get(v.url) || [],
|
||||
thumbnailBase64: thumbnailMap.get(v.url),
|
||||
}));
|
||||
|
||||
const transcriptCount = enriched.filter(v => v.transcript).length;
|
||||
const commentCount = enriched.reduce((sum, v) => sum + v.comments.length, 0);
|
||||
|
||||
// Convert thumbnailMap to plain object for serialization
|
||||
const thumbnailObj: Record<string, string> = {};
|
||||
for (const [k, v] of thumbnailMap) thumbnailObj[k] = v;
|
||||
|
||||
console.log(`[Stage 5] Enrichment complete:`);
|
||||
console.log(` Transcripts: ${transcriptCount} / ${enriched.length}`);
|
||||
console.log(` Comments: ${commentCount}`);
|
||||
console.log(` Thumbnails: ${thumbnailMap.size}`);
|
||||
|
||||
return {
|
||||
stage: 5,
|
||||
|
|
@ -155,6 +198,7 @@ export async function runStage5(
|
|||
videos: enriched,
|
||||
transcriptCount,
|
||||
commentCount,
|
||||
thumbnailMap: thumbnailObj,
|
||||
},
|
||||
duration: Date.now() - start,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,15 +1,102 @@
|
|||
// ─── Stage 8: Final Report Generation (Opus) ───
|
||||
import {
|
||||
ClientBrief, EnrichmentData, DeskResearchSource, AgentReview,
|
||||
TopVideosSelection, FinalReport, ReportJSON, StageResult,
|
||||
ClientBrief, EnrichmentData, AgentReview,
|
||||
TopVideosSelection, FinalReport, ReportJSON, VisualCode, StageResult,
|
||||
} from '../types-v2.js';
|
||||
import { callClaudeJSON } from '../claude-cli.js';
|
||||
import { callClaudeJSON, callClaudeVision } from '../claude-cli.js';
|
||||
import { buildMarkdown } from '../html-report.js';
|
||||
import { generateHtmlReport } from '../html-report.js';
|
||||
|
||||
// ─── Visual Language Analysis ───
|
||||
|
||||
/**
 * Derive the category's "visual codes" from the downloaded thumbnails.
 *
 * Up to 50 thumbnails (in `thumbnailMap` key order) are sent to Claude Vision in
 * batches of 10. The free-text findings of the batches that succeeded are then
 * synthesised into a JSON list of visual codes. Fewer than 5 thumbnails, no
 * successful batch, or a failed/malformed synthesis all result in `[]`; this
 * function does not throw for those cases.
 *
 * Codes returned without an `exampleVideoUrl` are given one of the analysed videos
 * as their example. The assignment walks the analysed videos in order so that
 * different codes get different examples. NOTE(review): this is a positional
 * pairing, not a semantic match between the code and the thumbnail — having the
 * model name the matching thumbnail would be more accurate.
 *
 * @param enrichment Enrichment output; reads `thumbnailMap` and `videos`.
 * @returns The synthesised visual codes, possibly empty.
 */
async function analyseVisualLanguage(
  enrichment: EnrichmentData,
): Promise<VisualCode[]> {
  const thumbnailMap = enrichment.thumbnailMap || {};
  const entries = Object.entries(thumbnailMap);
  if (entries.length < 5) {
    console.log(`[Stage 8] Skipping visual analysis — only ${entries.length} thumbnails available`);
    return [];
  }

  console.log(`[Stage 8] Analysing visual language from ${entries.length} thumbnails...`);

  // Build lookup: url -> video info
  const videoLookup = new Map(enrichment.videos.map(v => [v.url, v]));

  // Take top 50, split into 5 batches of 10
  const top50 = entries.slice(0, 50);
  const batchSize = 10;
  const batchResults: string[] = [];

  for (let i = 0; i < top50.length; i += batchSize) {
    const images = top50.slice(i, i + batchSize).map(entry => entry[1]);
    const batchNum = Math.floor(i / batchSize) + 1;

    const prompt = `You are analysing ${images.length} video thumbnails from a social media category. For each thumbnail, describe:
1. Colour palette and dominant colours
2. Composition (close-up face, full body, flat lay, text-heavy, etc.)
3. Text overlays (if any) — font style, positioning
4. Facial expressions and body language
5. Setting/environment
6. Any recurring visual motifs

Then identify 2-3 visual PATTERNS you see across multiple thumbnails in this batch. Be specific and concrete.`;

    try {
      const result = await callClaudeVision(images, prompt, 'claude-sonnet-4-6');
      batchResults.push(result.text);
      console.log(`[Stage 8] Visual batch ${batchNum} complete`);
    } catch (err) {
      // A failed batch only reduces the evidence available to the synthesis step.
      const message = err instanceof Error ? err.message : String(err);
      console.warn(`[Stage 8] Visual batch ${batchNum} failed: ${message}`);
    }
  }

  if (!batchResults.length) return [];

  // Synthesis: merge batch results into visual codes
  const synthesisPrompt = `You analysed video thumbnails from a social media category in batches. Here are the batch-by-batch findings:

${batchResults.map((r, i) => `--- BATCH ${i + 1} ---\n${r}`).join('\n\n')}

Synthesise these observations into exactly 5-6 VISUAL CODES — recurring visual patterns that define this category's visual language. Each visual code should be a specific, named pattern (e.g. "The Bare-Face Close-Up", "Pastel Flat Lay", "Text-First Controversy Hook").

Return JSON array:
[
  {
    "name": "Visual Code Name",
    "description": "2-3 sentences describing the visual pattern — what it looks like, why creators use it, what emotion it conveys",
    "frequency": "Seen in X of Y thumbnails analysed"
  }
]`;

  try {
    const parsed: unknown = await callClaudeJSON<VisualCode[]>(synthesisPrompt, 'claude-sonnet-4-6');
    if (!Array.isArray(parsed)) {
      console.warn('[Stage 8] Visual synthesis failed: response was not a JSON array');
      return [];
    }
    const codes = parsed.filter(
      (c): c is VisualCode => typeof c === 'object' && c !== null,
    );

    // Attach an example video to each code that lacks one. Indexing by the code's
    // position (wrapping around) keeps the examples distinct; always using the
    // first entry made every card show the same thumbnail, author and play count.
    codes.forEach((code, idx) => {
      if (code.exampleVideoUrl) return;
      const entry = top50[idx % top50.length];
      if (!entry) return;
      const url = entry[0];
      const video = videoLookup.get(url);
      code.exampleVideoUrl = url;
      code.exampleAuthor = video?.author || '';
      code.examplePlays = video?.playCount || 0;
    });

    console.log(`[Stage 8] Visual analysis complete: ${codes.length} visual codes`);
    return codes;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.warn(`[Stage 8] Visual synthesis failed: ${message}`);
    return [];
  }
}
|
||||
|
||||
function buildReportPrompt(
|
||||
enrichment: EnrichmentData,
|
||||
deskSources: DeskResearchSource[],
|
||||
agentReviews: AgentReview[],
|
||||
selection: TopVideosSelection,
|
||||
brief: ClientBrief,
|
||||
|
|
@ -32,9 +119,6 @@ Comments: ${comments}`;
|
|||
// Agent hypotheses
|
||||
const hypotheses = selection.hypotheses.join('\n- ');
|
||||
|
||||
// Desk sources summary
|
||||
const sourcesSummary = deskSources.map(s => `- ${s.title}: ${s.summary}`).join('\n');
|
||||
|
||||
return `You are generating a social listening report for ${brief.clientName} in the ${brief.category} category.
|
||||
|
||||
DATE RANGE: ${brief.dateRange.from} to ${brief.dateRange.to}
|
||||
|
|
@ -49,20 +133,24 @@ ${urlIndex}
|
|||
STRATEGIST HYPOTHESES:
|
||||
- ${hypotheses}
|
||||
|
||||
DESK RESEARCH SOURCES:
|
||||
${sourcesSummary}
|
||||
|
||||
HARD RULES:
|
||||
- Every topVideoUrl MUST be an exact URL from the VIDEO URL INDEX above
|
||||
- Every topVideoPlays MUST exactly match the plays number from the index
|
||||
- Never describe influencer content as organic unless proven — default assumption for branded creator content = paid
|
||||
- Each trend/insight/opportunity must be GENUINELY DISTINCT — no duplication disguised with different words
|
||||
- Creator Spotlight requires 2+ videos with strong engagement, not single viral hits
|
||||
- Trends must be timely (last 30 days), not evergreen observations
|
||||
- TIMELINESS IS CRITICAL: Every trend must be anchored to specific videos from the last 30 days. Do NOT include evergreen observations like "authenticity matters" or "short-form video is growing". If a trend could have been written 6 months ago, it is NOT a trend — it is a category norm. Focus on what is NEW, surprising, or accelerating in the data window ${brief.dateRange.from} to ${brief.dateRange.to}. Name specific creators, specific videos, specific moments.
|
||||
- AUDIENCE INSIGHTS must prioritize comment text over video metadata. Mine the Comments fields for actual audience language — confessions, questions, debates, purchase-intent signals, requests. Each exampleQuote MUST be a real comment from the corpus, not a caption or description. If comments are available, insights should read like community analysis, not metadata summaries.
|
||||
- Each trend MUST include 2-3 supportingVideos from the VIDEO URL INDEX — these will be embedded in the report
|
||||
- supportingVideos should include the platform field matching [tiktok|instagram|youtube] from the index
|
||||
- 7-12 trends, exactly 6 audience insights, 7 content opportunities, 1-2 creator spotlights
|
||||
|
||||
CREATOR SPOTLIGHT SELECTION:
|
||||
- Only consider creators with 2-10 videos in the corpus
|
||||
- EXCLUDE any creator whose videos make up more than 50% of the total dataset — that is category domination, not a discovery
|
||||
- Score each eligible creator: score = avg_likes_per_video × num_videos × engagement_rate (where engagement_rate = (likes + comments + shares) / plays)
|
||||
- Select the top 1-2 creators by this score
|
||||
- The spotlight should surface mid-tier creators who consistently resonate, not mega-influencers who are already obvious
|
||||
|
||||
Return this EXACT JSON structure:
|
||||
{
|
||||
"executiveSummary": "3-4 paragraph narrative overview of the category landscape",
|
||||
|
|
@ -107,13 +195,13 @@ Return this EXACT JSON structure:
|
|||
"keyVideos": [{"url": "EXACT url", "description": "Brief desc", "plays": 12345}],
|
||||
"growthSignal": "Trajectory indicator"
|
||||
}
|
||||
]
|
||||
],
|
||||
"pullquotes": ["3-4 sharp, quotable one-liners that summarize key findings. Editorial in tone — pithy, insight-driven sentences a reader would want to screenshot. These will be displayed as visual dividers between report sections."]
|
||||
}`;
|
||||
}
|
||||
|
||||
export async function runStage8(
|
||||
enrichment: EnrichmentData,
|
||||
deskSources: DeskResearchSource[],
|
||||
agentReviews: AgentReview[],
|
||||
selection: TopVideosSelection,
|
||||
brief: ClientBrief,
|
||||
|
|
@ -121,25 +209,28 @@ export async function runStage8(
|
|||
const start = Date.now();
|
||||
console.log('[Stage 8] Generating final report via Claude Opus...');
|
||||
|
||||
const prompt = buildReportPrompt(enrichment, deskSources, agentReviews, selection, brief);
|
||||
// Run visual language analysis (before main report)
|
||||
const visualCodes = await analyseVisualLanguage(enrichment);
|
||||
|
||||
const prompt = buildReportPrompt(enrichment, agentReviews, selection, brief);
|
||||
|
||||
const reportJSON = await callClaudeJSON<ReportJSON>(prompt, 'claude-opus-4-6', {
|
||||
timeout: 600_000, // 10 min
|
||||
});
|
||||
|
||||
// Attach desk sources
|
||||
reportJSON.deskSources = deskSources;
|
||||
reportJSON.deskSources = [];
|
||||
reportJSON.visualCodes = visualCodes;
|
||||
|
||||
const stats = {
|
||||
videosScraped: enrichment.videos.length,
|
||||
commentsAnalysed: enrichment.commentCount,
|
||||
transcriptsDownloaded: enrichment.transcriptCount,
|
||||
deskSources: deskSources.length,
|
||||
deskSources: 0,
|
||||
};
|
||||
|
||||
// Build outputs
|
||||
const markdown = buildMarkdown(reportJSON, brief, stats);
|
||||
const html = generateHtmlReport(reportJSON, brief, stats);
|
||||
const html = generateHtmlReport(reportJSON, brief, stats, enrichment.thumbnailMap);
|
||||
|
||||
const finalReport: FinalReport = {
|
||||
...reportJSON,
|
||||
|
|
|
|||
|
|
@ -34,11 +34,13 @@ export interface Video {
|
|||
duration?: number;
|
||||
hashtags?: string[];
|
||||
engagementScore?: number;
|
||||
thumbnailUrl?: string;
|
||||
}
|
||||
|
||||
/** A `Video` extended with the data gathered by the Stage 5 enrichment scrape. */
export interface EnrichedVideo extends Video {
|
||||
// Transcript text for the video, or null when none was found for its URL.
transcript: string | null;
|
||||
// Comment texts collected for the video; an empty array when none were found.
comments: string[];
|
||||
// Downloaded thumbnail as a `data:<type>;base64,...` URI; absent when no thumbnail was downloaded.
thumbnailBase64?: string;
|
||||
}
|
||||
|
||||
export interface AgentReview {
|
||||
|
|
@ -76,6 +78,7 @@ export interface EnrichmentData {
|
|||
videos: EnrichedVideo[];
|
||||
transcriptCount: number;
|
||||
commentCount: number;
|
||||
thumbnailMap?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface PreReportReview {
|
||||
|
|
@ -136,6 +139,15 @@ export interface CreatorSpotlight {
|
|||
growthSignal: string;
|
||||
}
|
||||
|
||||
/**
 * A recurring visual pattern found across the analysed thumbnails, rendered as
 * one card in the report's "Visual Language" section.
 */
export interface VisualCode {
|
||||
// Short display name of the pattern; shown as the card label and used as the thumbnail alt text.
name: string;
|
||||
// Prose description of the pattern.
description: string;
|
||||
// Free-text prevalence statement, e.g. "Seen in X of Y thumbnails analysed".
frequency: string;
|
||||
// URL of a video illustrating the code; also the key used to look up its thumbnail.
// NOTE(review): the synthesis prompt does not request this field, so it may be unset until an example is attached — confirm.
exampleVideoUrl: string;
|
||||
// Author of the example video; the example line is omitted from the card when this is empty.
exampleAuthor: string;
|
||||
// Play count of the example video.
examplePlays: number;
|
||||
}
|
||||
|
||||
export interface ReportJSON {
|
||||
executiveSummary: string;
|
||||
trends: Trend[];
|
||||
|
|
@ -143,6 +155,8 @@ export interface ReportJSON {
|
|||
contentOpportunities: ContentOpportunity[];
|
||||
creatorSpotlight: CreatorSpotlight[];
|
||||
deskSources: DeskResearchSource[];
|
||||
pullquotes?: string[];
|
||||
visualCodes?: VisualCode[];
|
||||
}
|
||||
|
||||
export interface FinalReport extends ReportJSON {
|
||||
|
|
@ -190,7 +204,7 @@ export interface RawTikTokItem {
|
|||
commentCount?: number;
|
||||
shareCount?: number;
|
||||
collectCount?: number;
|
||||
videoMeta?: { duration?: number };
|
||||
videoMeta?: { duration?: number; coverUrl?: string };
|
||||
hashtags?: { name: string }[];
|
||||
}
|
||||
|
||||
|
|
@ -207,6 +221,7 @@ export interface RawInstagramItem {
|
|||
commentsCount?: number;
|
||||
duration?: number;
|
||||
hashtags?: string[];
|
||||
displayUrl?: string;
|
||||
}
|
||||
|
||||
export interface RawYouTubeItem {
|
||||
|
|
@ -218,4 +233,5 @@ export interface RawYouTubeItem {
|
|||
viewCount?: number;
|
||||
likes?: number;
|
||||
commentsCount?: number;
|
||||
thumbnailUrl?: string;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue