diff --git a/v2/pipeline/stages/stage_2_pass1_scrape.ts b/v2/pipeline/stages/stage_2_pass1_scrape.ts
index 0753978..93908a5 100644
--- a/v2/pipeline/stages/stage_2_pass1_scrape.ts
+++ b/v2/pipeline/stages/stage_2_pass1_scrape.ts
@@ -208,6 +208,15 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
   // overshoot it slightly. Worth ~$3 of overshoot to save 10+ minutes.
   const PASS1_PARALLEL = 4;
 
+  // Apify rejects "UK" — its allowed-list uses ISO codes, where Britain is "GB".
+  // Briefs created before the schema normalisation in v2/server/schemas/brief.ts
+  // (or any other "colloquial" alpha-2 collision) need this safety net or every
+  // hashtag/search seed 400s with "Field input.proxyCountryCode must be equal..."
+  const proxyCountry = (() => {
+    const upper = (brief.geo || '').trim().toUpperCase();
+    return upper === 'UK' ? 'GB' : upper;
+  })();
+
   function buildActorInput(job: ScrapeJob): { actor: string; input: Record; label: string } {
     const label = job.kind === 'hashtag'
       ? `hashtag:${job.tag} (${job.tier})`
@@ -223,7 +232,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
           resultsPerPage: limits.resultsPerPage,
           shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true',
           shouldDownloadCovers: true,
-          proxyCountryCode: brief.geo,
+          proxyCountryCode: proxyCountry,
           minPlayCount: brief.min_plays,
         },
       };
@@ -248,7 +257,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
           resultsPerPage: limits.resultsPerPage,
           shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true',
           shouldDownloadCovers: true,
-          proxyCountryCode: brief.geo,
+          proxyCountryCode: proxyCountry,
           minPlayCount: brief.min_plays,
         },
       };
diff --git a/v2/pipeline/stages/stage_8_trends.ts b/v2/pipeline/stages/stage_8_trends.ts
index 3068b22..e5c125f 100644
--- a/v2/pipeline/stages/stage_8_trends.ts
+++ b/v2/pipeline/stages/stage_8_trends.ts
@@ -223,6 +223,21 @@ export async function runStage8Trends(reportId: string, brief: BriefInput): Prom
 
   // Load per-video analyses + pass1 for KPI computation
   const analyses = loadAnalyses(reportId);
+
+  // Trend synthesis schema requires ≥5 supporting videos per trend. If the
+  // analysis pool is smaller than that, the rubric literally cannot satisfy
+  // the schema — Claude will return whatever it can and Zod throws an
+  // unactionable wall of "Array must contain at least 5 element(s)" errors
+  // that points the user at "fix the data shape" rather than "your dataset
+  // is too small". Fail loudly and clearly here.
+  if (analyses.size < MIN_SUPPORTING) {
+    throw new Error(
+      `Stage 8: only ${analyses.size} videos analysed but trend synthesis needs at least ${MIN_SUPPORTING} per trend. ` +
+        `This usually means Stage 2 (broad scrape) returned too few videos — check the brief: ` +
+        `lower min_likes/min_plays, broaden seed hashtags, raise budget_usd, or verify geo is a valid ISO code (e.g. "GB" not "UK"). ` +
+        `Then click Force re-run.`,
+    );
+  }
   const pass1Path = PATHS.pass1Videos(reportId);
   type LiteMeta = { plays: number; likes: number; saves: number; comments_count: number; shares: number; stl_pct: number; handle: string };
   const pass1Lite = new Map();
diff --git a/v2/server/schemas/brief.ts b/v2/server/schemas/brief.ts
index b68ae64..7b218bf 100644
--- a/v2/server/schemas/brief.ts
+++ b/v2/server/schemas/brief.ts
@@ -27,7 +27,16 @@ export const BRIEF_INPUT = z.object({
   }),
   competitors: z.array(COMPETITOR).min(3).max(15),
   audience: AUDIENCE,
-  geo: z.string().min(2),
+  // Apify's TikTok scraper requires ISO 3166-1 alpha-2 codes for proxyCountryCode.
+  // The most common gotcha is "UK" — colloquial but invalid; the ISO code is "GB".
+  // We normalise on input so the brief form is forgiving but the wire payload is correct.
+  // Trim BEFORE the length check: otherwise padded or whitespace-only input ("  ", " U")
+  // passes min(2) and then normalises to a code shorter than two characters.
+  geo: z.string().trim().min(2).transform((v) => {
+    const upper = v.toUpperCase();
+    if (upper === 'UK') return 'GB';
+    return upper;
+  }),
   language: z.string().default('en'),
   business_question: z.string().refine(
     (v) => v.split(/\s+/).filter(Boolean).length >= 8,