From 821d9cbc45bbd017a742b423fbb89cf2899bb68a Mon Sep 17 00:00:00 2001 From: DJP Date: Thu, 30 Apr 2026 08:49:03 -0400 Subject: [PATCH] =?UTF-8?q?Fix=20UK=E2=86=92GB=20geo=20normalisation=20+?= =?UTF-8?q?=20clear=20Stage=208=20too-few-videos=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dove2 run on prod failed because every hashtag/search seed 400'd with "Field input.proxyCountryCode must be equal to one of the allowed values" — Apify uses ISO codes ("GB"), not the colloquial "UK" stored on the brief. Only profile scrapes (which don't pass proxyCountryCode) got through, leaving 24 videos and a 16% manifest gate. Two layers of fix: - Brief Zod schema transforms geo: trims, uppercases, maps "UK" → "GB". All briefs created or edited from now on are normalised at the form boundary. - Stage 2 also normalises at actor-input time, as belt-and-braces for briefs already in the DB with "UK" written before this commit. Plus a clear pre-flight error in Stage 8: when fewer than 5 videos made it through analysis the trends schema literally can't be satisfied (each trend needs ≥5 supporting_video_ids). Previously Claude tried, Zod rejected with a 50-line "too_small" wall, and the operator was left guessing. Now we throw a single sentence pointing at the actual cause: the dataset is too small — adjust the brief and force re-run. Co-Authored-By: Claude Opus 4.7 (1M context) --- v2/pipeline/stages/stage_2_pass1_scrape.ts | 13 +++++++++++-- v2/pipeline/stages/stage_8_trends.ts | 15 +++++++++++++++ v2/server/schemas/brief.ts | 9 ++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/v2/pipeline/stages/stage_2_pass1_scrape.ts b/v2/pipeline/stages/stage_2_pass1_scrape.ts index 0753978..93908a5 100644 --- a/v2/pipeline/stages/stage_2_pass1_scrape.ts +++ b/v2/pipeline/stages/stage_2_pass1_scrape.ts @@ -208,6 +208,15 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true // overshoot it slightly. Worth ~$3 of overshoot to save 10+ minutes. const PASS1_PARALLEL = 4; + // Apify rejects "UK" — its allowed-list uses ISO codes, where Britain is "GB". + // Briefs created before the schema normalisation in v2/server/schemas/brief.ts + // (or any other "colloquial" alpha-2 collision) need this safety net or every + // hashtag/search seed 400s with "Field input.proxyCountryCode must be equal..." + const proxyCountry = (() => { + const upper = (brief.geo || '').trim().toUpperCase(); + return upper === 'UK' ? 'GB' : upper; + })(); + function buildActorInput(job: ScrapeJob): { actor: string; input: Record; label: string } { const label = job.kind === 'hashtag' ? `hashtag:${job.tag} (${job.tier})` @@ -223,7 +232,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true', shouldDownloadCovers: true, - proxyCountryCode: brief.geo, + proxyCountryCode: proxyCountry, minPlayCount: brief.min_plays, }, }; @@ -248,7 +257,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true resultsPerPage: limits.resultsPerPage, shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true', shouldDownloadCovers: true, - proxyCountryCode: brief.geo, + proxyCountryCode: proxyCountry, minPlayCount: brief.min_plays, }, }; diff --git a/v2/pipeline/stages/stage_8_trends.ts b/v2/pipeline/stages/stage_8_trends.ts index 3068b22..e5c125f 100644 --- a/v2/pipeline/stages/stage_8_trends.ts +++ b/v2/pipeline/stages/stage_8_trends.ts @@ -223,6 +223,21 @@ export async function runStage8Trends(reportId: string, brief: BriefInput): Prom // Load per-video analyses + pass1 for KPI computation const analyses = loadAnalyses(reportId); + + // Trend synthesis schema requires ≥5 supporting videos per trend. If the + // analysis pool is smaller than that, the rubric literally cannot satisfy + // the schema — Claude will return whatever it can and Zod throws an + // unactionable wall of "Array must contain at least 5 element(s)" errors + // that points the user at "fix the data shape" rather than "your dataset + // is too small". Fail loudly and clearly here. + if (analyses.size < MIN_SUPPORTING) { + throw new Error( + `Stage 8: only ${analyses.size} videos analysed but trend synthesis needs at least ${MIN_SUPPORTING} per trend. ` + + `This usually means Stage 2 (broad scrape) returned too few videos — check the brief: ` + + `lower min_likes/min_plays, broaden seed hashtags, raise budget_usd, or verify geo is a valid ISO code (e.g. "GB" not "UK"). ` + + `Then click Force re-run.`, + ); + } const pass1Path = PATHS.pass1Videos(reportId); type LiteMeta = { plays: number; likes: number; saves: number; comments_count: number; shares: number; stl_pct: number; handle: string }; const pass1Lite = new Map(); diff --git a/v2/server/schemas/brief.ts b/v2/server/schemas/brief.ts index b68ae64..7b218bf 100644 --- a/v2/server/schemas/brief.ts +++ b/v2/server/schemas/brief.ts @@ -27,7 +27,14 @@ export const BRIEF_INPUT = z.object({ }), competitors: z.array(COMPETITOR).min(3).max(15), audience: AUDIENCE, - geo: z.string().min(2), + // Apify's TikTok scraper requires ISO 3166-1 alpha-2 codes for proxyCountryCode. + // The most common gotcha is "UK" — colloquial but invalid; the ISO code is "GB". + // We normalise on input so the brief form is forgiving but the wire payload is correct. + geo: z.string().min(2).transform((v) => { + const upper = v.trim().toUpperCase(); + if (upper === 'UK') return 'GB'; + return upper; + }), language: z.string().default('en'), business_question: z.string().refine( (v) => v.split(/\s+/).filter(Boolean).length >= 8,