Fix UK→GB geo normalisation + add a clear Stage 8 too-few-videos error
The Dove2 run on prod failed because every hashtag/search seed 400'd
with "Field input.proxyCountryCode must be equal to one of the allowed
values" — Apify uses ISO codes ("GB"), not the colloquial "UK" stored
on the brief. Only profile scrapes (which don't pass proxyCountryCode)
got through, leaving 24 videos and a 16% manifest gate.
Two layers of fix:
- Brief Zod schema transforms geo: trims, uppercases, maps "UK" → "GB".
All briefs created or edited from now on are normalised at the form
boundary.
- Stage 2 also normalises at actor-input time, as belt-and-braces for
briefs already in the DB with "UK" written before this commit.
Plus a clear pre-flight error in Stage 8: when fewer than 5 videos make
it through analysis, the trends schema literally can't be satisfied (each
trend needs ≥5 supporting_video_ids). Previously Claude tried, Zod
rejected with a 50-line "too_small" wall, and the operator was left
guessing. Now we throw a single actionable message pointing at the actual
cause: the dataset is too small — adjust the brief and force re-run.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
404425a06e
commit
821d9cbc45
3 changed files with 34 additions and 3 deletions
|
|
@ -208,6 +208,15 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
|
|||
// overshoot it slightly. Worth ~$3 of overshoot to save 10+ minutes.
|
||||
const PASS1_PARALLEL = 4;
|
||||
|
||||
// Apify rejects "UK" — its allowed-list uses ISO codes, where Britain is "GB".
|
||||
// Briefs created before the schema normalisation in v2/server/schemas/brief.ts
|
||||
// (and therefore possibly still holding the colloquial "UK") need this safety net or every
|
||||
// hashtag/search seed 400s with "Field input.proxyCountryCode must be equal..."
|
||||
const proxyCountry = (() => {
|
||||
const upper = (brief.geo || '').trim().toUpperCase();
|
||||
return upper === 'UK' ? 'GB' : upper;
|
||||
})();
|
||||
|
||||
function buildActorInput(job: ScrapeJob): { actor: string; input: Record<string, unknown>; label: string } {
|
||||
const label = job.kind === 'hashtag'
|
||||
? `hashtag:${job.tag} (${job.tier})`
|
||||
|
|
@ -223,7 +232,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
|
|||
resultsPerPage: limits.resultsPerPage,
|
||||
shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true',
|
||||
shouldDownloadCovers: true,
|
||||
proxyCountryCode: brief.geo,
|
||||
proxyCountryCode: proxyCountry,
|
||||
minPlayCount: brief.min_plays,
|
||||
},
|
||||
};
|
||||
|
|
@ -248,7 +257,7 @@ export async function runStage2Pass1Scrape(args: Stage2Args): Promise<{ ok: true
|
|||
resultsPerPage: limits.resultsPerPage,
|
||||
shouldDownloadVideos: process.env.DISABLE_VIDEO_DOWNLOADS !== 'true',
|
||||
shouldDownloadCovers: true,
|
||||
proxyCountryCode: brief.geo,
|
||||
proxyCountryCode: proxyCountry,
|
||||
minPlayCount: brief.min_plays,
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -223,6 +223,21 @@ export async function runStage8Trends(reportId: string, brief: BriefInput): Prom
|
|||
|
||||
// Load per-video analyses + pass1 for KPI computation
|
||||
const analyses = loadAnalyses(reportId);
|
||||
|
||||
// Trend synthesis schema requires ≥5 supporting videos per trend. If the
|
||||
// analysis pool is smaller than that, the rubric literally cannot satisfy
|
||||
// the schema — Claude will return whatever it can and Zod throws an
|
||||
// unactionable wall of "Array must contain at least 5 element(s)" errors
|
||||
// that points the user at "fix the data shape" rather than "your dataset
|
||||
// is too small". Fail loudly and clearly here.
|
||||
if (analyses.size < MIN_SUPPORTING) {
|
||||
throw new Error(
|
||||
`Stage 8: only ${analyses.size} videos analysed but trend synthesis needs at least ${MIN_SUPPORTING} per trend. ` +
|
||||
`This usually means Stage 2 (broad scrape) returned too few videos — check the brief: ` +
|
||||
`lower min_likes/min_plays, broaden seed hashtags, raise budget_usd, or verify geo is a valid ISO code (e.g. "GB" not "UK"). ` +
|
||||
`Then click Force re-run.`,
|
||||
);
|
||||
}
|
||||
const pass1Path = PATHS.pass1Videos(reportId);
|
||||
type LiteMeta = { plays: number; likes: number; saves: number; comments_count: number; shares: number; stl_pct: number; handle: string };
|
||||
const pass1Lite = new Map<string, LiteMeta>();
|
||||
|
|
|
|||
|
|
@ -27,7 +27,14 @@ export const BRIEF_INPUT = z.object({
|
|||
}),
|
||||
competitors: z.array(COMPETITOR).min(3).max(15),
|
||||
audience: AUDIENCE,
|
||||
geo: z.string().min(2),
|
||||
// Apify's TikTok scraper requires ISO 3166-1 alpha-2 codes for proxyCountryCode.
|
||||
// The most common gotcha is "UK" — colloquial but invalid; the ISO code is "GB".
|
||||
// We normalise on input so the brief form is forgiving but the wire payload is correct.
|
||||
geo: z.string().min(2).transform((v) => {
|
||||
const upper = v.trim().toUpperCase();
|
||||
if (upper === 'UK') return 'GB';
|
||||
return upper;
|
||||
}),
|
||||
language: z.string().default('en'),
|
||||
business_question: z.string().refine(
|
||||
(v) => v.split(/\s+/).filter(Boolean).length >= 8,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue