social-reporting-tool/v2/db/init.sql
DJP 3e71df8a79 Lower default engagement floor 10×; richer Stage 3 diagnostic
The 1000 likes / 10000 plays defaults are calibrated for top-of-funnel
beauty/fitness scrapes; in narrower TikTok niches almost every video lands
below them and Stage 2 returns 0 keepers. Defaults dropped to 100 likes /
1000 plays across:
- server/schemas/brief.ts (Zod default)
- db/init.sql (column default for new DBs)
- examples/dove-demo-brief.json
- operator-app's brief-form initial values
- operator-app's "Load Dove demo" inline brief

Stage 3 empty-pass1 error now reads pass1/spend_log.json and reports the
actual scrape breakdown — total $ spent, total raw videos returned, and
how many got dropped by each floor (zero-plays / min_plays / min_likes /
min_stl_pct). So instead of a generic "lower the floor", the user sees:
"Spent $5.42 across 7 scrapes; 1400 videos returned. Dropped: 12 zero-plays,
1305 below min_plays=10000, 31 below min_likes=1000."

Existing briefs are unaffected (column default applies to NEW rows). For the
in-flight Dove2 run the user can edit the brief and lower the floor, then
click Retry pipeline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 20:28:23 -04:00

214 lines
8.7 KiB
SQL

-- Social Reporting V2 — fresh schema
-- Coexists with V1 in a separate database (`social_reporting_v2`).
-- Forward-only migrations under v2/db/migrations/.
-- Extensions this schema relies on:
--   pgcrypto  provides gen_random_uuid(), the default for the UUID primary keys
--   citext    provides the case-insensitive CITEXT type used by users.email
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS citext;
-- ─── Identity ───────────────────────────────────────────────────────────
-- One row per person. azure_oid and email are each required and unique;
-- email is CITEXT, so its uniqueness is case-insensitive.
CREATE TABLE IF NOT EXISTS users (
    id             UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    azure_oid      TEXT        NOT NULL UNIQUE,
    email          CITEXT      NOT NULL UNIQUE,
    display_name   TEXT        NOT NULL,
    is_super_admin BOOLEAN     NOT NULL DEFAULT FALSE,
    password_hash  TEXT,                                -- nullable
    created_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_login_at  TIMESTAMPTZ                          -- nullable
);

-- NOTE(review): the UNIQUE constraints above already create unique indexes on
-- azure_oid and email, so these two look redundant. Kept because other scripts
-- may reference the index names; confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_users_azure_oid
    ON users (azure_oid);
CREATE INDEX IF NOT EXISTS idx_users_email
    ON users (email);
-- Teams are addressed by a globally unique slug.
CREATE TABLE IF NOT EXISTS teams (
    id          UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    slug        TEXT        NOT NULL UNIQUE,
    name        TEXT        NOT NULL,
    is_personal BOOLEAN     NOT NULL DEFAULT FALSE,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- CREATE TYPE has no IF NOT EXISTS form, so the duplicate_object error is
-- trapped to keep this script re-runnable.
DO $$
BEGIN
    CREATE TYPE team_role AS ENUM (
        'owner',
        'admin',
        'editor',
        'viewer'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;
-- Membership of a user in a team, at most one row (and one role) per pair.
-- Rows are removed automatically when either the team or the user is deleted.
CREATE TABLE IF NOT EXISTS team_memberships (
    team_id  UUID        NOT NULL REFERENCES teams(id) ON DELETE CASCADE,
    user_id  UUID        NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    role     team_role   NOT NULL,
    -- NOTE(review): no ON DELETE action here, so deleting a user who is
    -- recorded as added_by on another membership is rejected. Confirm intended.
    added_by UUID        REFERENCES users(id),
    added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (team_id, user_id)
);

-- The primary key leads with team_id; this index serves lookups by user_id.
CREATE INDEX IF NOT EXISTS idx_memberships_user
    ON team_memberships (user_id);
-- ─── Briefs / Reports ───────────────────────────────────────────────────
-- A brief belongs to one team and one owner. Both references are
-- ON DELETE RESTRICT, so a team or user with briefs cannot be deleted.
-- The slug is unique within a team.
CREATE TABLE IF NOT EXISTS briefs (
    id                UUID          DEFAULT gen_random_uuid() PRIMARY KEY,
    team_id           UUID          NOT NULL REFERENCES teams(id) ON DELETE RESTRICT,
    owner_id          UUID          NOT NULL REFERENCES users(id) ON DELETE RESTRICT,
    slug              TEXT          NOT NULL,
    client_name       TEXT          NOT NULL,
    category          TEXT          NOT NULL,
    business_question TEXT          NOT NULL,
    date_window_days  INTEGER       NOT NULL DEFAULT 30,
    budget_usd        NUMERIC(10,2) NOT NULL,
    platforms         TEXT[]        NOT NULL DEFAULT ARRAY['tiktok'],
    positioning       JSONB,
    kpis              JSONB,
    context_vision    TEXT,

    -- Hashtag engagement floor (the V2 quality knob).
    -- These defaults are calibrated for niche-category scrapes; raise them
    -- for broader categories. A column default only applies to newly
    -- inserted rows that omit the column.
    min_likes         INTEGER       NOT NULL DEFAULT 100,
    min_plays         INTEGER       NOT NULL DEFAULT 1000,
    min_stl_pct       NUMERIC(5,2)  NOT NULL DEFAULT 0,

    -- Plain UUID here; its foreign key to reports(id) is attached by a
    -- separate ALTER TABLE once the reports table exists.
    prior_report_id   UUID,
    brief_yaml        JSONB         NOT NULL,
    created_at        TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    UNIQUE (team_id, slug)
);

-- NOTE(review): the UNIQUE (team_id, slug) index already leads with team_id,
-- so idx_briefs_team may be redundant. Confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_briefs_team
    ON briefs (team_id);
CREATE INDEX IF NOT EXISTS idx_briefs_owner
    ON briefs (owner_id);
-- Status values for a report. Enum labels sort in declaration order, so the
-- order below is significant and must not be rearranged.
-- The duplicate_object trap keeps this script re-runnable.
DO $$
BEGIN
    CREATE TYPE report_status AS ENUM (
        'pending',
        'seeds',
        'pass1',
        'select',
        'pass2',
        'validate',
        'analyse',
        'insights',
        'trends',
        'qa',
        'build',
        'completed',
        'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;
-- One row per report generated from a brief. Deleting the brief deletes its
-- reports (ON DELETE CASCADE). team_id and triggered_by have no ON DELETE
-- action, so the referenced team/user cannot be deleted while reports exist.
CREATE TABLE IF NOT EXISTS reports (
    id                 UUID          DEFAULT gen_random_uuid() PRIMARY KEY,
    brief_id           UUID          NOT NULL REFERENCES briefs(id) ON DELETE CASCADE,
    team_id            UUID          NOT NULL REFERENCES teams(id),
    triggered_by       UUID          NOT NULL REFERENCES users(id),
    status             report_status NOT NULL DEFAULT 'pending',
    current_stage      INTEGER       NOT NULL DEFAULT 0,
    started_at         TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    finished_at        TIMESTAMPTZ,                       -- nullable
    apify_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    claude_cost_usd    NUMERIC(10,4) NOT NULL DEFAULT 0,
    total_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    fs_root            TEXT          NOT NULL,
    manifest_passed_at TIMESTAMPTZ,                       -- nullable
    error_message      TEXT                               -- nullable
);

-- Both indexes are ordered newest-first within a team / brief.
CREATE INDEX IF NOT EXISTS idx_reports_team
    ON reports (team_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_reports_brief
    ON reports (brief_id, started_at DESC);
-- briefs.prior_report_id -> reports.id.
-- Added after both tables exist: briefs and reports reference each other
-- (reports.brief_id -> briefs.id), so this FK cannot be declared inline in
-- CREATE TABLE briefs.
-- ADD CONSTRAINT has no IF NOT EXISTS form, and a bare ALTER TABLE fails with
-- "constraint already exists" when this script is run a second time. The
-- duplicate_object trap makes the statement a no-op in that case, matching
-- the guard used for the CREATE TYPE statements in this file.
--   ON DELETE SET NULL            deleting the referenced report clears the pointer
--   DEFERRABLE INITIALLY DEFERRED the FK check is postponed to transaction commit
DO $$
BEGIN
    ALTER TABLE briefs
        ADD CONSTRAINT briefs_prior_report_fk
        FOREIGN KEY (prior_report_id) REFERENCES reports(id) ON DELETE SET NULL
        DEFERRABLE INITIALLY DEFERRED;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint already present from an earlier run
END
$$;
-- Individual cost line items attached to a report; removed with the report.
-- source is restricted to 'claude' or 'apify' by the CHECK constraint.
CREATE TABLE IF NOT EXISTS cost_events (
    id            BIGSERIAL     PRIMARY KEY,
    report_id     UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    created_at    TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    stage         INTEGER       NOT NULL,
    stage_name    TEXT          NOT NULL,
    source        TEXT          NOT NULL CHECK (source IN ('claude','apify')),
    label         TEXT          NOT NULL,
    model         TEXT,                                   -- nullable
    input_tokens  INTEGER       NOT NULL DEFAULT 0,
    output_tokens INTEGER       NOT NULL DEFAULT 0,
    cost_usd      NUMERIC(10,6) NOT NULL DEFAULT 0,
    metadata      JSONB                                   -- nullable
);

-- Supports reading a report's cost events in chronological order.
CREATE INDEX IF NOT EXISTS idx_cost_report
    ON cost_events (report_id, created_at);
-- ─── Videos / Assets / Manifest (THE LINKING FIX) ───────────────────────
-- TikTok numeric id is the canonical key. URL is presentation, not key.
-- Video catalogue. The primary key is a TEXT id; only platform, handle,
-- url_canonical and first_seen_at are required, every metric is nullable.
CREATE TABLE IF NOT EXISTS videos (
    id             TEXT         PRIMARY KEY,
    platform       TEXT         NOT NULL DEFAULT 'tiktok',
    handle         TEXT         NOT NULL,
    url_canonical  TEXT         NOT NULL,
    caption        TEXT,
    hashtags       TEXT[],
    plays          BIGINT,
    likes          BIGINT,
    saves          BIGINT,
    comments_count INTEGER,
    shares         BIGINT,
    stl_pct        NUMERIC(5,2),
    duration_sec   INTEGER,
    posted_at      TIMESTAMPTZ,
    cover_url      TEXT,
    first_seen_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_videos_handle
    ON videos (handle);
-- Newest-first ordering on posting time.
CREATE INDEX IF NOT EXISTS idx_videos_posted
    ON videos (posted_at DESC);
-- Enum types used by video_assets. Each CREATE TYPE is guarded separately so
-- that one type already existing does not skip creation of the other.
DO $$
BEGIN
    CREATE TYPE asset_kind AS ENUM (
        'metadata',
        'cover',
        'transcript',
        'comments',
        'frames',
        'bundle'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;

DO $$
BEGIN
    CREATE TYPE asset_status AS ENUM (
        'pending',
        'ok',
        'failed',
        'dropped'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;
-- At most one row per (report, video, asset kind). Rows are removed with
-- their report; the videos reference has no ON DELETE action.
CREATE TABLE IF NOT EXISTS video_assets (
    report_id     UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id      TEXT         NOT NULL REFERENCES videos(id),
    asset_kind    asset_kind   NOT NULL,
    status        asset_status NOT NULL DEFAULT 'pending',
    fs_path       TEXT,
    byte_size     BIGINT,
    error         TEXT,
    source_url    TEXT,
    attempt_count INTEGER      NOT NULL DEFAULT 0,
    -- Set on insert only; no trigger in this file maintains it on UPDATE.
    updated_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    PRIMARY KEY (report_id, video_id, asset_kind)
);

-- Supports filtering a report's assets by status.
CREATE INDEX IF NOT EXISTS idx_assets_status
    ON video_assets (report_id, status);
-- At most one manifest check per report (report_id is the primary key);
-- the row is removed with its report. Every counter defaults to 0 and
-- passed defaults to FALSE; only selected_count must be supplied.
CREATE TABLE IF NOT EXISTS manifest_checks (
    report_id      UUID         PRIMARY KEY REFERENCES reports(id) ON DELETE CASCADE,
    selected_count INTEGER      NOT NULL,
    metadata_ok    INTEGER      NOT NULL DEFAULT 0,
    transcript_ok  INTEGER      NOT NULL DEFAULT 0,
    comments_ok    INTEGER      NOT NULL DEFAULT 0,
    frames_ok      INTEGER      NOT NULL DEFAULT 0,
    cover_ok       INTEGER      NOT NULL DEFAULT 0,
    all_ok_count   INTEGER      NOT NULL DEFAULT 0,
    coverage_pct   NUMERIC(5,2) NOT NULL DEFAULT 0,
    passed         BOOLEAN      NOT NULL DEFAULT FALSE,
    missing        JSONB        NOT NULL DEFAULT '[]'::jsonb,   -- empty JSON array by default
    built_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);
-- Videos selected for a report, at most one row per (report, video).
-- Rows are removed with their report.
CREATE TABLE IF NOT EXISTS selected_videos (
    report_id    UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id     TEXT          NOT NULL REFERENCES videos(id),
    rank_score   NUMERIC(10,4),                          -- nullable
    recipe_label TEXT          NOT NULL,
    is_backfill  BOOLEAN       NOT NULL DEFAULT FALSE,
    PRIMARY KEY (report_id, video_id)
);
-- ─── Trends (trend_videos is the junction table — the only place trend↔video lives) ─
-- Trends found for a report; removed with the report. The slug is unique
-- within a report, and relevance_tier is restricted to 'core' or 'peripheral'.
CREATE TABLE IF NOT EXISTS trends (
    id             UUID         DEFAULT gen_random_uuid() PRIMARY KEY,
    report_id      UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    slug           TEXT         NOT NULL,
    name           TEXT         NOT NULL,
    category       TEXT         NOT NULL,
    relevance_tier TEXT         NOT NULL CHECK (relevance_tier IN ('core','peripheral')),
    velocity       NUMERIC(6,3),                          -- nullable
    description    TEXT,                                  -- nullable
    body_jsonb     JSONB        NOT NULL,
    UNIQUE (report_id, slug)
);

-- NOTE(review): the UNIQUE (report_id, slug) index already leads with
-- report_id, so this index may be redundant. Confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_trends_report
    ON trends (report_id);
-- Links trends to videos, at most one row per (trend, video).
-- Rows are removed with their trend; the videos reference has no ON DELETE action.
CREATE TABLE IF NOT EXISTS trend_videos (
    trend_id UUID    NOT NULL REFERENCES trends(id) ON DELETE CASCADE,
    video_id TEXT    NOT NULL REFERENCES videos(id),
    rank     INTEGER,                                     -- nullable
    PRIMARY KEY (trend_id, video_id)
);

-- The primary key leads with trend_id; this index serves lookups by video_id.
CREATE INDEX IF NOT EXISTS idx_trend_videos_video
    ON trend_videos (video_id);