social-reporting-tool/v2/db/init.sql
DJP b89e8b511e Add V2: multi-team social-reporting platform with manifest-gated linking
V2 lives entirely under v2/ and is built around three asks the team raised
about V1: per-video assets sometimes drifted onto the wrong trend, hashtag
scrapes returned junk that wasn't filterable per-client, and there was no
multi-user model behind Microsoft SSO.

Highlights:
- Stable TikTok numeric-id key for every per-video asset; URL form drift is
  logged loudly to drift_log.jsonl and never silently nulls assets. Stage 5
  manifest hard-gates Stage 6 if any selected video is missing any required
  asset; --drop-failing auto-backfills from the next-best recipe candidates.
- Per-brief engagement floor (min_likes / min_plays / min_stl_pct), applied
  at Apify scrape time and re-validated locally; spend_log.json records
  raw_returned vs kept_after_floor per scrape.
- Users + teams + memberships with owner/admin/editor/viewer roles; SSO
  upserts a user keyed on Azure oid, auto-creates a personal team, and a
  super-admin is bootstrapped via BOOTSTRAP_SUPER_ADMIN_EMAIL on first
  sign-in. Phase A integration test: 16/16 pass.
- 10-stage TS pipeline (brief → seed → scrape1 → select → scrape2 →
  validate → analyse → insights → trends → qa → build) wired through one
  CLI; each stage idempotent + resumable from disk via .state sentinels.
  §4.5 rubrics shipped under prompts/ and loaded into Claude calls.
- React 18 + Vite + TS + Tailwind operator SPA: brief intake form,
  team management, super-admin user list, help/FAQ ported from V1.
- Separate Docker Compose project (name: social-reporting-v2, port 3457,
  Postgres 5437) with deploy/setup-v2.sh, deploy-v2.sh, rollback-to-v1.sh
  scripts that take over V1's /social-reports URL and let us roll back.

Verification: 62 unit tests pass (auth/session, ids extractor with full URL
fixture, engagement floor, recipes, manifest, linking-fix, MoM compare).
Live smoke run on a Dove brief: 1400 raw → 253 kept (82% culled) → 21
fully-bundled videos → 25 editorial trends across 8 brief-driven categories,
with drift=0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 17:39:07 -04:00

213 lines
8.6 KiB
SQL

-- Social Reporting V2 — fresh schema
-- Coexists with V1 in a separate database (`social_reporting_v2`).
-- Forward-only migrations under v2/db/migrations/.
-- Extensions the schema below relies on. Both statements are safe to re-run.
-- pgcrypto: gen_random_uuid(), the default for the UUID primary keys.
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- citext: case-insensitive text type, used for users.email.
CREATE EXTENSION IF NOT EXISTS citext;
-- ─── Identity ───────────────────────────────────────────────────────────
-- One row per person known to the platform.
--   azure_oid      Azure object id; unique and required.
--   email          CITEXT, so the uniqueness check ignores letter case.
--   is_super_admin platform-wide admin flag, off by default.
--   password_hash  nullable.
--   last_login_at  nullable; NULL until a value is written.
CREATE TABLE IF NOT EXISTS users (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    azure_oid      TEXT        UNIQUE NOT NULL,
    email          CITEXT      UNIQUE NOT NULL,
    display_name   TEXT        NOT NULL,
    is_super_admin BOOLEAN     NOT NULL DEFAULT FALSE,
    password_hash  TEXT,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_login_at  TIMESTAMPTZ
);
-- No explicit indexes on azure_oid or email: PostgreSQL already maintains a
-- unique index behind each UNIQUE constraint above (named users_azure_oid_key
-- and users_email_key by default). Separate idx_users_azure_oid /
-- idx_users_email indexes would only duplicate them and add write cost.
-- Databases initialised by an earlier revision of this script may still carry
-- those two indexes; they can be dropped in a migration.
-- A team is the unit that memberships, briefs and reports hang off
-- (see the team_id foreign keys on those tables).
--   slug         unique across all teams.
--   is_personal  off by default; presumably set for the personal team created
--                at first sign-in — confirm against the auth code.
CREATE TABLE IF NOT EXISTS teams (
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    slug        TEXT        UNIQUE NOT NULL,
    name        TEXT        NOT NULL,
    is_personal BOOLEAN     NOT NULL DEFAULT FALSE,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Role a user holds inside a team.
-- CREATE TYPE has no IF NOT EXISTS form, so the statement runs inside a DO
-- block that swallows duplicate_object when the type is already present.
DO $$
BEGIN
    CREATE TYPE team_role AS ENUM ('owner', 'admin', 'editor', 'viewer');
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
-- Links users to teams, at most one row (and therefore one role) per
-- (team, user) pair. Rows are removed automatically when either the team or
-- the member is deleted.
CREATE TABLE IF NOT EXISTS team_memberships (
    team_id  UUID        NOT NULL REFERENCES teams(id) ON DELETE CASCADE,
    user_id  UUID        NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    role     team_role   NOT NULL,
    -- Nullable; no ON DELETE action is declared for this reference.
    added_by UUID        REFERENCES users(id),
    added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (team_id, user_id)
);
-- The primary key leads with team_id; this index covers lookups by user_id.
CREATE INDEX IF NOT EXISTS idx_memberships_user
    ON team_memberships (user_id);
-- ─── Briefs / Reports ───────────────────────────────────────────────────
-- A brief belongs to one team and one owning user; reports reference it via
-- reports.brief_id. ON DELETE RESTRICT on both parents means a team or user
-- cannot be deleted while briefs still point at it. slug is unique per team.
CREATE TABLE IF NOT EXISTS briefs (
    id                UUID          PRIMARY KEY DEFAULT gen_random_uuid(),
    team_id           UUID          NOT NULL REFERENCES teams(id) ON DELETE RESTRICT,
    owner_id          UUID          NOT NULL REFERENCES users(id) ON DELETE RESTRICT,
    slug              TEXT          NOT NULL,
    client_name       TEXT          NOT NULL,
    category          TEXT          NOT NULL,
    business_question TEXT          NOT NULL,
    date_window_days  INTEGER       NOT NULL DEFAULT 30,
    budget_usd        NUMERIC(10,2) NOT NULL,
    platforms         TEXT[]        NOT NULL DEFAULT ARRAY['tiktok'],
    positioning       JSONB,
    kpis              JSONB,
    context_vision    TEXT,
    -- Per-brief hashtag engagement floor (the V2 quality knob).
    min_likes         INTEGER       NOT NULL DEFAULT 1000,
    min_plays         INTEGER       NOT NULL DEFAULT 10000,
    min_stl_pct       NUMERIC(5,2)  NOT NULL DEFAULT 0,
    -- Optional pointer to a report; its foreign key is attached by a separate
    -- ALTER TABLE further down, after the reports table has been created.
    prior_report_id   UUID,
    brief_yaml        JSONB         NOT NULL,
    created_at        TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    UNIQUE (team_id, slug)
);
CREATE INDEX IF NOT EXISTS idx_briefs_team
    ON briefs (team_id);
CREATE INDEX IF NOT EXISTS idx_briefs_owner
    ON briefs (owner_id);
-- Lifecycle value stored in reports.status: 'pending', ten stage names, then
-- the terminal 'completed' / 'failed'.
-- Guarded with duplicate_object so re-running the script is harmless.
DO $$
BEGIN
    CREATE TYPE report_status AS ENUM (
        'pending',
        'seeds', 'pass1', 'select', 'pass2', 'validate',
        'analyse', 'insights', 'trends', 'qa', 'build',
        'completed', 'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
-- One row per report run for a brief. Deleting the brief deletes its reports.
-- team_id is stored on the report itself in addition to the brief's team_id.
CREATE TABLE IF NOT EXISTS reports (
    id                 UUID          PRIMARY KEY DEFAULT gen_random_uuid(),
    brief_id           UUID          NOT NULL REFERENCES briefs(id) ON DELETE CASCADE,
    team_id            UUID          NOT NULL REFERENCES teams(id),
    triggered_by       UUID          NOT NULL REFERENCES users(id),
    status             report_status NOT NULL DEFAULT 'pending',
    current_stage      INTEGER       NOT NULL DEFAULT 0,
    started_at         TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    finished_at        TIMESTAMPTZ,
    -- Cost columns, stored to four decimal places and starting at zero.
    apify_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    claude_cost_usd    NUMERIC(10,4) NOT NULL DEFAULT 0,
    total_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    fs_root            TEXT          NOT NULL,
    manifest_passed_at TIMESTAMPTZ,
    error_message      TEXT
);
-- Both indexes order by started_at descending within a team / brief.
CREATE INDEX IF NOT EXISTS idx_reports_team
    ON reports (team_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_reports_brief
    ON reports (brief_id, started_at DESC);
-- Foreign key briefs.prior_report_id -> reports.id.
-- It is attached here rather than inline because briefs and reports reference
-- each other and reports is created after briefs. Deleting the referenced
-- report sets the pointer to NULL; the constraint is deferrable and checked at
-- commit by default.
--
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so it is wrapped in
-- the same duplicate_object guard this file uses for its enum types. Without
-- the guard a second run of the script aborts here with "constraint already
-- exists", while every other statement in the file is re-runnable.
DO $$
BEGIN
    ALTER TABLE briefs
        ADD CONSTRAINT briefs_prior_report_fk
        FOREIGN KEY (prior_report_id) REFERENCES reports(id) ON DELETE SET NULL
        DEFERRABLE INITIALLY DEFERRED;
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
-- Individual cost entries recorded against a report; removed together with
-- the report. source is limited to 'claude' or 'apify' by a CHECK constraint.
CREATE TABLE IF NOT EXISTS cost_events (
    id            BIGSERIAL     PRIMARY KEY,
    report_id     UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    created_at    TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    stage         INTEGER       NOT NULL,
    stage_name    TEXT          NOT NULL,
    source        TEXT          NOT NULL CHECK (source IN ('claude','apify')),
    label         TEXT          NOT NULL,
    model         TEXT,
    input_tokens  INTEGER       NOT NULL DEFAULT 0,
    output_tokens INTEGER       NOT NULL DEFAULT 0,
    -- Six decimal places, finer than the four used by the reports cost columns.
    cost_usd      NUMERIC(10,6) NOT NULL DEFAULT 0,
    metadata      JSONB
);
-- Supports reading one report's events in created_at order.
CREATE INDEX IF NOT EXISTS idx_cost_report
    ON cost_events (report_id, created_at);
-- ─── Videos / Assets / Manifest (THE LINKING FIX) ───────────────────────
-- TikTok numeric id is the canonical key. URL is presentation, not key.
-- One row per video. The primary key is a TEXT id supplied by the writer
-- (there is no default); url_canonical is stored alongside it but is not a key.
-- The metric columns are nullable.
CREATE TABLE IF NOT EXISTS videos (
    id             TEXT         PRIMARY KEY,
    platform       TEXT         NOT NULL DEFAULT 'tiktok',
    handle         TEXT         NOT NULL,
    url_canonical  TEXT         NOT NULL,
    caption        TEXT,
    hashtags       TEXT[],
    plays          BIGINT,
    likes          BIGINT,
    saves          BIGINT,
    comments_count INTEGER,
    shares         BIGINT,
    stl_pct        NUMERIC(5,2),
    duration_sec   INTEGER,
    posted_at      TIMESTAMPTZ,
    cover_url      TEXT,
    first_seen_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_videos_handle
    ON videos (handle);
CREATE INDEX IF NOT EXISTS idx_videos_posted
    ON videos (posted_at DESC);
-- Kinds of per-video asset tracked in video_assets.
-- duplicate_object is swallowed so the script can be re-run.
DO $$
BEGIN
    CREATE TYPE asset_kind AS ENUM (
        'metadata', 'cover', 'transcript', 'comments', 'frames', 'bundle'
    );
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
-- State of a single asset row in video_assets; rows start as 'pending'.
-- duplicate_object is swallowed so the script can be re-run.
DO $$
BEGIN
    CREATE TYPE asset_status AS ENUM ('pending', 'ok', 'failed', 'dropped');
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
-- Tracks each asset of each video within a report: at most one row per
-- (report, video, asset kind). Rows go away with their report; the videos
-- reference declares no ON DELETE action.
CREATE TABLE IF NOT EXISTS video_assets (
    report_id     UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id      TEXT         NOT NULL REFERENCES videos(id),
    asset_kind    asset_kind   NOT NULL,
    status        asset_status NOT NULL DEFAULT 'pending',
    fs_path       TEXT,
    byte_size     BIGINT,
    error         TEXT,
    source_url    TEXT,
    attempt_count INTEGER      NOT NULL DEFAULT 0,
    -- Defaults to insert time only; nothing in this file refreshes it on UPDATE.
    updated_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    PRIMARY KEY (report_id, video_id, asset_kind)
);
-- Supports filtering one report's assets by status.
CREATE INDEX IF NOT EXISTS idx_assets_status
    ON video_assets (report_id, status);
-- Manifest check result for a report. report_id is the primary key, so there
-- is at most one row per report, and it is deleted with the report.
-- Holds the selected-video count, one *_ok counter per asset kind, and the
-- overall outcome.
CREATE TABLE IF NOT EXISTS manifest_checks (
    report_id      UUID         PRIMARY KEY REFERENCES reports(id) ON DELETE CASCADE,
    selected_count INTEGER      NOT NULL,
    metadata_ok    INTEGER      NOT NULL DEFAULT 0,
    transcript_ok  INTEGER      NOT NULL DEFAULT 0,
    comments_ok    INTEGER      NOT NULL DEFAULT 0,
    frames_ok      INTEGER      NOT NULL DEFAULT 0,
    cover_ok       INTEGER      NOT NULL DEFAULT 0,
    all_ok_count   INTEGER      NOT NULL DEFAULT 0,
    coverage_pct   NUMERIC(5,2) NOT NULL DEFAULT 0,
    passed         BOOLEAN      NOT NULL DEFAULT FALSE,
    -- JSON value, an empty array by default.
    missing        JSONB        NOT NULL DEFAULT '[]'::jsonb,
    built_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);
-- Videos chosen for a report, at most one row per (report, video).
-- Rows are deleted with the report. is_backfill is off by default.
CREATE TABLE IF NOT EXISTS selected_videos (
    report_id    UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id     TEXT          NOT NULL REFERENCES videos(id),
    rank_score   NUMERIC(10,4),
    recipe_label TEXT          NOT NULL,
    is_backfill  BOOLEAN       NOT NULL DEFAULT FALSE,
    PRIMARY KEY (report_id, video_id)
);
-- ─── Trends (junction table — the only place trend↔video lives) ─────────
-- Trends produced for a report; deleted with the report. slug is unique
-- within a report, and relevance_tier is limited to 'core' or 'peripheral'.
CREATE TABLE IF NOT EXISTS trends (
    id             UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    report_id      UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    slug           TEXT         NOT NULL,
    name           TEXT         NOT NULL,
    category       TEXT         NOT NULL,
    relevance_tier TEXT         NOT NULL CHECK (relevance_tier IN ('core','peripheral')),
    velocity       NUMERIC(6,3),
    description    TEXT,
    body_jsonb     JSONB        NOT NULL,
    UNIQUE (report_id, slug)
);
CREATE INDEX IF NOT EXISTS idx_trends_report
    ON trends (report_id);
-- Junction between trends and videos: at most one row per (trend, video),
-- with an optional rank. Rows are deleted with their trend.
CREATE TABLE IF NOT EXISTS trend_videos (
    trend_id UUID    NOT NULL REFERENCES trends(id) ON DELETE CASCADE,
    video_id TEXT    NOT NULL REFERENCES videos(id),
    rank     INTEGER,
    PRIMARY KEY (trend_id, video_id)
);
-- The primary key leads with trend_id; this index covers lookups by video_id.
CREATE INDEX IF NOT EXISTS idx_trend_videos_video
    ON trend_videos (video_id);