V2 lives entirely under v2/ and is built around three asks the team raised about V1: per-video assets sometimes drifted onto the wrong trend, hashtag scrapes returned junk that wasn't filterable per-client, and there was no multi-user model behind Microsoft SSO.

Highlights:
- Stable TikTok numeric-id key for every per-video asset; URL form drift is logged loudly to drift_log.jsonl and never silently nulls assets. Stage 5 manifest hard-gates Stage 6 if any selected video is missing any required asset; --drop-failing auto-backfills from the next-best recipe candidates.
- Per-brief engagement floor (min_likes / min_plays / min_stl_pct), applied at Apify scrape time and re-validated locally; spend_log.json records raw_returned vs kept_after_floor per scrape.
- Users + teams + memberships with owner/admin/editor/viewer roles; SSO upserts a user keyed on Azure oid, auto-creates a personal team, and a super-admin is bootstrapped via BOOTSTRAP_SUPER_ADMIN_EMAIL on first sign-in. Phase A integration test: 16/16 pass.
- 10-stage TS pipeline (brief → seed → scrape1 → select → scrape2 → validate → analyse → insights → trends → qa → build) wired through one CLI; each stage idempotent + resumable from disk via .state sentinels. §4.5 rubrics shipped under prompts/ and loaded into Claude calls.
- React 18 + Vite + TS + Tailwind operator SPA: brief intake form, team management, super-admin user list, help/FAQ ported from V1.
- Separate Docker Compose project (name: social-reporting-v2, port 3457, Postgres 5437) with deploy/setup-v2.sh, deploy-v2.sh, rollback-to-v1.sh scripts that take over V1's /social-reports URL and let us roll back.

Verification: 62 unit tests pass (auth/session, ids extractor with full URL fixture, engagement floor, recipes, manifest, linking-fix, MoM compare). Live smoke run on a Dove brief: 1400 raw → 253 kept (82% culled) → 21 fully-bundled videos → 25 editorial trends across 8 brief-driven categories, with drift=0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
213 lines
8.6 KiB
SQL
213 lines
8.6 KiB
SQL
-- Social Reporting V2 — fresh schema
|
|
-- Coexists with V1 in a separate database (`social_reporting_v2`).
|
|
-- Forward-only migrations under v2/db/migrations/.
|
|
|
|
-- Extensions the rest of this schema relies on. IF NOT EXISTS makes both
-- statements no-ops when the extension is already installed.
CREATE EXTENSION IF NOT EXISTS pgcrypto;  -- gen_random_uuid()
CREATE EXTENSION IF NOT EXISTS citext;    -- case-insensitive email
|
|
|
|
-- ─── Identity ───────────────────────────────────────────────────────────
|
|
|
|
-- One row per user. azure_oid and email are each unique on their own;
-- email is CITEXT, so its uniqueness and comparisons ignore case.
CREATE TABLE IF NOT EXISTS users (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    azure_oid      TEXT        UNIQUE NOT NULL,
    email          CITEXT      UNIQUE NOT NULL,
    display_name   TEXT        NOT NULL,
    is_super_admin BOOLEAN     NOT NULL DEFAULT FALSE,
    password_hash  TEXT,                               -- nullable, no default
    created_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_login_at  TIMESTAMPTZ                         -- nullable, no default
);

-- NOTE(review): PostgreSQL already builds a unique index for each UNIQUE
-- column above, so these two plain indexes look redundant — confirm nothing
-- references them by name before dropping.
CREATE INDEX IF NOT EXISTS idx_users_azure_oid ON users (azure_oid);
CREATE INDEX IF NOT EXISTS idx_users_email     ON users (email);
|
|
|
|
-- One row per team. slug is unique across all teams.
CREATE TABLE IF NOT EXISTS teams (
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    slug        TEXT        UNIQUE NOT NULL,
    name        TEXT        NOT NULL,
    -- presumably TRUE for the team auto-created for a single user — confirm
    is_personal BOOLEAN     NOT NULL DEFAULT FALSE,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
|
|
|
-- Role a user holds within a team. CREATE TYPE has no IF NOT EXISTS form,
-- so the duplicate_object error raised on a re-run is swallowed instead.
DO $$
BEGIN
    CREATE TYPE team_role AS ENUM ('owner', 'admin', 'editor', 'viewer');
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists; nothing to do
END
$$;
|
|
|
|
-- Junction of users and teams: at most one row, and therefore one role, per
-- (team, user) pair. Deleting either the team or the user removes the row.
CREATE TABLE IF NOT EXISTS team_memberships (
    team_id  UUID        NOT NULL REFERENCES teams(id) ON DELETE CASCADE,
    user_id  UUID        NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    role     team_role   NOT NULL,
    -- NOTE(review): no ON DELETE action here, so a user referenced as
    -- added_by cannot be deleted; the column is nullable, which suggests
    -- SET NULL may have been intended — confirm.
    added_by UUID        REFERENCES users(id),
    added_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (team_id, user_id)
);

-- The primary key leads with team_id; this index serves lookups by user_id.
CREATE INDEX IF NOT EXISTS idx_memberships_user ON team_memberships (user_id);
|
|
|
|
-- ─── Briefs / Reports ───────────────────────────────────────────────────
|
|
|
|
-- One row per brief. A brief belongs to a team and an owning user; neither
-- can be deleted while the brief exists (ON DELETE RESTRICT). slug is unique
-- within a team, not globally.
CREATE TABLE IF NOT EXISTS briefs (
    id                UUID          PRIMARY KEY DEFAULT gen_random_uuid(),
    team_id           UUID          NOT NULL REFERENCES teams(id) ON DELETE RESTRICT,
    owner_id          UUID          NOT NULL REFERENCES users(id) ON DELETE RESTRICT,
    slug              TEXT          NOT NULL,
    client_name       TEXT          NOT NULL,
    category          TEXT          NOT NULL,
    business_question TEXT          NOT NULL,
    date_window_days  INTEGER       NOT NULL DEFAULT 30,
    budget_usd        NUMERIC(10,2) NOT NULL,
    platforms         TEXT[]        NOT NULL DEFAULT ARRAY['tiktok'],
    positioning       JSONB,
    kpis              JSONB,
    context_vision    TEXT,
    -- Hashtag engagement floor (the V2 quality knob)
    min_likes         INTEGER       NOT NULL DEFAULT 1000,
    min_plays         INTEGER       NOT NULL DEFAULT 10000,
    min_stl_pct       NUMERIC(5,2)  NOT NULL DEFAULT 0,
    -- Plain UUID here because reports is created after this table; the
    -- foreign key is attached further down in this file by
    -- briefs_prior_report_fk.
    prior_report_id   UUID,
    brief_yaml        JSONB         NOT NULL,
    created_at        TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    UNIQUE (team_id, slug)
);

CREATE INDEX IF NOT EXISTS idx_briefs_team  ON briefs (team_id);
CREATE INDEX IF NOT EXISTS idx_briefs_owner ON briefs (owner_id);
|
|
|
|
-- Status values for a report. CREATE TYPE has no IF NOT EXISTS form, so the
-- duplicate_object error raised on a re-run is swallowed instead.
DO $$
BEGIN
    CREATE TYPE report_status AS ENUM (
        'pending',
        'seeds',
        'pass1',
        'select',
        'pass2',
        'validate',
        'analyse',
        'insights',
        'trends',
        'qa',
        'build',
        'completed',
        'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists; nothing to do
END
$$;
|
|
|
|
-- One row per report generated from a brief. Deleting the brief deletes its
-- reports. team_id and triggered_by carry no ON DELETE action, so the
-- referenced team or user cannot be deleted while the report exists.
CREATE TABLE IF NOT EXISTS reports (
    id                 UUID          PRIMARY KEY DEFAULT gen_random_uuid(),
    brief_id           UUID          NOT NULL REFERENCES briefs(id) ON DELETE CASCADE,
    -- NOTE(review): nothing here forces this to equal the brief's team_id;
    -- presumably the application keeps them in step — confirm.
    team_id            UUID          NOT NULL REFERENCES teams(id),
    triggered_by       UUID          NOT NULL REFERENCES users(id),
    status             report_status NOT NULL DEFAULT 'pending',
    current_stage      INTEGER       NOT NULL DEFAULT 0,
    started_at         TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    finished_at        TIMESTAMPTZ,
    apify_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    claude_cost_usd    NUMERIC(10,4) NOT NULL DEFAULT 0,
    total_cost_usd     NUMERIC(10,4) NOT NULL DEFAULT 0,
    fs_root            TEXT          NOT NULL,
    manifest_passed_at TIMESTAMPTZ,
    error_message      TEXT
);

-- Both indexes order by started_at DESC within their leading key.
CREATE INDEX IF NOT EXISTS idx_reports_team  ON reports (team_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_reports_brief ON reports (brief_id, started_at DESC);
|
|
|
|
-- briefs.prior_report_id → reports.id. Attached here rather than inline
-- because briefs is created before reports. If the referenced report is
-- deleted the column is set to NULL; the check is deferred to commit time.
--
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so a bare
-- statement would abort any re-run of this file with duplicate_object.
-- Swallowing that one error keeps this statement as re-runnable as the
-- CREATE ... IF NOT EXISTS statements and guarded CREATE TYPE blocks around it.
DO $$
BEGIN
    ALTER TABLE briefs
        ADD CONSTRAINT briefs_prior_report_fk
        FOREIGN KEY (prior_report_id) REFERENCES reports(id) ON DELETE SET NULL
        DEFERRABLE INITIALLY DEFERRED;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint already present from an earlier run
END
$$;
|
|
|
|
-- Individual cost line items attached to a report; removed along with the
-- report. source is restricted to 'claude' or 'apify'.
CREATE TABLE IF NOT EXISTS cost_events (
    id            BIGSERIAL     PRIMARY KEY,
    report_id     UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    created_at    TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    stage         INTEGER       NOT NULL,
    stage_name    TEXT          NOT NULL,
    source        TEXT          NOT NULL CHECK (source IN ('claude','apify')),
    label         TEXT          NOT NULL,
    model         TEXT,
    input_tokens  INTEGER       NOT NULL DEFAULT 0,
    output_tokens INTEGER       NOT NULL DEFAULT 0,
    cost_usd      NUMERIC(10,6) NOT NULL DEFAULT 0,  -- six decimal places
    metadata      JSONB
);

CREATE INDEX IF NOT EXISTS idx_cost_report ON cost_events (report_id, created_at);
|
|
|
|
-- ─── Videos / Assets / Manifest (THE LINKING FIX) ───────────────────────
|
|
|
|
-- TikTok numeric id is the canonical key. URL is presentation, not key.
|
|
-- One row per video, keyed by a TEXT id. Only id, platform, handle,
-- url_canonical and first_seen_at are required; every metric is nullable.
CREATE TABLE IF NOT EXISTS videos (
    id             TEXT         PRIMARY KEY,
    platform       TEXT         NOT NULL DEFAULT 'tiktok',
    handle         TEXT         NOT NULL,
    url_canonical  TEXT         NOT NULL,
    caption        TEXT,
    hashtags       TEXT[],
    plays          BIGINT,
    likes          BIGINT,
    saves          BIGINT,
    comments_count INTEGER,
    shares         BIGINT,
    stl_pct        NUMERIC(5,2),
    duration_sec   INTEGER,
    posted_at      TIMESTAMPTZ,
    cover_url      TEXT,
    first_seen_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_videos_handle ON videos (handle);
CREATE INDEX IF NOT EXISTS idx_videos_posted ON videos (posted_at DESC);
|
|
|
|
-- Kinds of per-video asset. CREATE TYPE has no IF NOT EXISTS form, so the
-- duplicate_object error raised on a re-run is swallowed instead.
DO $$
BEGIN
    CREATE TYPE asset_kind AS ENUM (
        'metadata',
        'cover',
        'transcript',
        'comments',
        'frames',
        'bundle'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists; nothing to do
END
$$;
|
|
|
|
-- Status values for a per-video asset. CREATE TYPE has no IF NOT EXISTS
-- form, so the duplicate_object error raised on a re-run is swallowed instead.
DO $$
BEGIN
    CREATE TYPE asset_status AS ENUM ('pending', 'ok', 'failed', 'dropped');
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists; nothing to do
END
$$;
|
|
|
|
-- One row per (report, video, asset kind). Rows are removed with their
-- report; video_id has no ON DELETE action, so a video cannot be deleted
-- while any asset row still points at it.
CREATE TABLE IF NOT EXISTS video_assets (
    report_id     UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id      TEXT         NOT NULL REFERENCES videos(id),
    asset_kind    asset_kind   NOT NULL,
    status        asset_status NOT NULL DEFAULT 'pending',
    fs_path       TEXT,
    byte_size     BIGINT,
    error         TEXT,
    source_url    TEXT,
    attempt_count INTEGER      NOT NULL DEFAULT 0,
    -- Defaults to insert time only; no trigger in this file refreshes it,
    -- so writers presumably set it on update — confirm.
    updated_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    PRIMARY KEY (report_id, video_id, asset_kind)
);

CREATE INDEX IF NOT EXISTS idx_assets_status ON video_assets (report_id, status);
|
|
|
|
-- At most one manifest-check row per report (report_id is the primary key);
-- removed with the report.
-- NOTE(review): the *_ok columns presumably count selected videos whose
-- asset of that kind has status 'ok', and all_ok_count those with every
-- kind ok — confirm against the writer.
CREATE TABLE IF NOT EXISTS manifest_checks (
    report_id      UUID         PRIMARY KEY REFERENCES reports(id) ON DELETE CASCADE,
    selected_count INTEGER      NOT NULL,
    metadata_ok    INTEGER      NOT NULL DEFAULT 0,
    transcript_ok  INTEGER      NOT NULL DEFAULT 0,
    comments_ok    INTEGER      NOT NULL DEFAULT 0,
    frames_ok      INTEGER      NOT NULL DEFAULT 0,
    cover_ok       INTEGER      NOT NULL DEFAULT 0,
    all_ok_count   INTEGER      NOT NULL DEFAULT 0,
    coverage_pct   NUMERIC(5,2) NOT NULL DEFAULT 0,
    passed         BOOLEAN      NOT NULL DEFAULT FALSE,
    missing        JSONB        NOT NULL DEFAULT '[]'::jsonb,  -- empty JSON array by default
    built_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);
|
|
|
|
-- Videos chosen for a report: at most one row per (report, video). Rows are
-- removed with their report; video_id has no ON DELETE action, so a selected
-- video cannot be deleted.
CREATE TABLE IF NOT EXISTS selected_videos (
    report_id    UUID          NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    video_id     TEXT          NOT NULL REFERENCES videos(id),
    rank_score   NUMERIC(10,4),
    recipe_label TEXT          NOT NULL,
    is_backfill  BOOLEAN       NOT NULL DEFAULT FALSE,
    PRIMARY KEY (report_id, video_id)
);
|
|
|
|
-- ─── Trends (junction table — the only place trend↔video lives) ─────────
|
|
|
|
-- Trends produced for a report; removed with the report. slug is unique
-- within a report. relevance_tier is restricted to 'core' or 'peripheral'.
CREATE TABLE IF NOT EXISTS trends (
    id             UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    report_id      UUID         NOT NULL REFERENCES reports(id) ON DELETE CASCADE,
    slug           TEXT         NOT NULL,
    name           TEXT         NOT NULL,
    category       TEXT         NOT NULL,
    relevance_tier TEXT         NOT NULL CHECK (relevance_tier IN ('core','peripheral')),
    velocity       NUMERIC(6,3),
    description    TEXT,
    body_jsonb     JSONB        NOT NULL,
    UNIQUE (report_id, slug)
);

-- NOTE(review): the UNIQUE (report_id, slug) index already leads with
-- report_id, so this index looks redundant — confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_trends_report ON trends (report_id);
|
|
|
|
-- Junction of trends and videos: at most one row per (trend, video). Rows
-- are removed with their trend; video_id has no ON DELETE action, so a
-- linked video cannot be deleted.
CREATE TABLE IF NOT EXISTS trend_videos (
    trend_id UUID    NOT NULL REFERENCES trends(id) ON DELETE CASCADE,
    video_id TEXT    NOT NULL REFERENCES videos(id),
    rank     INTEGER,
    PRIMARY KEY (trend_id, video_id)
);

-- The primary key leads with trend_id; this index serves lookups by video_id.
CREATE INDEX IF NOT EXISTS idx_trend_videos_video ON trend_videos (video_id);
|