ferrero-opentext/Python-Version/database/init.sql
nickviljoen f28b5221f7 Enhancement: Capture CreativeX score on B1→B2 global masters
Extracts CreativeX score and URL from DAM master metadata during the
B1→B2 download, persists to creativex_scores with new status
'b1-master-cx-score' (dedup by tracking_id), and surfaces the score in
the b1_to_b2_complete and b1_to_b2_partial emails — falling back to
"No CreativeX Score" when the master has no score yet. Skipped
already-downloaded assets backfill from full_metadata JSONB on next pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 11:31:07 +02:00

395 lines
14 KiB
PL/PgSQL

-- Ferrero Asset Tracking Database - Initialization Script
-- PostgreSQL 15+
-- Last Updated: November 5, 2025
--
-- The \echo progress lines are psql meta-commands, so this script is meant
-- to be executed through psql.
\echo 'Creating Ferrero Asset Tracking database structure...'

-- Extensions (no-ops when already installed)
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS pg_trgm;  -- For text search

-- Use UTC for the session running this script. This is a session-level
-- setting only; it does not change the database or role default.
SET TIME ZONE 'UTC';

\echo 'Creating tables...'
-- ============================================================================
-- Table: master_assets
-- Purpose: One row per master asset downloaded from the DAM, identified by
--          its tracking ID.
-- ============================================================================
CREATE TABLE IF NOT EXISTS master_assets (
    -- Surrogate key
    id                          SERIAL PRIMARY KEY,

    -- Tracking & identification (tracking_id is unique across the table)
    tracking_id                 VARCHAR(6)    NOT NULL UNIQUE,
    opentext_id                 VARCHAR(255)  NOT NULL,

    -- File information
    original_filename           VARCHAR(500)  NOT NULL,
    file_extension              VARCHAR(20),
    file_size_bytes             BIGINT,
    mime_type                   VARCHAR(100),

    -- Metadata fields pulled out into columns for quick access
    brand_code                  VARCHAR(5),
    brand_name                  VARCHAR(255),
    country_code                VARCHAR(2),
    country_name                VARCHAR(255),
    language_code               VARCHAR(3),
    language_name               VARCHAR(100),
    subject_title               VARCHAR(255),
    asset_type                  VARCHAR(3),
    asset_type_name             VARCHAR(255),
    duration_seconds            INTEGER,
    aspect_ratio                VARCHAR(10),
    width_px                    INTEGER,
    height_px                   INTEGER,

    -- Campaign relationships (plain columns; no foreign keys are declared)
    global_master_campaign_id   VARCHAR(50),
    global_master_folder_id     VARCHAR(255),
    local_campaign_id           VARCHAR(50),
    global_master_tracking_id   VARCHAR(6),

    -- Workflow information
    upload_directory            VARCHAR(1000),
    description                 TEXT,

    -- Complete metadata document, stored untruncated as JSONB
    full_metadata               JSONB,

    -- Tags and categories
    tags                        TEXT[],
    categories                  TEXT[],

    -- Status / soft-delete markers
    status                      VARCHAR(50)   DEFAULT 'active',
    is_deleted                  BOOLEAN       DEFAULT FALSE,
    deleted_at                  TIMESTAMP,

    -- Tracking (timestamps are TIMESTAMP without time zone)
    ingested_by                 VARCHAR(255),
    ingested_at                 TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    created_at                  TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at                  TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table master_assets created'
-- ============================================================================
-- Table: derivative_assets
-- Purpose: Tracks derivative assets (localized versions) from master assets
-- ============================================================================
CREATE TABLE IF NOT EXISTS derivative_assets (
    -- Primary Key
    id                  SERIAL PRIMARY KEY,

    -- Link to Master Asset.
    -- Both references cascade on delete. tracking_id is NOT NULL, so with the
    -- default NO ACTION it could reject a master delete that the
    -- master_asset_id cascade is declared to allow (always so when
    -- master_asset_id is NULL). Cascading on both keeps delete behaviour
    -- independent of which link column was populated.
    -- NOTE: CREATE TABLE IF NOT EXISTS leaves an existing table untouched;
    -- databases created earlier need an ALTER TABLE to pick this up.
    tracking_id         VARCHAR(6) NOT NULL
                            REFERENCES master_assets (tracking_id) ON DELETE CASCADE,
    master_asset_id     INTEGER
                            REFERENCES master_assets (id) ON DELETE CASCADE,

    -- Derivative File Information
    derivative_filename VARCHAR(500),
    file_extension      VARCHAR(20),
    dam_asset_id        VARCHAR(255),

    -- Upload Information
    upload_status       VARCHAR(50) DEFAULT 'pending',
    upload_error        TEXT,
    uploaded_by         VARCHAR(255),
    uploaded_at         TIMESTAMP,

    -- Status
    status              VARCHAR(50) DEFAULT 'active',
    created_at          TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at          TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table derivative_assets created'
-- ============================================================================
-- Table: asset_events
-- Purpose: Audit log for all asset operations
-- ============================================================================
CREATE TABLE IF NOT EXISTS asset_events (
    id              SERIAL PRIMARY KEY,
    -- Plain column, no foreign key is declared on it.
    tracking_id     VARCHAR(6),
    -- Required label for the kind of event being recorded.
    event_type      VARCHAR(100) NOT NULL,
    -- Free-form JSONB payload for the event.
    event_data      JSONB,
    event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    created_by      VARCHAR(255)
);

\echo 'Table asset_events created'
-- ============================================================================
-- Table: workflow_state
-- Purpose: Tracks workflow execution state for monitoring
-- ============================================================================
CREATE TABLE IF NOT EXISTS workflow_state (
    id            SERIAL PRIMARY KEY,
    workflow_name VARCHAR(100) NOT NULL,
    campaign_id   VARCHAR(255),

    -- Outcome of the most recent run
    last_run_at   TIMESTAMP,
    last_status   VARCHAR(50),
    error_message TEXT,

    -- Row bookkeeping
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table workflow_state created'
-- ============================================================================
-- Table: campaign_status
-- Purpose: Tracks campaign processing and live status (prevents duplicate webhooks)
-- ============================================================================
CREATE TABLE IF NOT EXISTS campaign_status (
    -- Surrogate key
    id              SERIAL PRIMARY KEY,

    -- Campaign identification
    campaign_id     VARCHAR(255) NOT NULL UNIQUE,  -- DAM campaign folder ID (unique!)
    campaign_number VARCHAR(50)  NOT NULL,         -- e.g. C000000078
    campaign_name   VARCHAR(500) NOT NULL,

    -- Live status (value sets are documented here only; no CHECK enforces them)
    live_campaign   VARCHAR(3)   NOT NULL,         -- 'YES' or 'NO'
    status          VARCHAR(10)  NOT NULL,         -- A1, A2, A4, A5, B1, B2, etc.

    -- Webhook tracking
    webhook_sent    BOOLEAN      DEFAULT FALSE,
    webhook_sent_at TIMESTAMP,

    -- Row bookkeeping
    created_at      TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table campaign_status created'
-- ============================================================================
-- Table: creativex_scores
-- Purpose: Stores CreativeX quality scores extracted from PDFs via LlamaExtract
-- ============================================================================
CREATE TABLE IF NOT EXISTS creativex_scores (
    -- Surrogate key
    id                   SERIAL PRIMARY KEY,

    -- File information
    filename             VARCHAR(500) NOT NULL,
    box_file_id          VARCHAR(255),

    -- CreativeX data, parsed into columns for quick access
    creativex_id         VARCHAR(255),
    creativex_url        TEXT,
    quality_score        VARCHAR(50),   -- stored as text, not as a number

    -- Complete LlamaExtract response, kept as JSONB for future use
    full_extraction_data JSONB,

    -- Tracking ID (links to master_assets for master scores); plain column,
    -- no foreign key is declared
    tracking_id          VARCHAR(6),

    -- Timestamps and row status
    extracted_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- status values: 'active', 'superseded',
    -- 'master-cx-score' (A1 local masters), 'b1-master-cx-score' (B1 global masters)
    status               VARCHAR(50) DEFAULT 'active',
    created_at           TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table creativex_scores created'
\echo 'Tables created successfully'
-- ============================================================================
-- INDEXES
-- All statements use IF NOT EXISTS, so re-running the script is harmless.
-- ============================================================================
\echo 'Creating indexes...'

-- master_assets
-- NOTE(review): tracking_id already carries a UNIQUE constraint, which
-- PostgreSQL backs with its own index; this extra index looks redundant.
CREATE INDEX IF NOT EXISTS idx_master_assets_tracking_id
    ON master_assets (tracking_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_opentext_id
    ON master_assets (opentext_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_status
    ON master_assets (status);
CREATE INDEX IF NOT EXISTS idx_master_assets_brand_code
    ON master_assets (brand_code);
CREATE INDEX IF NOT EXISTS idx_master_assets_created_at
    ON master_assets (created_at);
CREATE INDEX IF NOT EXISTS idx_master_assets_global_master
    ON master_assets (global_master_campaign_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_local_campaign
    ON master_assets (local_campaign_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_opentext_local
    ON master_assets (opentext_id, local_campaign_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_global_master_tracking
    ON master_assets (global_master_tracking_id);

-- derivative_assets
CREATE INDEX IF NOT EXISTS idx_derivative_tracking_id
    ON derivative_assets (tracking_id);
CREATE INDEX IF NOT EXISTS idx_derivative_status
    ON derivative_assets (upload_status);
CREATE INDEX IF NOT EXISTS idx_derivative_created_at
    ON derivative_assets (created_at);

-- asset_events
CREATE INDEX IF NOT EXISTS idx_events_tracking_id
    ON asset_events (tracking_id);
CREATE INDEX IF NOT EXISTS idx_events_timestamp
    ON asset_events (event_timestamp);
CREATE INDEX IF NOT EXISTS idx_events_type
    ON asset_events (event_type);

-- workflow_state
CREATE INDEX IF NOT EXISTS idx_workflow_name
    ON workflow_state (workflow_name);
CREATE INDEX IF NOT EXISTS idx_workflow_campaign
    ON workflow_state (campaign_id);

-- campaign_status
-- NOTE(review): campaign_id is declared UNIQUE, so it already has an
-- index; this one looks redundant as well.
CREATE INDEX IF NOT EXISTS idx_campaign_status_campaign_id
    ON campaign_status (campaign_id);
CREATE INDEX IF NOT EXISTS idx_campaign_status_number
    ON campaign_status (campaign_number);
CREATE INDEX IF NOT EXISTS idx_campaign_status_status
    ON campaign_status (status);
CREATE INDEX IF NOT EXISTS idx_campaign_status_live
    ON campaign_status (live_campaign);
CREATE INDEX IF NOT EXISTS idx_campaign_status_webhook_sent
    ON campaign_status (webhook_sent);

-- creativex_scores
CREATE INDEX IF NOT EXISTS idx_creativex_filename
    ON creativex_scores (filename);
CREATE INDEX IF NOT EXISTS idx_creativex_box_file
    ON creativex_scores (box_file_id);
CREATE INDEX IF NOT EXISTS idx_creativex_status
    ON creativex_scores (status);
CREATE INDEX IF NOT EXISTS idx_creativex_tracking_id
    ON creativex_scores (tracking_id);

\echo 'Indexes created successfully'
-- ============================================================================
-- FUNCTIONS
-- ============================================================================
\echo 'Creating functions...'

-- Function: update_updated_at_column
-- Row-level trigger function that overwrites NEW.updated_at with the current
-- transaction timestamp and returns the modified row. The target table must
-- have an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
-- Function: log_master_asset_event
-- Row-level trigger function that writes one asset_events row per change:
--   INSERT -> 'master_asset_created', payload = the new row
--   UPDATE -> 'master_asset_updated', payload = {"old": ..., "new": ...}
--   DELETE -> 'master_asset_deleted', payload = the removed row
-- The row's tracking_id is copied into asset_events.tracking_id.
CREATE OR REPLACE FUNCTION log_master_asset_event()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (
            NEW.tracking_id,
            'master_asset_created',
            CAST(row_to_json(NEW) AS jsonb)
        );
        RETURN NEW;

    ELSIF TG_OP = 'UPDATE' THEN
        -- Keep both row images so the change can be diffed later.
        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (
            NEW.tracking_id,
            'master_asset_updated',
            jsonb_build_object(
                'old', CAST(row_to_json(OLD) AS jsonb),
                'new', CAST(row_to_json(NEW) AS jsonb)
            )
        );
        RETURN NEW;

    ELSIF TG_OP = 'DELETE' THEN
        -- Only OLD is populated for a delete.
        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (
            OLD.tracking_id,
            'master_asset_deleted',
            CAST(row_to_json(OLD) AS jsonb)
        );
        RETURN OLD;
    END IF;
END;
$$;

\echo 'Functions created successfully'
-- ============================================================================
-- TRIGGERS
-- Each trigger is dropped (if present) and recreated so the script can be
-- re-run safely.
-- ============================================================================
\echo 'Creating triggers...'

-- updated_at maintenance: every table that has an updated_at column gets a
-- BEFORE UPDATE row trigger calling update_updated_at_column().
DROP TRIGGER IF EXISTS update_master_assets_updated_at ON master_assets;
CREATE TRIGGER update_master_assets_updated_at
    BEFORE UPDATE ON master_assets
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_derivative_assets_updated_at ON derivative_assets;
CREATE TRIGGER update_derivative_assets_updated_at
    BEFORE UPDATE ON derivative_assets
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_workflow_state_updated_at ON workflow_state;
CREATE TRIGGER update_workflow_state_updated_at
    BEFORE UPDATE ON workflow_state
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_campaign_status_updated_at ON campaign_status;
CREATE TRIGGER update_campaign_status_updated_at
    BEFORE UPDATE ON campaign_status
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

-- Audit logging: every insert, update and delete on master_assets is
-- recorded through log_master_asset_event().
DROP TRIGGER IF EXISTS log_master_asset_changes ON master_assets;
CREATE TRIGGER log_master_asset_changes
    AFTER INSERT OR UPDATE OR DELETE ON master_assets
    FOR EACH ROW EXECUTE FUNCTION log_master_asset_event();

\echo 'Triggers created successfully'
-- ============================================================================
-- GRANTS
-- The ferrero_user role is not created in this script and must already
-- exist. The table and sequence grants cover only objects present in the
-- public schema when this runs.
-- ============================================================================
\echo 'Setting up permissions...'

-- Schema access
GRANT USAGE ON SCHEMA public TO ferrero_user;

-- Full access to tables, and to sequences (which include the ones backing
-- the SERIAL id columns)
GRANT ALL ON ALL TABLES IN SCHEMA public TO ferrero_user;
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO ferrero_user;

\echo 'Permissions granted'
-- ============================================================================
-- VERIFICATION
-- Static summary printed at the end of the run. The numbers are hard-coded
-- and must be kept in step with the script: they count the columns of
-- master_assets and the CREATE INDEX / CREATE TRIGGER / CREATE FUNCTION
-- statements above (indexes implied by PRIMARY KEY / UNIQUE are not counted).
-- ============================================================================
\echo ''
\echo '============================================================'
\echo 'Database initialization complete!'
\echo '============================================================'
\echo ''
\echo 'Tables created:'
\echo ' - master_assets (36 columns)'
\echo ' - derivative_assets'
\echo ' - asset_events'
\echo ' - workflow_state'
\echo ' - campaign_status'
\echo ' - creativex_scores'
\echo ''
\echo 'Indexes created: 26'
\echo 'Triggers created: 5'
\echo 'Functions created: 2'
\echo ''
\echo 'Ready to use!'
\echo '============================================================'
-- Show row counts for every table this script creates, one result row per
-- table. On a fresh database every table reports 0; on a re-run the counts
-- show the existing data.
SELECT
    'master_assets' AS table_name,
    COUNT(*) AS row_count
FROM master_assets
UNION ALL
SELECT
    'derivative_assets' AS table_name,
    COUNT(*) AS row_count
FROM derivative_assets
UNION ALL
SELECT
    'asset_events' AS table_name,
    COUNT(*) AS row_count
FROM asset_events
UNION ALL
SELECT
    'workflow_state' AS table_name,
    COUNT(*) AS row_count
FROM workflow_state
UNION ALL
SELECT
    'campaign_status' AS table_name,
    COUNT(*) AS row_count
FROM campaign_status
UNION ALL
SELECT
    'creativex_scores' AS table_name,
    COUNT(*) AS row_count
FROM creativex_scores;