ferrero-opentext/Python-Version/database/init.sql
DJP b6b9d7337a Add CreativeX score extraction and storage system
Implements new workflow to extract CreativeX quality scores from PDFs
using LlamaExtract AI and store results in PostgreSQL database.

Components added:
- creativex_scoring_storing.py: Main script to process PDFs from Box
- creativex_scores table: Database table with JSONB for full JSON storage
- Database methods: store_creativex_score() and get_creativex_score_by_filename()
- Email templates: creativex_complete, creativex_partial, creativex_no_files
- Configuration: creativex section in config.yaml
- CREATIVEX_DEPLOYMENT.md: Complete deployment and usage guide

Features:
- Monitors Box folder 350605024645 for PDFs
- Extracts scores using LlamaExtract agent "Creativex-Extract"
- Stores 4 key fields (filename, ID, URL, score) + full JSON
- Deletes processed PDFs from Box after successful extraction
- Sends email notifications for success/partial/no-files scenarios
- Manual execution (python scripts/creativex_scoring_storing.py)

Database schema:
- Table: creativex_scores with 10 columns
- Indexes on filename, box_file_id, status for fast lookups
- JSONB column stores complete extraction for future flexibility

Future integration ready:
db.get_creativex_score_by_filename() available for DAM upload workflows
to attach CreativeX metadata during asset processing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-11 16:15:45 -05:00

389 lines
13 KiB
PL/PgSQL

-- Ferrero Asset Tracking Database - Initialization Script
-- PostgreSQL 15+
-- Last Updated: November 11, 2025
\echo 'Creating Ferrero Asset Tracking database structure...'

-- Extensions. IF NOT EXISTS keeps the script safe to re-run.
-- "uuid-ossp" has to stay double-quoted because its name contains a hyphen.
-- NOTE(review): the DDL visible in this script never calls a uuid-ossp
-- function and never builds a trigram index -- confirm both are still needed.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Run this session in UTC. SET only affects the current session.
SET TIME ZONE 'UTC';

\echo 'Creating tables...'
-- ============================================================================
-- Table: master_assets
-- Purpose: One row per master asset downloaded from the DAM, keyed by a
--          6-character tracking ID.
-- Notes:   Row changes are audited into asset_events and updated_at is
--          refreshed by the triggers defined in the TRIGGERS section.
-- ============================================================================
CREATE TABLE IF NOT EXISTS master_assets (
    -- Surrogate primary key
    id                          SERIAL        PRIMARY KEY,

    -- Identification
    tracking_id                 VARCHAR(6)    NOT NULL UNIQUE,
    opentext_id                 VARCHAR(255)  NOT NULL,

    -- File details
    original_filename           VARCHAR(500)  NOT NULL,
    file_extension              VARCHAR(20),
    file_size_bytes             BIGINT,
    mime_type                   VARCHAR(100),

    -- Metadata fields kept as plain columns for quick access
    brand_code                  VARCHAR(5),
    brand_name                  VARCHAR(255),
    country_code                VARCHAR(2),
    country_name                VARCHAR(255),
    language_code               VARCHAR(3),
    language_name               VARCHAR(100),
    subject_title               VARCHAR(255),
    asset_type                  VARCHAR(3),
    asset_type_name             VARCHAR(255),
    duration_seconds            INTEGER,
    aspect_ratio                VARCHAR(10),
    width_px                    INTEGER,
    height_px                   INTEGER,

    -- Campaign relationships
    global_master_campaign_id   VARCHAR(50),
    global_master_folder_id     VARCHAR(255),
    local_campaign_id           VARCHAR(50),

    -- Workflow details
    upload_directory            VARCHAR(1000),
    description                 TEXT,

    -- Complete, untruncated metadata document
    full_metadata               JSONB,

    -- Tags and categories
    tags                        TEXT[],
    categories                  TEXT[],

    -- Status / soft-delete markers
    status                      VARCHAR(50)   DEFAULT 'active',
    is_deleted                  BOOLEAN       DEFAULT FALSE,
    deleted_at                  TIMESTAMP,

    -- Ingestion and row bookkeeping
    ingested_by                 VARCHAR(255),
    ingested_at                 TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    created_at                  TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at                  TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table master_assets created'
-- ============================================================================
-- Table: derivative_assets
-- Purpose: Derivative (localized) versions produced from a master asset,
--          together with their upload outcome.
-- ============================================================================
CREATE TABLE IF NOT EXISTS derivative_assets (
    -- Surrogate primary key
    id                    SERIAL        PRIMARY KEY,

    -- Links back to the master asset.
    -- tracking_id has no ON DELETE action (PostgreSQL default: NO ACTION),
    -- while master_asset_id cascades.
    -- NOTE(review): a derivative row whose master_asset_id is NULL will block
    -- deletion of its master row -- confirm that is intended.
    tracking_id           VARCHAR(6)    NOT NULL
                                        REFERENCES master_assets (tracking_id),
    master_asset_id       INTEGER       REFERENCES master_assets (id)
                                        ON DELETE CASCADE,

    -- Derivative file details
    derivative_filename   VARCHAR(500),
    file_extension        VARCHAR(20),
    dam_asset_id          VARCHAR(255),

    -- Upload outcome
    upload_status         VARCHAR(50)   DEFAULT 'pending',
    upload_error          TEXT,
    uploaded_by           VARCHAR(255),
    uploaded_at           TIMESTAMP,

    -- Row status and bookkeeping
    status                VARCHAR(50)   DEFAULT 'active',
    created_at            TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at            TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table derivative_assets created'
-- ============================================================================
-- Table: asset_events
-- Purpose: Audit log of asset operations. Rows are written by
--          log_master_asset_event() for changes to master_assets.
-- Notes:   tracking_id carries no foreign key, so an event can be stored for
--          a master row that has already been deleted (the DELETE branch of
--          the logging function relies on this).
-- ============================================================================
CREATE TABLE IF NOT EXISTS asset_events (
    id                SERIAL        PRIMARY KEY,
    tracking_id       VARCHAR(6),
    event_type        VARCHAR(100)  NOT NULL,
    event_data        JSONB,
    event_timestamp   TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    created_by        VARCHAR(255)
);

\echo 'Table asset_events created'
-- ============================================================================
-- Table: workflow_state
-- Purpose: Last-run state of each workflow, kept for monitoring.
-- ============================================================================
CREATE TABLE IF NOT EXISTS workflow_state (
    id              SERIAL        PRIMARY KEY,
    workflow_name   VARCHAR(100)  NOT NULL,
    campaign_id     VARCHAR(255),
    last_run_at     TIMESTAMP,
    last_status     VARCHAR(50),
    error_message   TEXT,
    created_at      TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table workflow_state created'
-- ============================================================================
-- Table: campaign_status
-- Purpose: Per-campaign processing / live status; the webhook_sent flag is
--          what prevents duplicate webhooks.
-- ============================================================================
CREATE TABLE IF NOT EXISTS campaign_status (
    -- Surrogate primary key
    id                SERIAL        PRIMARY KEY,

    -- Campaign identification
    -- campaign_id: DAM campaign folder ID, one row per campaign (UNIQUE)
    campaign_id       VARCHAR(255)  NOT NULL UNIQUE,
    -- campaign_number: e.g. C000000078
    campaign_number   VARCHAR(50)   NOT NULL,
    campaign_name     VARCHAR(500)  NOT NULL,

    -- Live status
    -- live_campaign: 'YES' or 'NO' (not enforced by a CHECK constraint)
    live_campaign     VARCHAR(3)    NOT NULL,
    -- status: codes such as A1, A2, A4, A5, B1, B2, ...
    status            VARCHAR(10)   NOT NULL,

    -- Webhook tracking
    webhook_sent      BOOLEAN       DEFAULT FALSE,
    webhook_sent_at   TIMESTAMP,

    -- Row bookkeeping
    created_at        TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table campaign_status created'
-- ============================================================================
-- Table: creativex_scores
-- Purpose: CreativeX quality scores extracted from PDFs via LlamaExtract.
-- Notes:   This table has no updated_at column and no trigger attached in
--          this script.
-- ============================================================================
CREATE TABLE IF NOT EXISTS creativex_scores (
    -- Surrogate primary key
    id                     SERIAL        PRIMARY KEY,

    -- Source file
    filename               VARCHAR(500)  NOT NULL,
    box_file_id            VARCHAR(255),

    -- Parsed CreativeX fields kept as plain columns for quick access
    creativex_id           VARCHAR(255),
    creativex_url          TEXT,
    quality_score          VARCHAR(50),

    -- Complete LlamaExtract response, retained for future use
    full_extraction_data   JSONB,

    -- Timestamps and row status
    extracted_at           TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    status                 VARCHAR(50)   DEFAULT 'active',
    created_at             TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);

\echo 'Table creativex_scores created'
\echo 'Tables created successfully'
-- ============================================================================
-- INDEXES
-- All statements use IF NOT EXISTS so the section can be re-run safely.
-- ============================================================================
\echo 'Creating indexes...'

-- master_assets
-- NOTE(review): tracking_id is declared UNIQUE on the table, which already
-- provides an implicit unique index; idx_master_assets_tracking_id looks
-- redundant. idx_master_assets_opentext_id is also a leading-column prefix of
-- idx_master_assets_opentext_local. Confirm before dropping either.
CREATE INDEX IF NOT EXISTS idx_master_assets_tracking_id
    ON master_assets (tracking_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_opentext_id
    ON master_assets (opentext_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_status
    ON master_assets (status);
CREATE INDEX IF NOT EXISTS idx_master_assets_brand_code
    ON master_assets (brand_code);
CREATE INDEX IF NOT EXISTS idx_master_assets_created_at
    ON master_assets (created_at);
CREATE INDEX IF NOT EXISTS idx_master_assets_global_master
    ON master_assets (global_master_campaign_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_local_campaign
    ON master_assets (local_campaign_id);
CREATE INDEX IF NOT EXISTS idx_master_assets_opentext_local
    ON master_assets (opentext_id, local_campaign_id);

-- derivative_assets
-- Note: idx_derivative_status covers upload_status, not the status column.
CREATE INDEX IF NOT EXISTS idx_derivative_tracking_id
    ON derivative_assets (tracking_id);
CREATE INDEX IF NOT EXISTS idx_derivative_status
    ON derivative_assets (upload_status);
CREATE INDEX IF NOT EXISTS idx_derivative_created_at
    ON derivative_assets (created_at);

-- asset_events
CREATE INDEX IF NOT EXISTS idx_events_tracking_id
    ON asset_events (tracking_id);
CREATE INDEX IF NOT EXISTS idx_events_timestamp
    ON asset_events (event_timestamp);
CREATE INDEX IF NOT EXISTS idx_events_type
    ON asset_events (event_type);

-- workflow_state
CREATE INDEX IF NOT EXISTS idx_workflow_name
    ON workflow_state (workflow_name);
CREATE INDEX IF NOT EXISTS idx_workflow_campaign
    ON workflow_state (campaign_id);

-- campaign_status
-- NOTE(review): campaign_id is declared UNIQUE on the table, so
-- idx_campaign_status_campaign_id duplicates the implicit unique index.
CREATE INDEX IF NOT EXISTS idx_campaign_status_campaign_id
    ON campaign_status (campaign_id);
CREATE INDEX IF NOT EXISTS idx_campaign_status_number
    ON campaign_status (campaign_number);
CREATE INDEX IF NOT EXISTS idx_campaign_status_status
    ON campaign_status (status);
CREATE INDEX IF NOT EXISTS idx_campaign_status_live
    ON campaign_status (live_campaign);
CREATE INDEX IF NOT EXISTS idx_campaign_status_webhook_sent
    ON campaign_status (webhook_sent);

-- creativex_scores
CREATE INDEX IF NOT EXISTS idx_creativex_filename
    ON creativex_scores (filename);
CREATE INDEX IF NOT EXISTS idx_creativex_box_file
    ON creativex_scores (box_file_id);
CREATE INDEX IF NOT EXISTS idx_creativex_status
    ON creativex_scores (status);

\echo 'Indexes created successfully'
-- ============================================================================
-- FUNCTIONS
-- ============================================================================
\echo 'Creating functions...'

-- update_updated_at_column()
-- Row-level trigger function: overwrites NEW.updated_at with the current
-- timestamp and returns the modified row. It is attached as a BEFORE UPDATE
-- trigger in the TRIGGERS section, so the target table must have an
-- updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
-- log_master_asset_event()
-- Row-level trigger function that writes one asset_events row per change:
--   INSERT -> 'master_asset_created', event_data = the new row
--   UPDATE -> 'master_asset_updated', event_data = {"old": ..., "new": ...}
--   DELETE -> 'master_asset_deleted', event_data = the old row
-- Returns NEW for INSERT/UPDATE and OLD for DELETE. Any other TG_OP falls
-- through to the end of the function without a RETURN.
CREATE OR REPLACE FUNCTION log_master_asset_event()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    v_payload JSONB;  -- JSON snapshot stored in asset_events.event_data
BEGIN
    IF TG_OP = 'INSERT' THEN
        v_payload := CAST(row_to_json(NEW) AS JSONB);

        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (NEW.tracking_id, 'master_asset_created', v_payload);

        RETURN NEW;
    END IF;

    IF TG_OP = 'UPDATE' THEN
        -- Keep both row images so the audit entry shows what changed.
        v_payload := jsonb_build_object(
            'old', CAST(row_to_json(OLD) AS JSONB),
            'new', CAST(row_to_json(NEW) AS JSONB)
        );

        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (NEW.tracking_id, 'master_asset_updated', v_payload);

        RETURN NEW;
    END IF;

    IF TG_OP = 'DELETE' THEN
        v_payload := CAST(row_to_json(OLD) AS JSONB);

        INSERT INTO asset_events (tracking_id, event_type, event_data)
        VALUES (OLD.tracking_id, 'master_asset_deleted', v_payload);

        RETURN OLD;
    END IF;
END;
$$;

\echo 'Functions created successfully'
-- ============================================================================
-- TRIGGERS
-- Each trigger is dropped (if present) and recreated so the script can be
-- re-run against an existing database.
-- ============================================================================
\echo 'Creating triggers...'

-- master_assets: refresh updated_at on every row update
DROP TRIGGER IF EXISTS update_master_assets_updated_at ON master_assets;
CREATE TRIGGER update_master_assets_updated_at
    BEFORE UPDATE ON master_assets
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();

-- master_assets: audit every insert, update and delete into asset_events
DROP TRIGGER IF EXISTS log_master_asset_changes ON master_assets;
CREATE TRIGGER log_master_asset_changes
    AFTER INSERT OR UPDATE OR DELETE ON master_assets
    FOR EACH ROW
    EXECUTE FUNCTION log_master_asset_event();

-- derivative_assets: refresh updated_at on every row update
DROP TRIGGER IF EXISTS update_derivative_assets_updated_at ON derivative_assets;
CREATE TRIGGER update_derivative_assets_updated_at
    BEFORE UPDATE ON derivative_assets
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();

-- workflow_state: refresh updated_at on every row update
DROP TRIGGER IF EXISTS update_workflow_state_updated_at ON workflow_state;
CREATE TRIGGER update_workflow_state_updated_at
    BEFORE UPDATE ON workflow_state
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();

-- campaign_status: refresh updated_at on every row update
DROP TRIGGER IF EXISTS update_campaign_status_updated_at ON campaign_status;
CREATE TRIGGER update_campaign_status_updated_at
    BEFORE UPDATE ON campaign_status
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();

\echo 'Triggers created successfully'
-- ============================================================================
-- GRANTS
-- Gives the ferrero_user role access to the public schema and to the tables
-- and sequences that exist in it when this section runs. Objects created
-- later are not covered by these statements.
-- ============================================================================
\echo 'Setting up permissions...'

-- Schema access
GRANT USAGE
    ON SCHEMA public
    TO ferrero_user;

-- Tables, plus the sequences behind the SERIAL primary keys
GRANT ALL PRIVILEGES
    ON ALL TABLES IN SCHEMA public
    TO ferrero_user;
GRANT ALL PRIVILEGES
    ON ALL SEQUENCES IN SCHEMA public
    TO ferrero_user;

\echo 'Permissions granted'
-- ============================================================================
-- VERIFICATION
-- ============================================================================
-- Summary banner printed at the end of the run. The figures are hard-coded,
-- not queried, so they must be updated by hand whenever the script changes:
--   indexes   = explicit CREATE INDEX statements in the INDEXES section (24)
--   triggers  = CREATE TRIGGER statements in the TRIGGERS section (5)
--   functions = CREATE FUNCTION statements in the FUNCTIONS section (2)
\echo ''
\echo '============================================================'
\echo 'Database initialization complete!'
\echo '============================================================'
\echo ''
\echo 'Tables created:'
\echo ' - master_assets (35 columns)'
\echo ' - derivative_assets'
\echo ' - asset_events'
\echo ' - workflow_state'
\echo ' - campaign_status'
\echo ' - creativex_scores'
\echo ''
\echo 'Indexes created: 24'
\echo 'Triggers created: 5'
\echo 'Functions created: 2'
\echo ''
\echo 'Ready to use!'
\echo '============================================================'
-- Show table counts
-- One (table_name, row_count) row for every table this script creates, as a
-- quick check that each table exists and can be queried. UNION ALL is used
-- because the branches are disjoint and nothing needs de-duplicating.
SELECT
    'master_assets' AS table_name,
    COUNT(*) AS row_count
FROM master_assets
UNION ALL
SELECT
    'derivative_assets' AS table_name,
    COUNT(*) AS row_count
FROM derivative_assets
UNION ALL
SELECT
    'asset_events' AS table_name,
    COUNT(*) AS row_count
FROM asset_events
UNION ALL
SELECT
    'workflow_state' AS table_name,
    COUNT(*) AS row_count
FROM workflow_state
UNION ALL
SELECT
    'campaign_status' AS table_name,
    COUNT(*) AS row_count
FROM campaign_status
UNION ALL
SELECT
    'creativex_scores' AS table_name,
    COUNT(*) AS row_count
FROM creativex_scores;