20 KiB
Ferrero Content Scaling - Database Schema Documentation
Database: ferrero_tracking PostgreSQL Version: 15+ Last Updated: November 3, 2025
Overview
The database tracks master assets, derivative assets, and campaign relationships for the Ferrero content scaling workflow.
Key Features:
- Prefix-based tracking ID system (master files start with 'M', regular files never start with 'M')
- Full metadata storage (JSONB, no truncation)
- Global campaign relationship tracking
- All-done checks for workflow automation
Database Connection
Local Development:
Host: localhost
Port: 5437
Database: ferrero_tracking
User: ferrero_user
Password: ferrero_pass_2025
Connection String:
postgresql://ferrero_user:ferrero_pass_2025@localhost:5437/ferrero_tracking
Tables
master_assets
Purpose: Stores master assets downloaded from DAM with tracking IDs and full metadata
Schema:
CREATE TABLE master_assets (
-- Primary Key
id SERIAL PRIMARY KEY,
-- Tracking & Identification
tracking_id VARCHAR(6) UNIQUE NOT NULL,
opentext_id VARCHAR(255) NOT NULL,
-- File Information
original_filename VARCHAR(500) NOT NULL,
file_extension VARCHAR(20),
file_size_bytes BIGINT,
mime_type VARCHAR(100),
-- Metadata Fields (extracted for quick access)
brand_code VARCHAR(5),
brand_name VARCHAR(255),
country_code VARCHAR(2),
country_name VARCHAR(255),
language_code VARCHAR(3),
language_name VARCHAR(100),
subject_title VARCHAR(255),
asset_type VARCHAR(3),
asset_type_name VARCHAR(255),
duration_seconds INTEGER,
aspect_ratio VARCHAR(10),
width_px INTEGER,
height_px INTEGER,
-- Campaign Relationships (NEW!)
global_master_campaign_id VARCHAR(50),
global_master_folder_id VARCHAR(255),
-- Workflow Information
upload_directory VARCHAR(1000),
description TEXT,
-- Full Metadata Storage (NEW - CRITICAL!)
full_metadata JSONB,
-- Tags and Categories
tags TEXT[],
categories TEXT[],
-- Status
status VARCHAR(50) DEFAULT 'active',
is_deleted BOOLEAN DEFAULT FALSE,
deleted_at TIMESTAMP,
-- Tracking
ingested_by VARCHAR(255),
ingested_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Indexes:
CREATE INDEX idx_master_assets_tracking_id ON master_assets(tracking_id);
CREATE INDEX idx_master_assets_opentext_id ON master_assets(opentext_id);
CREATE INDEX idx_master_assets_status ON master_assets(status);
CREATE INDEX idx_master_assets_brand_code ON master_assets(brand_code);
CREATE INDEX idx_master_assets_created_at ON master_assets(created_at);
CREATE INDEX idx_master_assets_global_master ON master_assets(global_master_campaign_id);
Triggers:
-- Auto-update updated_at timestamp
CREATE TRIGGER update_master_assets_updated_at
BEFORE UPDATE ON master_assets
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Log changes
CREATE TRIGGER log_master_asset_changes
AFTER INSERT OR UPDATE OR DELETE ON master_assets
FOR EACH ROW
EXECUTE FUNCTION log_master_asset_event();
Key Changes Made:
- ✅ Added
full_metadata JSONB(Session 2) - Stores complete 200KB+ metadata without truncation - ✅ Added
global_master_campaign_id(Session 3) - Tracks Global master campaign reference - ✅ Added
global_master_folder_id(Session 3) - Stores Global master folder ID
derivative_assets
Purpose: Tracks derivative assets (localized versions) created from master assets
Schema:
CREATE TABLE derivative_assets (
-- Primary Key
id SERIAL PRIMARY KEY,
-- Link to Master Asset
tracking_id VARCHAR(6) NOT NULL REFERENCES master_assets(tracking_id),
master_asset_id INTEGER REFERENCES master_assets(id) ON DELETE CASCADE,
-- Derivative File Information
derivative_filename VARCHAR(500) NOT NULL,
file_extension VARCHAR(20),
-- Metadata from V2 Filename
language_code VARCHAR(3),
language_name VARCHAR(100),
asset_type VARCHAR(3),
asset_type_name VARCHAR(255),
duration_seconds INTEGER,
aspect_ratio VARCHAR(10),
omg_job_number VARCHAR(10),
spot_version VARCHAR(10),
has_master_flag BOOLEAN DEFAULT FALSE,
-- Status Tracking
status VARCHAR(50) DEFAULT 'active',
uploaded_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(255)
);
Indexes:
CREATE INDEX idx_derivative_assets_tracking_id ON derivative_assets(tracking_id);
CREATE INDEX idx_derivative_assets_master_id ON derivative_assets(master_asset_id);
CREATE INDEX idx_derivative_assets_omg_job ON derivative_assets(omg_job_number);
Foreign Keys:
ALTER TABLE derivative_assets
ADD CONSTRAINT derivative_assets_tracking_id_fkey
FOREIGN KEY (tracking_id) REFERENCES master_assets(tracking_id);
ALTER TABLE derivative_assets
ADD CONSTRAINT derivative_assets_master_asset_id_fkey
FOREIGN KEY (master_asset_id) REFERENCES master_assets(id) ON DELETE CASCADE;
Triggers:
CREATE TRIGGER log_derivative_asset_changes
AFTER INSERT OR UPDATE ON derivative_assets
FOR EACH ROW
EXECUTE FUNCTION log_derivative_asset_event();
Migration Scripts
Complete Setup (Fresh Install)
File: setup_database.sql
-- Create database (run as postgres user)
CREATE DATABASE ferrero_tracking;
CREATE USER ferrero_user WITH PASSWORD 'ferrero_pass_2025';
GRANT ALL PRIVILEGES ON DATABASE ferrero_tracking TO ferrero_user;
-- Connect to database
\c ferrero_tracking
-- Create tables (see full schema above)
-- ... master_assets table ...
-- ... derivative_assets table ...
-- Create indexes
-- ... all indexes ...
-- Create triggers
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION log_master_asset_event()
RETURNS TRIGGER AS $$
BEGIN
-- Log to audit table if exists
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
Migration 1: Add full_metadata JSONB (October 30, 2025)
Purpose: Store complete DAM metadata without truncation
Before: Metadata truncated to 5,000 characters in description field After: Full metadata stored in JSONB column (200KB+)
-- Add column
ALTER TABLE master_assets ADD COLUMN IF NOT EXISTS full_metadata JSONB;
-- Update existing records (if migrating from old schema)
-- Note: Old data was truncated, can't recover lost metadata
-- New downloads will populate this column
Impact:
- ✅ No more metadata truncation
- ✅ Can query metadata using PostgreSQL JSON operators
- ✅ Efficient storage and retrieval
- ✅ Enables full metadata preservation
Migration 2: Add Global Campaign Reference (November 3, 2025)
Purpose: Track relationship between local campaigns and global masters
Why: Local Adaptation campaigns reference a Global Master campaign. This relationship needs to be stored for future cross-campaign features.
-- Add columns
ALTER TABLE master_assets
ADD COLUMN IF NOT EXISTS global_master_campaign_id VARCHAR(50);
ALTER TABLE master_assets
ADD COLUMN IF NOT EXISTS global_master_folder_id VARCHAR(255);
-- Add index for queries
CREATE INDEX IF NOT EXISTS idx_master_assets_global_master
ON master_assets(global_master_campaign_id);
How It's Populated:
- Extracted from
inherited_metadata_collections[]in asset JSON - Field:
FERRERO.FIELD.GLOBAL CAMPAIGN REFERENCE - Only populated for Local Adaptation assets (A1→A2 workflow)
- Global Master assets (B1→B2) have NULL values (they don't reference themselves)
Impact:
- ✅ Can find all local assets for a Global Master
- ✅ Enables cross-campaign reporting
- ✅ Foundation for future features
Example Query:
-- Find all local assets for Global Master C000000068
SELECT tracking_id, original_filename, created_at
FROM master_assets
WHERE global_master_campaign_id = 'C000000068'
ORDER BY created_at DESC;
Column Descriptions
master_assets Table
| Column | Type | Nullable | Default | Description |
|---|---|---|---|---|
id |
SERIAL | NOT NULL | auto | Primary key |
tracking_id |
VARCHAR(6) | NOT NULL | - | Unique 6-char tracking ID (links master to derivatives) Master files (B1→B2): Start with 'M' (e.g., M4x7Qp)Regular files (A1→A2, A5→A6): Never start with 'M' (e.g., a4x7Qp) |
opentext_id |
VARCHAR(255) | NOT NULL | - | DAM asset ID (hex string like 0008a50...) |
original_filename |
VARCHAR(500) | NOT NULL | - | Filename without extension |
file_extension |
VARCHAR(20) | NULL | - | File extension with dot (.mp4, .jpg) |
file_size_bytes |
BIGINT | NULL | - | File size in bytes |
mime_type |
VARCHAR(100) | NULL | - | MIME type (video/mp4, image/jpeg) |
brand_code |
VARCHAR(5) | NULL | - | Brand code (RAF, KIN, etc.) |
country_code |
VARCHAR(2) | NULL | - | Country code (DE, IT, etc.) |
language_code |
VARCHAR(3) | NULL | - | Language code (de, it, etc.) |
asset_type |
VARCHAR(3) | NULL | - | Asset type code (TVC, OLV, etc.) |
width_px |
INTEGER | NULL | - | Video/image width in pixels |
height_px |
INTEGER | NULL | - | Video/image height in pixels |
upload_directory |
VARCHAR(1000) | NULL | - | Final Assets folder ID for upload |
description |
TEXT | NULL | - | Box file info (simplified, not metadata) |
full_metadata |
JSONB | NULL | - | Complete DAM metadata (200KB+, no truncation) ✨ |
global_master_campaign_id |
VARCHAR(50) | NULL | - | Global master campaign ID (C000000068) ✨ |
global_master_folder_id |
VARCHAR(255) | NULL | - | Global master folder ID ✨ |
status |
VARCHAR(50) | NULL | 'active' | Record status (active, deleted) |
created_at |
TIMESTAMP | NULL | NOW() | When record was created |
updated_at |
TIMESTAMP | NULL | NOW() | When record was last updated |
✨ = New columns added during this project
Queries
Common Queries
1. Find all assets with tracking IDs:
SELECT tracking_id, original_filename, created_at
FROM master_assets
WHERE status = 'active'
ORDER BY created_at DESC
LIMIT 20;
2. Get full metadata for a tracking ID:
SELECT full_metadata
FROM master_assets
WHERE tracking_id = 'ABC123';
3. Find assets for a specific Global Master campaign:
SELECT tracking_id, original_filename, global_master_campaign_id
FROM master_assets
WHERE global_master_campaign_id = 'C000000068'
ORDER BY created_at DESC;
4. Check campaign upload completion:
-- Count master assets
SELECT COUNT(DISTINCT tracking_id)
FROM master_assets
WHERE campaign_id = 'campaign_hex_id'
AND status = 'active';
-- Count uploaded derivatives
SELECT COUNT(DISTINCT ma.tracking_id)
FROM master_assets ma
INNER JOIN derivative_assets da ON ma.tracking_id = da.tracking_id
WHERE ma.campaign_id = 'campaign_hex_id';
5. Get all local campaigns for a Global Master:
-- Assets that reference a specific Global Master
SELECT
tracking_id,
original_filename,
global_master_campaign_id,
global_master_folder_id,
created_at
FROM master_assets
WHERE global_master_campaign_id = 'C000000068'
AND global_master_campaign_id IS NOT NULL
ORDER BY created_at DESC;
6. Statistics:
-- Count by status
SELECT status, COUNT(*)
FROM master_assets
GROUP BY status;
-- Count by workflow type
SELECT
CASE
WHEN global_master_campaign_id IS NULL THEN 'Global Master (B1)'
ELSE 'Local Adaptation (A1)'
END as workflow_type,
COUNT(*)
FROM master_assets
WHERE status = 'active'
GROUP BY workflow_type;
Backup & Restore
Backup Database
Full backup:
pg_dump -h localhost -p 5437 -U ferrero_user -d ferrero_tracking > backup_$(date +%Y%m%d).sql
Schema only:
pg_dump -h localhost -p 5437 -U ferrero_user -d ferrero_tracking --schema-only > schema.sql
Data only:
pg_dump -h localhost -p 5437 -U ferrero_user -d ferrero_tracking --data-only > data.sql
Restore Database
From backup:
psql -h localhost -p 5437 -U ferrero_user -d ferrero_tracking < backup_20251103.sql
Create fresh database:
# Drop and recreate (CAUTION: Deletes all data!)
dropdb -h localhost -p 5437 -U ferrero_user ferrero_tracking
createdb -h localhost -p 5437 -U ferrero_user ferrero_tracking
# Restore schema and data
psql -h localhost -p 5437 -U ferrero_user -d ferrero_tracking < backup.sql
Migration History
Session 1 (October 29, 2025)
Initial database setup
- Created master_assets table
- Created derivative_assets table
- Basic tracking ID system
Session 2 (October 30, 2025)
Migration: Add full_metadata JSONB
Problem: Metadata was truncated to 5,000 characters Solution: Add JSONB column for complete metadata storage
ALTER TABLE master_assets ADD COLUMN full_metadata JSONB;
Impact:
- Before: 5,000 chars max (truncated 195KB of data)
- After: Full 200KB+ metadata preserved
- Enables complete metadata inheritance
Session 3 (November 3, 2025)
Migration: Add Global Campaign Reference
Problem: No way to track which Global Master a local asset came from Solution: Extract and store Global Campaign Reference
ALTER TABLE master_assets
ADD COLUMN global_master_campaign_id VARCHAR(50);
ALTER TABLE master_assets
ADD COLUMN global_master_folder_id VARCHAR(255);
CREATE INDEX idx_master_assets_global_master
ON master_assets(global_master_campaign_id);
Impact:
- Can query all local assets for a Global Master
- Enables cross-campaign features
- Foundation for reporting
Production Deployment
Server Setup
1. Create Database:
# On production server
sudo -u postgres psql
CREATE DATABASE ferrero_tracking;
CREATE USER ferrero_user WITH PASSWORD 'YOUR_SECURE_PASSWORD';
GRANT ALL PRIVILEGES ON DATABASE ferrero_tracking TO ferrero_user;
\q
2. Import Schema:
# Upload schema.sql to server
scp schema.sql user@server:~/
# Import
psql -h localhost -p 5432 -U ferrero_user -d ferrero_tracking < schema.sql
3. Verify:
psql -h localhost -p 5432 -U ferrero_user -d ferrero_tracking
# Check tables
\dt
# Check master_assets schema
\d master_assets
# Verify new columns
\d+ master_assets | grep -E "full_metadata|global_master"
Environment Configuration
Update .env with production database:
DB_HOST=localhost # or production server IP
DB_PORT=5432 # or custom port
DB_USER=ferrero_user
DB_PASSWORD=your_production_password
Maintenance
Regular Maintenance
Vacuum database (weekly):
VACUUM ANALYZE master_assets;
VACUUM ANALYZE derivative_assets;
Check database size:
SELECT pg_size_pretty(pg_database_size('ferrero_tracking'));
Check table sizes:
SELECT
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
Cleanup Old Records
Archive old tracking IDs (optional):
-- Move to archive table after 6 months
INSERT INTO master_assets_archive
SELECT * FROM master_assets
WHERE created_at < NOW() - INTERVAL '6 months'
AND status = 'active';
-- Update status
UPDATE master_assets
SET status = 'archived', updated_at = CURRENT_TIMESTAMP
WHERE created_at < NOW() - INTERVAL '6 months';
Data Examples
Sample master_assets Record
tracking_id: ABC123
opentext_id: 0008a50461e6a554b5eeec2effc02222fedf2801
original_filename: 06_RAFFAELLO_MAESTRO_SD
file_extension: .mp4
file_size_bytes: 251723
mime_type: video/mp4
brand_code: RAF
country_code: NULL
language_code: NULL
asset_type: TVC
upload_directory: ea0dbf86e13e3634895746d3a986558ec2eb8be1
description: Box File ID: 2031029170878
Box URL: https://app.box.com/file/2031029170878
DAM Asset ID: 0008a50461e6a554b5eeec2effc02222fedf2801
full_metadata: {
"name": "06_RAFFAELLO_MAESTRO_SD.mp4",
"asset_id": "0008a50461e6a554b5eeec2effc02222fedf2801",
"metadata": { ... complete 200KB metadata ... },
"inherited_metadata_collections": [ ... campaign info ... ]
}
global_master_campaign_id: C000000068
global_master_folder_id: 676f2bcde4c7bcf7ef783e97f7495069bf50b6bc
status: active
created_at: 2025-11-03 13:20:00
Sample derivative_assets Record
tracking_id: ABC123
derivative_filename: RAF_DE_de_TEST_OLV_001_6S_16x9
file_extension: .mp4
language_code: DE
asset_type: OLV
omg_job_number: 1234567
status: active
uploaded_at: 2025-11-03 14:30:00
Monitoring Queries
Dashboard Queries
Recent activity (last 24 hours):
SELECT
COUNT(*) as total_assets,
COUNT(DISTINCT DATE(created_at)) as days,
MIN(created_at) as earliest,
MAX(created_at) as latest
FROM master_assets
WHERE created_at > NOW() - INTERVAL '24 hours';
Assets by workflow type:
SELECT
CASE
WHEN global_master_campaign_id IS NULL THEN 'Global Masters (B1→B2)'
ELSE 'Local Adaptation (A1→A2)'
END as workflow,
COUNT(*) as count,
COUNT(DISTINCT DATE(created_at)) as days_active
FROM master_assets
WHERE created_at > NOW() - INTERVAL '7 days'
AND status = 'active'
GROUP BY workflow;
Upload completion rate:
SELECT
COUNT(DISTINCT ma.tracking_id) as total_masters,
COUNT(DISTINCT da.tracking_id) as uploaded_derivatives,
ROUND(100.0 * COUNT(DISTINCT da.tracking_id) / COUNT(DISTINCT ma.tracking_id), 2) as completion_rate
FROM master_assets ma
LEFT JOIN derivative_assets da ON ma.tracking_id = da.tracking_id
WHERE ma.created_at > NOW() - INTERVAL '7 days';
Performance Optimization
Recommended Indexes
Already created:
✅ idx_master_assets_tracking_id (UNIQUE)
✅ idx_master_assets_opentext_id
✅ idx_master_assets_status
✅ idx_master_assets_created_at
✅ idx_master_assets_global_master (NEW!)
JSONB Indexing (if needed for queries)
GIN index for full_metadata queries:
-- If you need to query inside full_metadata JSONB
CREATE INDEX idx_master_assets_full_metadata_gin
ON master_assets USING GIN (full_metadata);
Specific JSON path indexes:
-- Index on specific metadata field
CREATE INDEX idx_full_metadata_asset_type
ON master_assets ((full_metadata->'metadata'->'metadata_element_list'));
Security
User Permissions
Application user (ferrero_user):
-- Should have INSERT, SELECT, UPDATE on master_assets and derivative_assets
GRANT SELECT, INSERT, UPDATE ON master_assets TO ferrero_user;
GRANT SELECT, INSERT, UPDATE ON derivative_assets TO ferrero_user;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO ferrero_user;
Read-only user (for reporting):
CREATE USER ferrero_readonly WITH PASSWORD 'readonly_password';
GRANT CONNECT ON DATABASE ferrero_tracking TO ferrero_readonly;
GRANT SELECT ON master_assets TO ferrero_readonly;
GRANT SELECT ON derivative_assets TO ferrero_readonly;
SSL/TLS
For production, enable SSL:
-- postgresql.conf
ssl = on
ssl_cert_file = '/path/to/server.crt'
ssl_key_file = '/path/to/server.key'
Troubleshooting
Common Issues
Issue: Connection refused
# Check PostgreSQL is running
pg_isready -h localhost -p 5437
# Check if port is open
netstat -an | grep 5437
Issue: Permission denied
# Verify user has permissions
psql -h localhost -p 5437 -U ferrero_user -d ferrero_tracking \
-c "SELECT has_table_privilege('ferrero_user', 'master_assets', 'INSERT');"
Issue: Duplicate tracking_id
# Check for duplicates
SELECT tracking_id, COUNT(*)
FROM master_assets
GROUP BY tracking_id
HAVING COUNT(*) > 1;
Issue: full_metadata too large
-- Check metadata sizes
SELECT
tracking_id,
pg_size_pretty(pg_column_size(full_metadata)) as metadata_size
FROM master_assets
WHERE full_metadata IS NOT NULL
ORDER BY pg_column_size(full_metadata) DESC
LIMIT 10;
Version History
| Version | Date | Changes |
|---|---|---|
| 1.0 | Oct 29, 2025 | Initial schema |
| 1.1 | Oct 30, 2025 | Added full_metadata JSONB |
| 2.0 | Nov 3, 2025 | Added global_master_campaign_id and global_master_folder_id |
For complete documentation, see PROJECT_STATUS_2025-11-03.md