ferrero-opentext/Python-Version/config/config.yaml
DJP b6b9d7337a Add CreativeX score extraction and storage system
Implements new workflow to extract CreativeX quality scores from PDFs
using LlamaExtract AI and store results in PostgreSQL database.

Components added:
- creativex_scoring_storing.py: Main script to process PDFs from Box
- creativex_scores table: Database table with JSONB for full JSON storage
- Database methods: store_creativex_score() and get_creativex_score_by_filename()
- Email templates: creativex_complete, creativex_partial, creativex_no_files
- Configuration: creativex section in config.yaml
- CREATIVEX_DEPLOYMENT.md: Complete deployment and usage guide

Features:
- Monitors Box folder 350605024645 for PDFs
- Extracts scores using LlamaExtract agent "Creativex-Extract"
- Stores 4 key fields (filename, ID, URL, score) + full JSON
- Deletes processed PDFs from Box after successful extraction
- Sends email notifications for success/partial/no-files scenarios
- Manual execution (python scripts/creativex_scoring_storing.py)

Database schema:
- Table: creativex_scores with 10 columns
- Indexes on filename, box_file_id, status for fast lookups
- JSONB column stores complete extraction for future flexibility

Future integration ready:
db.get_creativex_score_by_filename() available for DAM upload workflows
to attach CreativeX metadata during asset processing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-11 16:15:45 -05:00

118 lines
3 KiB
YAML

# Ferrero Content Scaling - Main Configuration
# All settings can be overridden by environment-specific configs
#
# Values written as ${VAR} or ${VAR:-default} are environment-variable
# placeholders. NOTE(review): expansion is presumably performed by the config
# loader, not by YAML itself - confirm it supports the ":-default" form.
#
# Environment selector (set via ENV environment variable)
# "staging" is the fallback written in the placeholder below.
environment: ${ENV:-staging}
# DAM Configuration
# Every key below is nested under `dam`; left at column 0 they would become
# top-level keys and clash with the same-named keys of the `box` section.
dam:
  base_url: ${DAM_BASE_URL}
  # OAuth2 Authentication (default - current working method)
  auth_url: ${DAM_AUTH_URL}
  client_id: ${DAM_CLIENT_ID}
  client_secret: ${DAM_CLIENT_SECRET}
  # mTLS Certificate Authentication (optional - use with --auth-pfx flag)
  # The trailing ":-" gives an empty default; presumably these resolve to an
  # empty value when the variables are unset - verify against the loader.
  mtls_base_url: ${DAM_MTLS_BASE_URL:-}
  mtls_cert_path: ${DAM_MTLS_CERT_PATH:-}
  mtls_cert_password: ${DAM_MTLS_CERT_PASSWORD:-}
  timeout_seconds: 120
# Box Configuration
# Credentials, per-workflow root folders and webhook signature keys for Box.
# All keys are nested under `box` so they do not clash with the `dam` keys
# of the same name.
box:
  enterprise_id: ${BOX_ENTERPRISE_ID}
  client_id: ${BOX_CLIENT_ID}
  client_secret: ${BOX_CLIENT_SECRET}
  jwt_key_id: ${BOX_JWT_KEY_ID}
  # NOTE(review): relative path pointing at a JSON file although the key is
  # named rsa_private_key_path - confirm what the loader reads from it and
  # which directory the path is resolved against.
  rsa_private_key_path: ../Box-config.json
  passphrase: ${BOX_PASSPHRASE}
  # Separate folders for different workflows
  root_folder_a1_a2: ${BOX_ROOT_FOLDER_A1_A2}  # For downloaded Local master assets (348304357505)
  root_folder_a2_a3: ${BOX_ROOT_FOLDER_A2_A3}  # For agency uploads to process (348526703108)
  root_folder_b1_b2: ${BOX_ROOT_FOLDER_B1_B2}  # For downloaded Global master assets (349261192115)
  # Two-entry list (primary, secondary); both placeholders default to empty.
  webhook_signature_keys:
    - ${BOX_WEBHOOK_PRIMARY_KEY:-}
    - ${BOX_WEBHOOK_SECONDARY_KEY:-}
# Database Configuration
# Connection settings for the tracking database; nested under `database` so
# `host`, `port`, `user` and `password` stay scoped to this section.
database:
  host: ${DB_HOST:-localhost}
  # NOTE(review): the fallback 5437 is not PostgreSQL's standard port (5432) -
  # confirm it matches the local/staging port mapping.
  port: ${DB_PORT:-5437}
  database: ferrero_tracking
  user: ${DB_USER}
  password: ${DB_PASSWORD}
# Polling Configuration (A1→A2)
# Nested under `polling` so `enabled` does not clash with the `enabled` flags
# of the other sections.
polling:
  enabled: true
  interval_seconds: 300  # 5 minutes
  max_campaigns_per_run: 10
# Webhook Configuration (A2→A3 receiver)
# Bind address, port and signature checking for the inbound webhook listener.
webhook_receiver:
  enabled: true
  # 0.0.0.0 is parsed as a plain string (it is not a valid YAML number).
  host: 0.0.0.0
  port: ${WEBHOOK_RECEIVER_PORT:-5555}
  validate_signatures: true
# Outgoing Webhooks (we call these)
# One mapping per outgoing webhook; each carries its own delivery and auth
# settings.
webhooks:
  campaign_status_update:
    enabled: true
    url: ${CAMPAIGN_STATUS_WEBHOOK_URL}
    timeout_seconds: 10
    retry_on_failure: true
    max_retries: 3
    # NOTE(review): `auth` is placed under campaign_status_update on the
    # assumption that auth is configured per webhook - confirm with the reader
    # of this section.
    auth:
      # `none` here is the literal string "none", not a YAML null.
      type: none  # bearer, basic, or none
      token: ${WEBHOOK_AUTH_TOKEN:-}
# Retry Configuration
# Attempt limit and delay bounds, scoped under `retry`.
retry:
  max_attempts: 3
  backoff: exponential  # exponential, linear, fixed
  initial_delay_seconds: 5
  max_delay_seconds: 60
# Notification Configuration
# SMTP connection settings, recipient lists per outcome, and the location of
# the email templates file.
notifications:
  enabled: true
  smtp:
    server: ${SMTP_SERVER}
    port: ${SMTP_PORT}
    user: ${SMTP_USER}
    password: ${SMTP_PASSWORD}
    # NOTE(review): sender_email is grouped with the SMTP settings by
    # assumption - confirm the code does not read notifications.sender_email.
    sender_email: ${SENDER_EMAIL}
  # Each recipient list holds a single placeholder.
  # NOTE(review): if REPORT_EMAILS carries several comma-separated addresses
  # it still arrives as ONE list item - confirm the consumer splits it.
  recipients:
    success:
      - ${REPORT_EMAILS}
    errors:
      - ${ERROR_EMAIL}
    critical:
      - ${ERROR_EMAIL}
  templates_path: config/email_templates.yaml
# Field Configuration
# Points at the separate YAML file that holds the field mappings.
fields:
  mappings_file: config/field_mappings.yaml
# CreativeX Configuration
# API key, extraction agent name and Box folder for the CreativeX workflow.
creativex:
  llama_api_key: ${LLAMA_CLOUD_API_KEY}
  agent_name: Creativex-Extract
  # Quoted so the all-digit folder ID stays a string rather than an integer.
  box_folder_id: "350605024645"
# Logging Configuration
# Log level, record format, rotating-file limits and console toggle.
logging:
  level: INFO
  # Double-quoted because the value starts with "%", a YAML indicator.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file:
    directory: logs
    max_bytes: 10485760  # 10MB
    backup_count: 5
  # NOTE(review): `console` is placed beside `file` (not inside it) by
  # assumption - confirm against the logging setup code.
  console: true
# Temp File Configuration
# Download scratch directory plus its age and size limits; nested under
# `temp` so `directory` does not clash with the logging directory key.
temp:
  directory: temp/downloads
  cleanup_after_hours: 24
  max_size_mb: 1000