diff --git a/opt/02-core/presenton/app_data/fastapi.db b/opt/02-core/presenton/app_data/fastapi.db
new file mode 100644
index 0000000..b2e5261
Binary files /dev/null and b/opt/02-core/presenton/app_data/fastapi.db differ
diff --git a/opt/02-core/presenton/app_data/userConfig.json b/opt/02-core/presenton/app_data/userConfig.json
new file mode 100644
index 0000000..980a4a0
--- /dev/null
+++ b/opt/02-core/presenton/app_data/userConfig.json
@@ -0,0 +1 @@
+{"LLM":"google","OPENAI_API_KEY":"REDACTED","OPENAI_MODEL":"gpt-5","GOOGLE_API_KEY":"REDACTED","GOOGLE_MODEL":"models/gemini-2.5-flash","OLLAMA_URL":"http://localhost:11434","CUSTOM_LLM_URL":"http://litellm-proxy:4000","CUSTOM_LLM_API_KEY":"REDACTED","CUSTOM_MODEL":"gpt-5","IMAGE_PROVIDER":"gemini_flash","TOOL_CALLS":"true","EXTENDED_REASONING":"true"}
\ No newline at end of file
diff --git a/opt/05-backups/scripts/backup-full-enhanced.sh b/opt/05-backups/scripts/backup-full-enhanced.sh
new file mode 100755
index 0000000..96f99cf
--- /dev/null
+++ b/opt/05-backups/scripts/backup-full-enhanced.sh
@@ -0,0 +1,479 @@
+#!/bin/bash
+
+################################################################################
+# AI-Impress Enhanced Full Backup System
+# Version: 2.1.0
+# Purpose: Auto-discover and backup all system components
+# Features:
+#   - Auto-discovery of docker-compose projects
+#   - Automatic database detection (PostgreSQL, MariaDB/MySQL, MongoDB)
+#   - Incremental backups with Restic
+#   - Local backup on /mnt/backups
+#   - Slack & Email notifications
+# Author: AI-Impress Admin System
+# Date: 2025-11-06
+# Changelog v2.1.0:
+#   - Added auto-discovery for all database types
+#   - Improved database detection with image inspection
+#   - Added MongoDB backup support
+#   - Better error handling
+################################################################################
+
+# pipefail: a failed dump must fail the whole "dump | gzip" pipeline instead of
+# being masked by gzip's exit status.
+set -eo pipefail
+
+# ============================================
+# CONFIGURATION
+# ============================================
+
+RESTIC_ENV="/opt/05-backups/restic/.env"
+BACKUP_BASE="/mnt/backups"
+LOCAL_BACKUP_DIR="$BACKUP_BASE/local-backups"
+REPORTS_DIR="/opt/05-backups/reports"
+LOG_DIR="/opt/05-backups/logs"
+LOG_FILE="$LOG_DIR/backup-$(date +%Y%m%d-%H%M%S).log"
+BACKUP_REPORT="$REPORTS_DIR/backup-report-$(date +%Y%m%d-%H%M%S).json"
+
+SLACK_WEBHOOK="${SLACK_WEBHOOK_URL:-}"
+EMAIL_TO="admin@ai-impress.com"
+
+# Load Restic credentials
+if [[ -f "$RESTIC_ENV" ]]; then
+    source "$RESTIC_ENV"
+else
+    echo "ERROR: Restic .env not found at $RESTIC_ENV"
+    exit 1
+fi
+
+# Export Vault token
+export VAULT_TOKEN="${VAULT_TOKEN:-$(cat /opt/00-infrastructure/vault/.vault-token 2>/dev/null)}"
+
+# Create directories
+mkdir -p "$BACKUP_BASE" "$LOCAL_BACKUP_DIR" "$REPORTS_DIR" "$LOG_DIR"
+
+# Redirect output to log
+exec 1> >(tee -a "$LOG_FILE")
+exec 2>&1
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+# ============================================
+# HELPER FUNCTIONS
+# ============================================
+
+log() {
+    echo "[$(date +%Y-%m-%d\ %H:%M:%S)] $1"
+}
+
+success() {
+    echo -e "${GREEN}✅ $1${NC}"
+}
+
+warning() {
+    echo -e "${YELLOW}⚠️ $1${NC}"
+}
+
+error() {
+    echo -e "${RED}❌ $1${NC}"
+}
+
+# Return 0 only if a container with exactly this name is currently running.
+container_running() {
+    local names
+    names=$(docker ps --format '{{.Names}}' 2>/dev/null) || return 1
+    grep -qxF -- "$1" <<< "$names"
+}
+
+# Run one backup step ("$@") and tally the outcome in the caller's
+# successful/failed counters. Plain assignments are used because ((n++))
+# returns status 1 when n is 0, which aborts the script under set -e.
+run_step() {
+    if "$@"; then
+        successful=$((successful + 1))
+    else
+        failed=$((failed + 1))
+    fi
+}
+
+send_slack() {
+    local title=$1
+    local message=$2
+    local color=${3:-good}
+
+    if [[ -z "$SLACK_WEBHOOK" ]]; then
+        return
+    fi
+
+    # Best effort: a notification failure (or hang) must never fail the backup.
+    curl --max-time 15 -X POST "$SLACK_WEBHOOK" \
+        -H 'Content-Type: application/json' \
+        -d "{
+            \"attachments\": [{
+                \"color\": \"$color\",
+                \"title\": \"$title\",
+                \"text\": \"$message\",
+                \"footer\": \"AI-Impress Backup System\",
+                \"ts\": $(date +%s)
+            }]
+        }" 2>/dev/null || true
+}
+
+send_email() {
+    local subject=$1
+    local body=$2
+    # %b expands the \n sequences that callers embed in the body.
+    printf '%b\n' "$body" | mail -s "$subject" "$EMAIL_TO" 2>/dev/null || true
+}
+
+# Initialize backup report
+init_report() {
+    cat > "$BACKUP_REPORT" << EOF
+{
+  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
+  "hostname": "$(hostname)",
+  "backup_status": "IN_PROGRESS",
+  "components": {},
+  "summary": {
+    "total_components": 0,
+    "successful": 0,
+    "failed": 0,
+    "total_size": "0GB",
+    "duration": "calculating"
+  },
+  "alerts": []
+}
+EOF
+}
+
+################################################################################
+# AUTO-DISCOVERY FUNCTIONS
+################################################################################
+
+# The discover_* functions return their result on stdout, so their log lines go
+# to stderr; otherwise $(discover_...) would capture the log text as results.
+
+discover_docker_compose_projects() {
+    log "=== Auto-discovering Docker Compose Projects ===" >&2
+
+    local projects=()
+
+    # Scan /opt for docker-compose.yml files
+    while IFS= read -r compose_file; do
+        local project_path=$(dirname "$compose_file")
+        projects+=("$project_path")
+        log "Found: $project_path" >&2
+    done < <(find /opt -maxdepth 4 -name "docker-compose.yml" -type f 2>/dev/null)
+
+    echo "${projects[@]}"
+}
+
+discover_databases() {
+    log "=== Auto-discovering Databases ===" >&2
+
+    local databases=()
+
+    # Auto-detect PostgreSQL containers (name pre-filter, confirmed by image name)
+    while IFS= read -r container; do
+        if docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null | grep -qiE '(postgres|postgresql|timescale|postgis)'; then
+            databases+=("postgresql:$container")
+            log "Found PostgreSQL: $container" >&2
+        fi
+    done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -iE '(postgres|pg|timescale|supabase-db|authentik-postgres|postiz-postgres)')
+
+    # Auto-detect MariaDB/MySQL containers
+    while IFS= read -r container; do
+        if docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null | grep -qiE '(mariadb|mysql)'; then
+            databases+=("mariadb:$container")
+            log "Found MariaDB/MySQL: $container" >&2
+        fi
+    done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -iE '(mariadb|mysql|mautic-db)')
+
+    # Auto-detect MongoDB containers
+    while IFS= read -r container; do
+        if docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null | grep -qiE 'mongo'; then
+            databases+=("mongodb:$container")
+            log "Found MongoDB: $container" >&2
+        fi
+    done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -iE 'mongo')
+
+    echo "${databases[@]}"
+}
+
+discover_volumes() {
+    log "=== Auto-discovering Docker Volumes ===" >&2
+
+    local volumes=()
+
+    # Get all volumes known to Docker
+    while IFS= read -r volume; do
+        volumes+=("$volume")
+        log "Found volume: $volume" >&2
+    done < <(docker volume ls --format "{{.Name}}" 2>/dev/null | grep -v "^$")
+
+    echo "${volumes[@]}"
+}
+
+################################################################################
+# BACKUP FUNCTIONS
+################################################################################
+
+# Shared implementation of the database backups. Streams the dump command's
+# output from the container through gzip into a timestamped file, prunes dumps
+# older than 14 days, and removes the partial file when the dump fails.
+# Arguments: $1 label for messages, $2 container, $3 file prefix,
+#            $4 file suffix, $5... dump command to run inside the container
+dump_container_db() {
+    local label=$1 container=$2 prefix=$3 suffix=$4
+    shift 4
+    local backup_file="$LOCAL_BACKUP_DIR/$prefix-$container-$(date +%Y%m%d-%H%M%S)$suffix"
+
+    log "[DB] Backing up $label: $container"
+
+    if ! container_running "$container"; then
+        warning "$label container $container not running"
+        return 1
+    fi
+
+    if docker exec "$container" "$@" 2>/dev/null | gzip > "$backup_file"; then
+        local size=$(du -h "$backup_file" | cut -f1)
+        success "$label $container backed up ($size)"
+
+        # Keep only last 14 days
+        find "$LOCAL_BACKUP_DIR" -name "$prefix-$container-*$suffix" -mtime +14 -delete
+
+        return 0
+    else
+        rm -f -- "$backup_file"
+        error "Failed to backup $label: $container"
+        return 1
+    fi
+}
+
+backup_postgresql() {
+    local container=$1
+    local db_user=${2:-aimpress_admin}
+    dump_container_db "PostgreSQL" "$container" "postgresql" ".sql.gz" pg_dumpall -U "$db_user"
+}
+
+backup_mariadb() {
+    # Containers tagged "mariadb" may run a MySQL image, which ships mysqldump
+    # instead of mariadb-dump.
+    dump_container_db "MariaDB" "$1" "mariadb" ".sql.gz" \
+        sh -c 'if command -v mariadb-dump >/dev/null 2>&1; then exec mariadb-dump --all-databases; else exec mysqldump --all-databases; fi'
+}
+
+backup_mongodb() {
+    dump_container_db "MongoDB" "$1" "mongodb" ".gz" mongodump --archive
+}
+
+backup_vault() {
+    log "[CONFIG] Backing up Vault..."
+    local backup_file="$LOCAL_BACKUP_DIR/vault-data-$(date +%Y%m%d-%H%M%S).tar.gz"
+
+    if sudo tar czf "$backup_file" -C /opt/00-infrastructure/vault data 2>/dev/null; then
+        local size=$(du -h "$backup_file" | cut -f1)
+        success "Vault data backed up ($size)"
+
+        # Keep only last 30 days
+        find "$LOCAL_BACKUP_DIR" -name "vault-data-*.tar.gz" -mtime +30 -delete
+
+        return 0
+    else
+        error "Failed to backup Vault"
+        return 1
+    fi
+}
+
+backup_docker_configs() {
+    log "[CONFIG] Backing up Docker Compose files..."
+    local backup_file="$LOCAL_BACKUP_DIR/docker-configs-$(date +%Y%m%d-%H%M%S).tar.gz"
+
+    # tar has no -path option (that is find syntax), so let find select the
+    # compose files and pass them to tar explicitly.
+    local compose_files=()
+    mapfile -d '' -t compose_files < <(find /opt -name "docker-compose.yml" -type f -print0 2>/dev/null)
+
+    if [[ ${#compose_files[@]} -gt 0 ]] && tar czf "$backup_file" "${compose_files[@]}" 2>/dev/null; then
+        local size=$(du -h "$backup_file" | cut -f1)
+        success "Docker configs backed up ($size)"
+
+        # Keep only last 30 days
+        find "$LOCAL_BACKUP_DIR" -name "docker-configs-*.tar.gz" -mtime +30 -delete
+
+        return 0
+    else
+        error "Failed to backup Docker configs"
+        return 1
+    fi
+}
+
+backup_application_data() {
+    log "[DATA] Backing up Application Data..."
+    local backup_file="$LOCAL_BACKUP_DIR/app-data-$(date +%Y%m%d-%H%M%S).tar.gz"
+
+    local data_dirs=(
+        "/opt/03-business/mautic/sync_v2"
+        "/opt/02-core/supabase/supabase/docker/volumes"
+    )
+
+    if tar czf "$backup_file" "${data_dirs[@]}" 2>/dev/null; then
+        local size=$(du -h "$backup_file" | cut -f1)
+        success "Application data backed up ($size)"
+
+        # Keep only last 14 days
+        find "$LOCAL_BACKUP_DIR" -name "app-data-*.tar.gz" -mtime +14 -delete
+
+        return 0
+    else
+        error "Failed to backup application data"
+        return 1
+    fi
+}
+
+backup_with_restic() {
+    log "=== Uploading to Restic (Cloudflare R2) ==="
+
+    if ! command -v restic &>/dev/null; then
+        warning "Restic not installed, skipping cloud backup"
+        return 1
+    fi
+
+    # Initialize Restic repository if needed
+    if ! restic cat config &>/dev/null; then
+        log "Initializing Restic repository..."
+        restic init || warning "Restic repository might already exist"
+    fi
+
+    # restic aborts when --exclude-file names a missing file, so only pass it
+    # when the file exists.
+    local restic_args=("$BACKUP_BASE")
+    if [[ -f "$BACKUP_BASE/.restic-exclude" ]]; then
+        restic_args+=(--exclude-file="$BACKUP_BASE/.restic-exclude")
+    fi
+
+    # Backup local directory to Restic
+    if restic backup "${restic_args[@]}" 2>/dev/null; then
+        success "Restic backup completed"
+
+        # Retention: keep 3 daily + 1 weekly snapshots, prune everything else
+        restic forget --keep-daily 3 --keep-weekly 1 --prune 2>/dev/null || true
+
+        return 0
+    else
+        error "Restic backup failed"
+        return 1
+    fi
+}
+
+################################################################################
+# MAIN BACKUP EXECUTION
+################################################################################
+
+main() {
+    local start_time=$(date +%s)
+
+    log "╔════════════════════════════════════════════════════════════╗"
+    log "║           AI-Impress Enhanced Full Backup v2.1.0           ║"
+    log "║                    $(date +%Y-%m-%d\ %H:%M:%S)                     ║"
+    log "╚════════════════════════════════════════════════════════════╝"
+    log ""
+
+    local failed=0
+    local successful=0
+
+    # Auto-discover and backup databases
+    log "=== PHASE 1: Database Backups (Auto-Discovery) ==="
+
+    # Discover all databases automatically
+    local discovered_dbs
+    discovered_dbs=$(discover_databases)
+
+    if [[ -z "$discovered_dbs" ]]; then
+        warning "No databases discovered"
+    else
+        log "Found databases: $discovered_dbs"
+        log ""
+
+        for db in $discovered_dbs; do
+            local db_type=${db%%:*}
+            local db_container=${db#*:}
+
+            case $db_type in
+                postgresql)
+                    # Determine DB user based on container name
+                    local db_user="aimpress_admin"
+                    [[ "$db_container" == "authentik-postgres" ]] && db_user="authentik"
+                    [[ "$db_container" == "postiz-postgres" ]] && db_user="postiz"
+
+                    run_step backup_postgresql "$db_container" "$db_user"
+                    ;;
+                mariadb)
+                    run_step backup_mariadb "$db_container"
+                    ;;
+                mongodb)
+                    run_step backup_mongodb "$db_container"
+                    ;;
+                *)
+                    warning "Unknown database type: $db_type"
+                    failed=$((failed + 1))
+                    ;;
+            esac
+        done
+    fi
+
+    log ""
+    log "=== PHASE 2: Configuration Backups ==="
+
+    run_step backup_vault
+    run_step backup_docker_configs
+
+    log ""
+    log "=== PHASE 3: Application Data ==="
+
+    run_step backup_application_data
+
+    log ""
+    log "=== PHASE 4: Cloud Backup (Restic) ==="
+
+    run_step backup_with_restic
+
+    # Calculate duration
+    local end_time=$(date +%s)
+    local duration=$((end_time - start_time))
+    local duration_min=$((duration / 60))
+    local duration_sec=$((duration % 60))
+
+    log ""
+    log "╔════════════════════════════════════════════════════════════╗"
+    log "║                      BACKUP COMPLETE                       ║"
+    log "╚════════════════════════════════════════════════════════════╝"
+    log ""
+    log "Summary:"
+    log "  Total Components: $((successful + failed))"
+    log "  Successful: $successful"
+    log "  Failed: $failed"
+    log "  Duration: ${duration_min}m ${duration_sec}s"
+    log "  Local Backups: $LOCAL_BACKUP_DIR"
+    log "  Cloud Backups: Restic (Cloudflare R2)"
+    log ""
+
+    # Send notifications
+    if [[ $failed -eq 0 ]]; then
+        success "All backups completed successfully!"
+        send_slack "✅ Backup Complete" "All components backed up successfully in ${duration_min}m ${duration_sec}s" "good"
+        send_email "Backup Complete" "All backups completed successfully.\n\nDuration: ${duration_min}m ${duration_sec}s\nLocation: $BACKUP_BASE"
+    else
+        warning "Backup completed with $failed failures"
+        send_slack "⚠️ Backup Completed with Errors" "Failed components: $failed\nCheck logs: $LOG_FILE" "warning"
+        send_email "Backup Completed with Errors" "Backup completed with $failed failures.\n\nCheck logs: $LOG_FILE"
+        # Non-zero status so cron/monitoring can tell the run had failures.
+        return 1
+    fi
+}
+
+main "$@"
diff --git a/opt/05-backups/scripts/health-check-alerting.sh b/opt/05-backups/scripts/health-check-alerting.sh
new file mode 100755
index 0000000..5613976
--- /dev/null
+++ b/opt/05-backups/scripts/health-check-alerting.sh
@@ -0,0 +1,406 @@
+#!/bin/bash
+
+################################################################################
+# AI-Impress Health Check & Alerting System
+# Version: 1.0.0
+# Purpose: Monitor system health and send alerts on problems
+################################################################################
+
+set -euo pipefail
+
+# Configuration
+# Get from Vault
+export VAULT_ADDR=http://127.0.0.1:8200
+export VAULT_TOKEN=$(cat /opt/00-infrastructure/vault/.vault-token 2>/dev/null || echo "")
+# An explicitly exported SLACK_WEBHOOK_URL wins; otherwise read it from Vault.
+SLACK_WEBHOOK_URL="${SLACK_WEBHOOK_URL:-$(vault kv get -field=slack_webhook secret/monitoring 2>/dev/null || echo "")}"
+ALERT_EMAIL="${ALERT_EMAIL:-admin@ai-impress.com}"
+SMTP_SERVER="${SMTP_SERVER:-localhost}"
+LOG_FILE="/opt/05-backups/logs/health-check-$(date +%Y%m%d).log"
+
+# log() appends with tee; under pipefail a missing log directory would abort the run.
+mkdir -p "$(dirname "$LOG_FILE")"
+
+# Thresholds
+DISK_THRESHOLD=90              # Alert if disk > 90%
+MEMORY_THRESHOLD=90            # Alert if memory > 90%
+MAX_UNHEALTHY_CONTAINERS=2     # Alert if more than 2 containers unhealthy
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+PROBLEMS=()
+WARNINGS=()
+
+log() {
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
+}
+
+add_problem() {
+    PROBLEMS+=("$1")
+    log "🔴 PROBLEM: $1"
+}
+
+add_warning() {
+    WARNINGS+=("$1")
+    log "🟡 WARNING: $1"
+}
+
+# Escape a string so it can be embedded inside a JSON string literal.
+json_escape() {
+    local s=$1
+    s=${s//\\/\\\\}
+    s=${s//\"/\\\"}
+    s=${s//$'\n'/\\n}
+    s=${s//$'\r'/}
+    s=${s//$'\t'/\\t}
+    printf '%s' "$s"
+}
+
+################################################################################
+# CHECKS
+################################################################################
+
+check_critical_services() {
+    log "Checking critical services..."
+
+    local critical_services=("traefik" "postgres-main" "redis-main")
+    # Snapshot the running names once: piping docker ps straight into grep -q
+    # under pipefail can report a false "not running" when grep exits early.
+    local running service health
+    running=$(docker ps --format '{{.Names}}' 2>/dev/null || true)
+
+    for service in "${critical_services[@]}"; do
+        if ! grep -qxF -- "$service" <<< "$running"; then
+            add_problem "Critical service $service is NOT RUNNING"
+        else
+            health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "no healthcheck")
+            if [[ "$health" == "unhealthy" ]]; then
+                add_problem "Critical service $service is UNHEALTHY"
+            fi
+        fi
+    done
+}
+
+check_websites() {
+    log "Checking websites..."
+
+    local websites=(
+        "wiki.ai-impress.com"
+        "n8n.ai-impress.com"
+        "odoo.ai-impress.com"
+        "auth.ai-impress.com"
+    )
+
+    local site http_code
+    for site in "${websites[@]}"; do
+        # curl already prints 000 when the request fails; do not append a second one.
+        http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "https://$site" 2>/dev/null || true)
+        http_code=${http_code:-000}
+
+        if [[ "$http_code" != "200" ]] && [[ ! "$http_code" =~ ^30 ]]; then
+            add_problem "Website $site is DOWN (HTTP $http_code)"
+        fi
+    done
+}
+
+check_disk_space() {
+    log "Checking disk space..."
+
+    local disk_usage=$(df -h / | awk 'NR==2 {print $5}' | sed 's/%//')
+
+    if [[ "$disk_usage" -gt "$DISK_THRESHOLD" ]]; then
+        add_problem "Disk usage is CRITICAL: ${disk_usage}% (threshold: ${DISK_THRESHOLD}%)"
+    elif [[ "$disk_usage" -gt 80 ]]; then
+        add_warning "Disk usage is high: ${disk_usage}%"
+    fi
+
+    # Check /mnt/psql-data
+    if [[ -d /mnt/psql-data ]]; then
+        local db_disk_usage=$(df -h /mnt/psql-data | awk 'NR==2 {print $5}' | sed 's/%//')
+        if [[ "$db_disk_usage" -gt "$DISK_THRESHOLD" ]]; then
+            add_problem "Database disk usage is CRITICAL: ${db_disk_usage}%"
+        fi
+    fi
+}
+
+check_memory() {
+    log "Checking memory usage..."
+
+    local memory_usage=$(free | awk '/Mem:/ {printf "%.0f", ($3/$2)*100}')
+
+    if [[ "$memory_usage" -gt "$MEMORY_THRESHOLD" ]]; then
+        add_problem "Memory usage is CRITICAL: ${memory_usage}% (threshold: ${MEMORY_THRESHOLD}%)"
+    elif [[ "$memory_usage" -gt 80 ]]; then
+        add_warning "Memory usage is high: ${memory_usage}%"
+    fi
+}
+
+check_unhealthy_containers() {
+    log "Checking unhealthy containers..."
+
+    local unhealthy_count=$(docker ps --filter "health=unhealthy" -q | wc -l)
+
+    if [[ "$unhealthy_count" -gt "$MAX_UNHEALTHY_CONTAINERS" ]]; then
+        local containers=$(docker ps --filter "health=unhealthy" --format '{{.Names}}' | tr '\n' ',')
+        add_problem "$unhealthy_count containers are UNHEALTHY: $containers"
+    elif [[ "$unhealthy_count" -gt 0 ]]; then
+        local containers=$(docker ps --filter "health=unhealthy" --format '{{.Names}}' | tr '\n' ',')
+        add_warning "$unhealthy_count container(s) unhealthy: $containers"
+    fi
+}
+
+check_r2_usage() {
+    log "Checking Cloudflare R2 backup storage..."
+
+    # Load Restic environment
+    if [[ ! -f /opt/05-backups/restic/.env ]]; then
+        add_warning "Restic config not found - skipping R2 check"
+        return
+    fi
+
+    source /opt/05-backups/restic/.env
+
+    # raw-data counts the bytes actually stored in the repository (after
+    # deduplication), which is what the R2 quota measures. --json yields an exact
+    # byte count, so no unit parsing (MiB/GiB/TiB) and no bc are needed.
+    local size_bytes
+    size_bytes=$(restic stats --mode raw-data --json 2>/dev/null | grep -oP '"total_size":\s*\K[0-9]+' | head -1 || true)
+
+    if [[ -z "$size_bytes" ]]; then
+        add_warning "Unable to get R2 statistics"
+        return
+    fi
+
+    local r2_limit=10  # Cloudflare R2 free tier limit: 10 GB
+    local gib=$((1024 * 1024 * 1024))
+    local size_gb
+    size_gb=$(awk -v b="$size_bytes" -v g="$gib" 'BEGIN { printf "%.2f", b / g }')
+
+    if (( size_bytes > r2_limit * gib )); then
+        add_problem "R2 storage EXCEEDED: ${size_gb}GB / ${r2_limit}GB limit"
+    elif (( size_bytes > 8 * gib )); then
+        add_warning "R2 storage high: ${size_gb}GB / ${r2_limit}GB (>80%)"
+    else
+        log "✅ R2 storage OK: ${size_gb}GB / ${r2_limit}GB"
+    fi
+
+    # Check snapshot count (grep -c prints 0 itself when nothing matches)
+    local snapshot_count
+    snapshot_count=$(restic snapshots --compact 2>/dev/null | grep -c "^[a-f0-9]" || true)
+    log "📦 R2 snapshots: $snapshot_count (policy: keep 3 daily + 1 weekly)"
+
+    if [[ $snapshot_count -gt 5 ]]; then
+        add_warning "Too many R2 snapshots: $snapshot_count (expected ≤4)"
+    fi
+}
+
+check_backup_status() {
+    log "Checking backup status..."
+
+    if [[ ! -d /mnt/backups ]]; then
+        add_problem "Backup directory /mnt/backups NOT FOUND"
+        return
+    fi
+
+    # Newest archive by modification time. Sorting paths alphabetically would
+    # always pick the last file name, not the most recent backup.
+    local latest_backup
+    latest_backup=$(find /mnt/backups -type f -name "*.gz" -printf '%T@ %p\n' 2>/dev/null | sort -n | tail -1 | cut -d' ' -f2- || true)
+
+    if [[ -z "$latest_backup" ]]; then
+        add_problem "NO BACKUPS FOUND in /mnt/backups"
+    else
+        local backup_age_days=$(( ($(date +%s) - $(stat -c %Y "$latest_backup")) / 86400 ))
+
+        if [[ "$backup_age_days" -gt 2 ]]; then
+            add_problem "Latest backup is $backup_age_days days old (last: $(basename "$latest_backup"))"
+        elif [[ "$backup_age_days" -gt 1 ]]; then
+            add_warning "Latest backup is $backup_age_days days old"
+        fi
+    fi
+}
+
+check_container_restarts() {
+    log "Checking for excessive container restarts..."
+
+    # One docker inspect for all running containers; .Name carries a leading "/".
+    local container restart_count
+    while read -r container restart_count; do
+        if [[ "$restart_count" -gt 10 ]]; then
+            add_problem "Container $container has restarted $restart_count times"
+        elif [[ "$restart_count" -gt 5 ]]; then
+            add_warning "Container $container has restarted $restart_count times"
+        fi
+    done < <(docker ps -q | xargs -r docker inspect --format '{{.Name}} {{.RestartCount}}' 2>/dev/null | sed 's|^/||')
+}
+
+################################################################################
+# ALERT SENDING
+################################################################################
+
+send_slack_alert() {
+    local message="$1"
+
+    if [[ -z "$SLACK_WEBHOOK_URL" ]]; then
+        log "Slack webhook not configured, skipping..."
+        return
+    fi
+
+    # The report contains newlines and quotes, so it must be JSON-escaped.
+    local payload
+    payload=$(cat <<EOF
+{
+  "attachments": [
+    {
+      "color": "danger",
+      "title": "🚨 AI-Impress Server Health Alert",
+      "text": "$(json_escape "$message")",
+      "footer": "AI-Impress Health Check",
+      "ts": $(date +%s)
+    }
+  ]
+}
+EOF
+)
+
+    curl -s --max-time 15 -X POST -H 'Content-Type: application/json' \
+        --data "$payload" "$SLACK_WEBHOOK_URL" >/dev/null 2>&1 || log "Failed to send Slack alert"
+}
+
+send_email_alert() {
+    local subject="$1"
+    local body="$2"
+
+    # Try to send via mail command if available
+    if command -v mail &> /dev/null; then
+        echo "$body" | mail -s "$subject" "$ALERT_EMAIL" 2>/dev/null || log "Failed to send email"
+    elif command -v sendmail &> /dev/null; then
+        echo -e "Subject: $subject\nTo: $ALERT_EMAIL\n\n$body" | sendmail "$ALERT_EMAIL" 2>/dev/null || log "Failed to send email"
+    else
+        log "No mail command available, saving email to file"
+        local fallback_file
+        fallback_file=$(mktemp /tmp/alert-email-XXXXXX.txt)
+        echo -e "To: $ALERT_EMAIL\nSubject: $subject\n\n$body" > "$fallback_file"
+    fi
+}
+
+generate_alert_report() {
+    local report="🚨 AI-Impress Server Health Alert\n\n"
+    report+="Server: ai-impress-prod (51.89.231.46)\n"
+    report+="Time: $(date '+%Y-%m-%d %H:%M:%S')\n\n"
+
+    if [[ ${#PROBLEMS[@]} -gt 0 ]]; then
+        report+="🔴 CRITICAL PROBLEMS (${#PROBLEMS[@]}):\n"
+        for problem in "${PROBLEMS[@]}"; do
+            report+="  - $problem\n"
+        done
+        report+="\n"
+    fi
+
+    if [[ ${#WARNINGS[@]} -gt 0 ]]; then
+        report+="🟡 WARNINGS (${#WARNINGS[@]}):\n"
+        for warning in "${WARNINGS[@]}"; do
+            report+="  - $warning\n"
+        done
+        report+="\n"
+    fi
+
+    report+="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+    report+="🔧 HOW TO FIX:\n\n"
+    report+="1. SSH to server:\n"
+    report+="   ssh ubuntu@51.89.231.46\n\n"
+    report+="2. Check full status:\n"
+    report+="   /opt/05-backups/scripts/admin.sh status\n\n"
+    report+="3. View detailed logs:\n"
+    report+="   docker logs <container> --tail 100\n\n"
+    report+="4. Restart service if needed:\n"
+    report+="   docker restart <container>\n\n"
+    report+="5. Check disk space:\n"
+    report+="   df -h\n\n"
+    report+="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+    report+="📊 Quick System Status:\n"
+    report+="  Memory: $(free -h | awk '/^Mem:/ {print $3 "/" $2}')\n"
+    report+="  Disk: $(df -h / | awk 'NR==2 {print $3 "/" $2 " (" $5 ")"}')\n"
+    report+="  Containers: $(docker ps -q | wc -l) running\n"
+    report+="  Uptime: $(uptime -p)\n\n"
+    report+="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
+    report+="Generated by: /opt/05-backups/scripts/health-check-alerting.sh\n"
+
+    echo -e "$report"
+}
+
+################################################################################
+# MAIN
+################################################################################
+
+main() {
+    log "╔══════════════════════════════════════════════════════╗"
+    log "║      AI-Impress Health Check & Alerting System       ║"
+    log "║                 $(date '+%Y-%m-%d %H:%M:%S')                  ║"
+    log "╚══════════════════════════════════════════════════════╝"
+    log ""
+
+    # Run all checks
+    check_critical_services
+    check_websites
+    check_disk_space
+    check_memory
+    check_r2_usage
+    check_unhealthy_containers
+    check_backup_status
+    check_container_restarts
+
+    log ""
+    log "Summary: ${#PROBLEMS[@]} problems, ${#WARNINGS[@]} warnings"
+
+    # Send alerts if there are problems
+    if [[ ${#PROBLEMS[@]} -gt 0 ]]; then
+        log "🚨 CRITICAL PROBLEMS DETECTED - Sending alerts..."
+
+        local alert_report=$(generate_alert_report)
+
+        # Send to Slack
+        send_slack_alert "$alert_report"
+
+        # Send via Email
+        send_email_alert "🚨 AI-Impress Server Alert - ${#PROBLEMS[@]} Critical Problems" "$alert_report"
+
+        echo -e "\n${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+        echo -e "${RED}⚠️ CRITICAL PROBLEMS DETECTED!${NC}"
+        echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+        for problem in "${PROBLEMS[@]}"; do
+            echo -e "${RED} • $problem${NC}"
+        done
+        echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
+
+        exit 1
+    elif [[ ${#WARNINGS[@]} -gt 0 ]]; then
+        log "⚠️ Warnings detected (no critical problems)"
+
+        echo -e "\n${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+        echo -e "${YELLOW}⚠️ WARNINGS DETECTED${NC}"
+        echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+        for warning in "${WARNINGS[@]}"; do
+            echo -e "${YELLOW} • $warning${NC}"
+        done
+        echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
+
+        exit 0
+    else
+        log "✅ All checks passed - System is healthy"
+        echo -e "\n${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+        echo -e "${GREEN}✅ All checks passed - System is healthy${NC}"
+        echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
+
+        exit 0
+    fi
+}
+
+# Run if executed directly
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
diff --git a/opt/infrastructure-docs/scripts/modules/generate-databases.sh b/opt/infrastructure-docs/scripts/modules/generate-databases.sh
index 6556c5f..31ba41d 100755
--- a/opt/infrastructure-docs/scripts/modules/generate-databases.sh
+++ b/opt/infrastructure-docs/scripts/modules/generate-databases.sh
@@ -108,7 +108,7 @@ docker exec postgres-main psql -U aimpress_admin -c \
 
 # 3. Save to Vault
 export VAULT_ADDR="http://127.0.0.1:8200"
-export VAULT_TOKEN="hvs.jYguDdf2IzobXG8b9QWyATV8"
+export VAULT_TOKEN=$(cat /opt/00-infrastructure/vault/.vault-token)
 vault kv put aimpress/postgres/ password="$NEW_PASS"
 
 # 4. Update application config
diff --git a/opt/infrastructure-docs/scripts/modules/generate-summary.sh b/opt/infrastructure-docs/scripts/modules/generate-summary.sh
index f1a839c..237b30f 100755
--- a/opt/infrastructure-docs/scripts/modules/generate-summary.sh
+++ b/opt/infrastructure-docs/scripts/modules/generate-summary.sh
@@ -64,5 +64,6 @@ for service in traefik postgres-main redis-main vault; do
 fi
 done
 
-cat << 'EOF'
-
+echo ""
+echo "---"
+echo ""
diff --git a/opt/infrastructure-docs/scripts/modules/generate-websites.sh b/opt/infrastructure-docs/scripts/modules/generate-websites.sh
index d8fc397..1f1f4af 100755
--- a/opt/infrastructure-docs/scripts/modules/generate-websites.sh
+++ b/opt/infrastructure-docs/scripts/modules/generate-websites.sh
@@ -105,7 +105,7 @@ ssh ubuntu@51.89.231.46
 
 # Set Vault variables
 export VAULT_ADDR="http://127.0.0.1:8200"
-export VAULT_TOKEN="hvs.jYguDdf2IzobXG8b9QWyATV8"
+export VAULT_TOKEN=$(cat /opt/00-infrastructure/vault/.vault-token)
 
 # List all available secrets
 vault kv list aimpress/
diff --git a/opt/infrastructure-docs/scripts/server-full-report.sh b/opt/infrastructure-docs/scripts/server-full-report.sh
index 84398d0..50374d2 100755
--- a/opt/infrastructure-docs/scripts/server-full-report.sh
+++ b/opt/infrastructure-docs/scripts/server-full-report.sh
@@ -26,6 +26,126 @@ log() { echo -e "${CYAN}[$(date +%H:%M:%S)]${NC} $1"; }
 success() { echo -e "${GREEN}✅ $1${NC}"; }
 error() { echo -e "${RED}❌ $1${NC}"; exit 1; }
 
+# Slack notification function: posts a summary of the generated report ($1)
+send_slack_summary() {
+    local report_file="$1"
+
+    # Get Vault token and Slack webhook
+    export VAULT_ADDR="http://127.0.0.1:8200"
+    export VAULT_TOKEN=$(cat /opt/00-infrastructure/vault/.vault-token 2>/dev/null || echo "")
+    local slack_webhook=$(vault kv get -field=slack_webhook secret/monitoring 2>/dev/null || echo "")
+
+    if [[ -z "$slack_webhook" ]]; then
+        log "Slack webhook not configured, skipping notification"
+        return 0
+    fi
+
+    # Extract key metrics from report
+    local containers_running=$(grep "Docker Containers" "$report_file" | grep -oP '\d+(?= running)' | head -1)
+    local containers_total=$(grep "Docker Containers" "$report_file" | grep -oP 'running / \d+' | grep -oP '\d+' | head -1)
+    local unhealthy=$(grep "Unhealthy Containers" "$report_file" | grep -oP '\d+' | head -1)
+    local memory=$(grep "Memory" "$report_file" | grep -oP '\| \*\*Memory\*\* \| \K[^|]+' | xargs)
+    local disk=$(grep "Disk (/)" "$report_file" | grep -oP '\| \*\*Disk \(/\)\*\* \| \K[^|]+' | xargs)
+    local disk_percent=$(echo "$disk" | grep -oP '\d+(?=\%)')
+    local uptime=$(grep "Uptime" "$report_file" | grep -oP '\| \*\*Uptime\*\* \| \K[^|]+' | xargs)
+
+    # Count websites status (grep -c prints 0 itself when nothing matches, so no "|| echo 0")
+    local websites_ok=$(grep -c "✅ OK" "$report_file" 2>/dev/null || true)
+    local websites_down=$(grep -c "❌" "$report_file" 2>/dev/null || true)
+
+    # Detect problems and create recommendations
+    local problems=""
+    local recommendations=""
+    local color="good"
+    local status_emoji="✅"
+
+    if [[ "$unhealthy" -gt 0 ]]; then
+        problems="${problems}• $unhealthy unhealthy container(s) detected\n"
+        recommendations="${recommendations}• Check logs: \`docker logs <container>\`\n• Restart if needed: \`docker restart <container>\`\n"
+        color="danger"
+        status_emoji="🚨"
+    fi
+
+    if [[ "$websites_down" -gt 0 ]]; then
+        problems="${problems}• $websites_down website(s) are down\n"
+        recommendations="${recommendations}• Check Traefik: \`docker logs traefik --tail 50\`\n• Verify DNS: \`nslookup <domain>\`\n• Check SSL certs: \`/opt/05-backups/scripts/admin.sh status websites\`\n"
+        if [[ "$color" != "danger" ]]; then
+            color="warning"
+            status_emoji="⚠️"
+        fi
+    fi
+
+    if [[ -n "$disk_percent" ]] && [[ "$disk_percent" -gt 80 ]]; then
+        problems="${problems}• Disk usage is high: ${disk_percent}%\n"
+        recommendations="${recommendations}• Clean up old logs: \`/opt/05-backups/scripts/admin.sh cleanup logs\`\n• Clean up Docker: \`/opt/05-backups/scripts/admin.sh cleanup docker\`\n• Check disk: \`/opt/05-backups/scripts/admin.sh status disk\`\n"
+        if [[ "$color" == "good" ]]; then
+            color="warning"
+            status_emoji="⚠️"
+        fi
+    fi
+
+    # Create fields array
+    local fields='[
+        {
+            "title": "System Status",
+            "value": "🐳 Containers: '"$containers_running/$containers_total"' running\n💾 Memory: '"$memory"'\n💿 Disk: '"$disk"'\n⏱️ Uptime: '"$uptime"'",
+            "short": true
+        },
+        {
+            "title": "Health Check",
+            "value": "🔴 Unhealthy: '"${unhealthy:-0}"' containers\n🌐 Websites: '"${websites_ok:-0}"' OK, '"${websites_down:-0}"' Down",
+            "short": true
+        }'
+
+    # Add problems section if any
+    if [[ -n "$problems" ]]; then
+        fields+=',
+        {
+            "title": "⚠️ Detected Problems",
+            "value": "'"${problems}"'",
+            "short": false
+        },
+        {
+            "title": "🔧 Recommended Actions",
+            "value": "'"${recommendations}"'SSH: \`ssh ubuntu@51.89.231.46\`\nAdmin tool: \`/opt/05-backups/scripts/admin.sh help\`",
+            "short": false
+        }'
+    fi
+
+    fields+=',
+        {
+            "title": "Full Report",
+            "value": "📄 Generated: \`'"$(basename "$report_file")"'\`\n📍 Location: \`/opt/infrastructure-docs/reports/\`\n📤 Upload to Wiki: \`/opt/05-backups/scripts/upload-to-outline.sh latest-report\`",
+            "short": false
+        }
+    ]'
+
+    # Create Slack message
+    local payload='{
+        "attachments": [
+            {
+                "color": "'"$color"'",
+                "title": "'"$status_emoji"' Daily Server Report - '"$(date '+%Y-%m-%d %H:%M')"'",
+                "fields": '"$fields"',
+                "footer": "AI-Impress Infrastructure Monitor",
+                "footer_icon": "https://wiki.ai-impress.com/favicon.png",
+                "ts": '"$(date +%s)"'
+            }
+        ]
+    }'
+
+    # Send to Slack
+    local response=$(curl -s -X POST "$slack_webhook" \
+        -H 'Content-Type: application/json' \
+        -d "$payload")
+
+    if [[ "$response" == "ok" ]]; then
+        success "Slack summary sent with $(echo -e "$problems" | grep -c "•" || true) problems detected"
+    else
+        log "Slack notification may have failed: $response"
+    fi
+}
+
 log "╔════════════════════════════════════════════════════════════╗"
 log "║ AI-Impress Complete Server Report Generator v5.0 ║"
 log "║ Modular Architecture - Full System Report ║"
@@ -174,4 +294,9 @@ cat << EOFSTATS
 
 EOFSTATS
 
+# Send Slack summary
+log ""
+log "Sending Slack summary..."
+send_slack_summary "$REPORT_FILE"
+
 exit 0