#!/bin/bash
################################################################################
# AI-Impress Enhanced Full Backup System
# Version: 2.2.0
# Purpose: Auto-discover and backup all system components
# Features:
#   - Auto-discovery of docker-compose projects
#   - Automatic database detection (PostgreSQL, MariaDB/MySQL, MongoDB)
#   - Incremental backups with Restic
#   - Local backup on /mnt/backups
#   - Slack & Email notifications
#   - 100% disaster recovery coverage
# Author: AI-Impress Admin System
# Date: 2025-11-13
# Changelog v2.2.0:
#   - CRITICAL: Fixed Docker volume names for accurate backups
#   - Added ALL critical volumes: n8n, Odoo, Authentik-postgres, Outline, WikiJS
#   - Added Grafana dashboards to application data backup
#   - Added cleanup_old_local_backups() function (7-day retention)
#   - Changed R2 retention from 1 day to 3 days for safety
#   - Fixed Supabase storage path
#   - 100% infrastructure recovery now possible
# Changelog v2.1.0:
#   - Added auto-discovery for all database types
#   - Improved database detection with image inspection
#   - Added MongoDB backup support
#   - Better error handling
################################################################################

set -e

# ============================================
# CONFIGURATION
# ============================================

RESTIC_ENV="/opt/05-backups/restic/.env"
BACKUP_BASE="/mnt/backups"
LOCAL_BACKUP_DIR="$BACKUP_BASE/local-backups"
REPORTS_DIR="/opt/05-backups/reports"
LOG_DIR="/opt/05-backups/logs"
LOG_FILE="$LOG_DIR/backup-$(date +%Y%m%d-%H%M%S).log"
BACKUP_REPORT="$REPORTS_DIR/backup-report-$(date +%Y%m%d-%H%M%S).json"

# Dual-repo configuration
RESTIC_REPO_HOT="s3:https://6aff840a680098927b58beb93b59dd03.r2.cloudflarestorage.com/aimpress-backups" # R2 for quick recovery
RESTIC_REPO_COLD="rclone:gdrive:ai-impress-backups" # Google Drive for long-term storage

SLACK_WEBHOOK="${SLACK_WEBHOOK_URL:-}"
EMAIL_TO="admin@ai-impress.com"

# Load Restic credentials
if [[ -f "$RESTIC_ENV" ]]; then
  # shellcheck source=/dev/null
  source "$RESTIC_ENV"
else
  echo "ERROR: Restic .env not found at $RESTIC_ENV"
  exit 1
fi

# Export Vault token
export VAULT_TOKEN="${VAULT_TOKEN:-$(cat /opt/00-infrastructure/vault/.vault-token 2>/dev/null)}"

# Create directories
mkdir -p "$BACKUP_BASE" "$LOCAL_BACKUP_DIR" "$REPORTS_DIR" "$LOG_DIR"

# Redirect output to log (stdout is tee'd into the log file, stderr follows stdout)
exec 1> >(tee -a "$LOG_FILE")
exec 2>&1

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# ============================================
# HELPER FUNCTIONS
# ============================================

# Print a timestamped log line.
# Arguments: $1 - message
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

# Print a green success line. Arguments: $1 - message
success() {
  printf '%b\n' "${GREEN}✅ $1${NC}"
}

# Print a yellow warning line. Arguments: $1 - message
warning() {
  printf '%b\n' "${YELLOW}⚠️ $1${NC}"
}

# Print a red error line. Arguments: $1 - message
error() {
  printf '%b\n' "${RED}❌ $1${NC}"
}

# Post an attachment to the Slack webhook; a no-op when SLACK_WEBHOOK is empty.
# Best effort: delivery failures never fail the backup run.
# Arguments: $1 - title, $2 - message (may contain "\n"), $3 - color (default: good)
send_slack() {
  local title=$1
  local message=$2
  local color=${3:-good}

  if [[ -z "$SLACK_WEBHOOK" ]]; then
    return 0
  fi

  # Escape double quotes so caller text cannot terminate the JSON strings.
  # Backslashes are deliberately left alone: callers pass "\n" as a JSON newline.
  title=${title//\"/\\\"}
  message=${message//\"/\\\"}

  curl -X POST "$SLACK_WEBHOOK" \
    -H 'Content-Type: application/json' \
    -d "{
      \"attachments\": [{
        \"color\": \"$color\",
        \"title\": \"$title\",
        \"text\": \"$message\",
        \"footer\": \"AI-Impress Backup System\",
        \"ts\": $(date +%s)
      }]
    }" 2>/dev/null || true
}

# Mail a notification to EMAIL_TO. Best effort: failures are ignored.
# Arguments: $1 - subject, $2 - body (backslash escapes such as "\n" are expanded)
send_email() {
  local subject=$1
  local body=$2

  # %b expands the "\n" sequences the callers embed; plain echo sent them literally.
  printf '%b\n' "$body" | mail -s "$subject" "$EMAIL_TO" 2>/dev/null || true
}

# Initialize backup report: writes an IN_PROGRESS JSON skeleton to BACKUP_REPORT.
# NOTE(review): nothing in this script calls init_report or updates the report.
init_report() {
  cat > "$BACKUP_REPORT" << EOF
{
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "hostname": "$(hostname)",
  "backup_status": "IN_PROGRESS",
  "components": {},
  "summary": {
    "total_components": 0,
    "successful": 0,
    "failed": 0,
    "total_size": "0GB",
    "duration": "calculating"
  },
  "alerts": []
}
EOF
}

# Print the human-readable size of a file as reported by `du -h`.
# Arguments: $1 - file path
_file_size() {
  du -h "$1" | cut -f1
}

# Succeed only when the named container exists and is currently running.
# `docker ps --filter ... -q` exits 0 even when nothing matches, so the
# state is read from `docker inspect` instead.
# Arguments: $1 - container name
_container_is_running() {
  local container=$1
  [[ "$(docker inspect --format '{{.State.Running}}' "$container" 2>/dev/null)" == "true" ]]
}

# Run a dump command and gzip its stdout into a file.
# Fails when EITHER the dump command or gzip fails (pipefail is scoped to a
# subshell so the rest of the script keeps its pipeline semantics). A partial
# archive is removed so a broken dump is never mistaken for a backup.
# Arguments: $1 - output file, $2.. - dump command and its arguments
_dump_to_gzip() {
  local out_file=$1
  shift

  if ! (set -o pipefail; "$@" 2>/dev/null | gzip > "$out_file"); then
    rm -f -- "$out_file"
    return 1
  fi
  return 0
}

# Shared implementation for the database backup functions.
# Arguments: $1 - label used in messages (e.g. PostgreSQL)
#            $2 - file name prefix (e.g. postgresql)
#            $3 - file extension without leading dot (e.g. sql.gz)
#            $4 - container name
#            $5.. - command to run inside the container; its stdout is the dump
# Returns:   0 on success, 1 if the container is not running or the dump failed
_backup_db_container() {
  local label=$1
  local prefix=$2
  local extension=$3
  local container=$4
  shift 4

  local backup_file
  backup_file="$LOCAL_BACKUP_DIR/${prefix}-${container}-$(date +%Y%m%d-%H%M%S).${extension}"

  log "[DB] Backing up $label: $container"

  if ! _container_is_running "$container"; then
    warning "$label container $container not running"
    return 1
  fi

  if _dump_to_gzip "$backup_file" docker exec "$container" "$@"; then
    success "$label $container backed up ($(_file_size "$backup_file"))"
    # Keep only last 14 days
    find "$LOCAL_BACKUP_DIR" -name "${prefix}-${container}-*.${extension}" -mtime +14 -delete
    return 0
  fi

  error "Failed to backup $label: $container"
  return 1
}

# Print the PostgreSQL role used to dump a given container.
# Arguments: $1 - container name
_postgres_user_for_container() {
  case "$1" in
    authentik-postgres) echo "authentik" ;;
    postiz-postgres) echo "postiz" ;;
    supabase-db) echo "supabase_admin" ;;
    *) echo "aimpress_admin" ;;
  esac
}

################################################################################
# AUTO-DISCOVERY FUNCTIONS
################################################################################

# Find docker-compose.yml files under /opt (max depth 4).
# Outputs: space-separated project directories on stdout; progress on stderr
#          so that callers capturing stdout only receive the result.
# NOTE(review): not called from main in this script.
discover_docker_compose_projects() {
  log "=== Auto-discovering Docker Compose Projects ===" >&2

  local projects=()
  local compose_file project_path

  # Scan /opt for docker-compose.yml files
  while IFS= read -r compose_file; do
    project_path=$(dirname "$compose_file")
    projects+=("$project_path")
    log "Found: $project_path" >&2
  done < <(find /opt -maxdepth 4 -name "docker-compose.yml" -type f 2>/dev/null)

  echo "${projects[@]}"
}

# Detect running database containers.
# A container is reported when its NAME matches the per-engine name filter
# and its IMAGE matches the per-engine image filter.
# Outputs: space-separated "type:container" entries on stdout, where type is
#          postgresql, mariadb or mongodb.
discover_databases() {
  local databases=()
  local container

  # Auto-detect PostgreSQL containers (common image names and postgres in container name)
  while IFS= read -r container; do
    if [[ -n "$container" ]] \
      && docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null \
        | grep -qiE '(postgres|postgresql|timescale|postgis)'; then
      databases+=("postgresql:$container")
    fi
  done < <(docker ps --format '{{.Names}}' 2>/dev/null \
    | grep -iE '(postgres|pg|timescale|supabase-db|postiz-postgres|authentik-postgres)')

  # Auto-detect MariaDB/MySQL containers
  while IFS= read -r container; do
    if [[ -n "$container" ]] \
      && docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null \
        | grep -qiE '(mariadb|mysql)'; then
      databases+=("mariadb:$container")
    fi
  done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -iE '(mariadb|mysql|mautic-db)')

  # Auto-detect MongoDB containers
  while IFS= read -r container; do
    if [[ -n "$container" ]] \
      && docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null \
        | grep -qiE 'mongo'; then
      databases+=("mongodb:$container")
    fi
  done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -iE 'mongo')

  echo "${databases[@]}"
}

# List all Docker volume names.
# Outputs: space-separated volume names on stdout; progress on stderr.
# NOTE(review): not called from main in this script.
discover_volumes() {
  log "=== Auto-discovering Docker Volumes ===" >&2

  local volumes=()
  local volume

  # Get all used volumes
  while IFS= read -r volume; do
    volumes+=("$volume")
    log "Found volume: $volume" >&2
  done < <(docker volume ls --format "{{.Name}}" 2>/dev/null | grep -v "^$")

  echo "${volumes[@]}"
}

################################################################################
# BACKUP FUNCTIONS
################################################################################

# Dump every database of a PostgreSQL container with pg_dumpall.
# Arguments: $1 - container name, $2 - database role (default: aimpress_admin)
# Returns:   0 on success, 1 otherwise
backup_postgresql() {
  local container=$1
  local db_user=${2:-aimpress_admin}

  _backup_db_container "PostgreSQL" "postgresql" "sql.gz" "$container" \
    pg_dumpall -U "$db_user"
}

# Dump every database of a MariaDB/MySQL container.
# Uses mariadb-dump when the image ships it and falls back to mysqldump,
# because discovery also routes plain MySQL images here.
# Arguments: $1 - container name
# Returns:   0 on success, 1 otherwise
backup_mariadb() {
  local container=$1

  _backup_db_container "MariaDB" "mariadb" "sql.gz" "$container" \
    sh -c 'if command -v mariadb-dump >/dev/null 2>&1; then exec mariadb-dump --all-databases; else exec mysqldump --all-databases; fi'
}

# Dump a MongoDB container with mongodump --archive.
# Arguments: $1 - container name
# Returns:   0 on success, 1 otherwise
backup_mongodb() {
  local container=$1

  _backup_db_container "MongoDB" "mongodb" "gz" "$container" \
    mongodump --archive
}

# Archive the Vault data directory (/opt/00-infrastructure/vault/data).
# Returns: 0 on success, 1 otherwise
backup_vault() {
  log "[CONFIG] Backing up Vault..."

  local backup_file
  backup_file="$LOCAL_BACKUP_DIR/vault-data-$(date +%Y%m%d-%H%M%S).tar.gz"

  if sudo tar czf "$backup_file" -C /opt/00-infrastructure/vault data 2>/dev/null; then
    success "Vault data backed up ($(_file_size "$backup_file"))"
    # Keep only last 30 days
    find "$LOCAL_BACKUP_DIR" -name "vault-data-*.tar.gz" -mtime +30 -delete
    return 0
  fi

  error "Failed to backup Vault"
  return 1
}

# Archive every docker-compose.yml and .env found under /opt (max depth 4).
# Returns: 0 on success, 1 otherwise
backup_docker_configs() {
  log "[CONFIG] Backing up Docker Compose files..."

  local backup_file
  backup_file="$LOCAL_BACKUP_DIR/docker-configs-$(date +%Y%m%d-%H%M%S).tar.gz"

  # Find all docker-compose.yml / .env files and create archive.
  # The list is NUL-delimited so unusual path characters reach tar verbatim.
  if find /opt -maxdepth 4 \( -name "docker-compose.yml" -o -name ".env" \) -print0 2>/dev/null \
    | tar czf "$backup_file" --null -T - 2>/dev/null; then
    success "Docker configs backed up ($(_file_size "$backup_file"))"
    # Keep only last 30 days
    find "$LOCAL_BACKUP_DIR" -name "docker-configs-*.tar.gz" -mtime +30 -delete
    return 0
  fi

  error "Failed to backup Docker configs"
  return 1
}

# Archive the on-disk contents of a Docker volume.
# Arguments: $1 - volume name, $2 - service name used as the archive prefix
# Returns:   0 on success, 1 if the volume is missing or tar failed
backup_docker_volume() {
  local volume_name=$1
  local service_name=$2
  local backup_file
  backup_file="$LOCAL_BACKUP_DIR/${service_name}-volume-$(date +%Y%m%d-%H%M%S).tar.gz"

  log "[VOLUME] Backing up Docker volume: $volume_name"

  # Get volume mount point
  local volume_path
  volume_path=$(docker volume inspect "$volume_name" --format '{{.Mountpoint}}' 2>/dev/null) || volume_path=""

  # The archive below is created through sudo, so the directory test uses the
  # same privileges; an unprivileged test could fail on a root-only path.
  if [[ -z "$volume_path" ]] || ! sudo test -d "$volume_path"; then
    warning "Volume $volume_name not found or not accessible"
    return 1
  fi

  if sudo tar czf "$backup_file" -C "$(dirname "$volume_path")" "$(basename "$volume_path")" 2>/dev/null; then
    success "Volume $volume_name backed up ($(_file_size "$backup_file"))"
    # Keep only last 7 days for volumes
    find "$LOCAL_BACKUP_DIR" -name "${service_name}-volume-*.tar.gz" -mtime +7 -delete
    return 0
  fi

  error "Failed to backup volume: $volume_name"
  return 1
}

# Archive the application data directories that exist on this host.
# Returns: 0 on success, 1 if none of the directories exist or tar failed
backup_application_data() {
  log "[DATA] Backing up Application Data..."

  local backup_file
  backup_file="$LOCAL_BACKUP_DIR/app-data-$(date +%Y%m%d-%H%M%S).tar.gz"

  # Build array of existing directories only
  local data_dirs=()
  local candidate_dirs=(
    "/opt/03-business/mautic/sync_v2"
    "/opt/02-core/supabase/supabase/docker/volumes/storage"
    "/opt/02-core/presonton/app_data"
    "/opt/01-security/authentik/media"
    "/opt/02-core/evolution-api/evolution_instances"
    "/opt/03-business/documenso/data"
    "/opt/01-security/vaultwarden/data"
    "/opt/04-tools/monitoring/grafana"
  )
  local dir

  for dir in "${candidate_dirs[@]}"; do
    if [[ -d "$dir" ]]; then
      data_dirs+=("$dir")
      log " Adding: $dir"
    else
      warning " Skipping (not found): $dir"
    fi
  done

  if [[ ${#data_dirs[@]} -eq 0 ]]; then
    warning "No application data directories found to backup"
    return 1
  fi

  if tar czf "$backup_file" "${data_dirs[@]}" 2>/dev/null; then
    success "Application data backed up ($(_file_size "$backup_file"))"
    # Keep only last 14 days
    find "$LOCAL_BACKUP_DIR" -name "app-data-*.tar.gz" -mtime +14 -delete
    return 0
  fi

  error "Failed to backup application data"
  return 1
}

# Delete local database dumps and archives older than 7 days.
# This sweep covers *.sql.gz, *.tar.gz and mongodb-*.gz, so it also removes
# files that the per-backup pruning above would keep for 14 or 30 days.
# Returns: always 0
cleanup_old_local_backups() {
  log "[CLEANUP] Cleaning up old local backups (keeping 7 days)..."

  local deleted_count=0
  local file

  while IFS= read -r -d '' file; do
    rm -f -- "$file"
    ((++deleted_count))
  done < <(find "$LOCAL_BACKUP_DIR" -type f \
    \( -name "*.sql.gz" -o -name "*.tar.gz" -o -name "mongodb-*.gz" \) \
    -mtime +7 -print0 2>/dev/null)

  if [[ $deleted_count -gt 0 ]]; then
    success "Cleaned up $deleted_count old backup files"
  else
    log "No old backup files to clean up"
  fi

  return 0
}

# Back up BACKUP_BASE to one restic repository and apply its retention policy.
# Arguments: $1 - log tag (HOT/COLD)
#            $2 - repository display name used in messages
#            $3 - restic repository URL
#            $4 - exclude file
#            $5 - retention description used in the cleanup log line
#            $6.. - `restic forget` policy arguments
# Returns:   0 when a snapshot was saved, 1 otherwise
_restic_backup_repo() {
  local tag=$1
  local name=$2
  local repo=$3
  local exclude_file=$4
  local retention_desc=$5
  shift 5

  # Declared separately from the assignment: `local var=$(cmd)` would report
  # the status of `local` (always 0) instead of restic's exit code.
  local output
  local exit_code=0
  local snapshot_id

  output=$(restic -r "$repo" backup "$BACKUP_BASE" --exclude-file="$exclude_file" 2>&1) || exit_code=$?

  # Exit code 3 means restic saved a snapshot but could not read some files;
  # it is kept as a (warned) success because a usable snapshot exists.
  if [[ $exit_code -eq 0 || $exit_code -eq 3 ]] && grep -q "snapshot.*saved" <<< "$output"; then
    if [[ $exit_code -eq 3 ]]; then
      warning "$name backup saved an incomplete snapshot (some files could not be read)"
    fi

    snapshot_id=$(awk '/snapshot/ && /saved/ {print $2}' <<< "$output")
    success "$name backup completed (snapshot: $snapshot_id)"

    log "[$tag] Cleaning up $name ($retention_desc)..."
    restic -r "$repo" forget "$@" --prune 2>/dev/null || warning "$name cleanup failed"
    return 0
  fi

  error "$name backup failed (exit code: $exit_code)"
  tail -10 <<< "$output"
  return 1
}

# Push BACKUP_BASE to both restic repositories (R2 "hot", Google Drive "cold").
# Returns: 0 if at least one repository received a snapshot, 1 otherwise
#          (including when restic is not installed)
backup_with_restic() {
  log "=== Dual-Repo Cloud Backup Strategy ==="
  log " - HOT Storage (R2): Last 3 days for quick recovery"
  log " - COLD Storage (Google Drive): 7 days + 4 weeks + 3 months"

  if ! command -v restic &>/dev/null; then
    warning "Restic not installed, skipping cloud backup"
    return 1
  fi

  # Create default exclude file if it doesn't exist
  local exclude_file="$BACKUP_BASE/.restic-exclude"
  if [[ ! -f "$exclude_file" ]]; then
    cat > "$exclude_file" << 'EOF'
*.tmp
*.log
lost+found/
.DS_Store
EOF
  fi

  local hot_ok=0
  local cold_ok=0
  local hot_status='❌ Failed'
  local cold_status='❌ Failed'

  # ===== HOT STORAGE: Cloudflare R2 =====
  log ""
  log "[HOT] Backing up to Cloudflare R2 (fast recovery)..."
  if _restic_backup_repo "HOT" "R2" "$RESTIC_REPO_HOT" "$exclude_file" \
    "keeping last 3 days" --keep-daily 3; then
    hot_ok=1
    hot_status='✅ Success'
  fi

  # ===== COLD STORAGE: Google Drive =====
  log ""
  log "[COLD] Backing up to Google Drive (long-term archive)..."
  if _restic_backup_repo "COLD" "Google Drive" "$RESTIC_REPO_COLD" "$exclude_file" \
    "7d/4w/3m retention" --keep-daily 7 --keep-weekly 4 --keep-monthly 3; then
    cold_ok=1
    cold_status='✅ Success'
  fi

  log ""
  log "Backup Summary:"
  log " - R2 (hot): $hot_status"
  log " - Google Drive (cold): $cold_status"

  # Return success if at least one backup succeeded
  if [[ $((hot_ok + cold_ok)) -gt 0 ]]; then
    return 0
  fi
  return 1
}

################################################################################
# MAIN BACKUP EXECUTION
################################################################################

# Run all backup phases, print a summary and send notifications.
# Counters use pre-increment (( ++n )) so the arithmetic command never
# evaluates to 0, which would abort the script under `set -e`.
main() {
  local start_time
  start_time=$(date +%s)

  log "╔════════════════════════════════════════════════════════════╗"
  log "║ AI-Impress Enhanced Full Backup v2.2.0 ║"
  log "║ $(date +%Y-%m-%d\ %H:%M:%S) ║"
  log "╚════════════════════════════════════════════════════════════╝"
  log ""

  local failed=0
  local successful=0

  # Auto-discover and backup databases
  log "=== PHASE 1: Database Backups (Auto-Discovery) ==="

  # Discover all databases automatically
  local discovered_dbs
  discovered_dbs=$(discover_databases)

  if [[ -z "$discovered_dbs" ]]; then
    warning "No databases discovered"
  else
    local -a dbs=()
    local db db_type db_container
    read -r -a dbs <<< "$discovered_dbs"

    log "Discovered databases:"
    for db in "${dbs[@]}"; do
      log " - ${db%%:*}: ${db#*:}"
    done
    log ""

    for db in "${dbs[@]}"; do
      db_type=${db%%:*}
      db_container=${db#*:}

      case "$db_type" in
        postgresql)
          # Determine DB user based on container name
          if backup_postgresql "$db_container" "$(_postgres_user_for_container "$db_container")"; then
            ((++successful))
          else
            ((++failed))
          fi
          ;;
        mariadb)
          if backup_mariadb "$db_container"; then
            ((++successful))
          else
            ((++failed))
          fi
          ;;
        mongodb)
          if backup_mongodb "$db_container"; then
            ((++successful))
          else
            ((++failed))
          fi
          ;;
        *)
          warning "Unknown database type: $db_type"
          ((++failed))
          ;;
      esac
    done
  fi

  log ""
  log "=== PHASE 2: Configuration Backups ==="
  if backup_vault; then ((++successful)); else ((++failed)); fi
  if backup_docker_configs; then ((++successful)); else ((++failed)); fi

  log ""
  log "=== PHASE 3: Application Data ==="
  if backup_application_data; then ((++successful)); else ((++failed)); fi

  log ""
  log "=== PHASE 3.5: Critical Docker Volumes ==="
  # Backup critical volumes that contain client data ("volume:service" pairs)
  local critical_volumes=(
    "authentik_authentik-postgres-data:authentik-postgres"
    "authentik_authentik-redis-data:authentik-redis"
    "evolution-api_evolution-data:evolution"
    "n8n-shared_n8n-data:n8n"
    "odoo_odoo-data:odoo-data"
    "odoo_odoo-addons:odoo-addons"
    "vaultwarden_vaultwarden-data:vaultwarden"
    "outline_outline-data:outline"
    "wikijs_data:wikijs"
  )
  local volume_entry volume_name service_name

  for volume_entry in "${critical_volumes[@]}"; do
    volume_name=${volume_entry%%:*}
    service_name=${volume_entry#*:}

    if docker volume inspect "$volume_name" &>/dev/null; then
      if backup_docker_volume "$volume_name" "$service_name"; then
        ((++successful))
      else
        ((++failed))
      fi
    else
      # Missing volumes are skipped without counting as a failure
      warning "Volume $volume_name not found, skipping"
    fi
  done

  log ""
  log "=== PHASE 3.9: Cleanup Old Local Backups ==="
  if cleanup_old_local_backups; then ((++successful)); else ((++failed)); fi

  log ""
  log "=== PHASE 4: Cloud Backup (Restic) ==="
  if backup_with_restic; then ((++successful)); else ((++failed)); fi

  # Calculate duration
  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))
  local duration_min=$((duration / 60))
  local duration_sec=$((duration % 60))

  log ""
  log "╔════════════════════════════════════════════════════════════╗"
  log "║ BACKUP COMPLETE ║"
  log "╚════════════════════════════════════════════════════════════╝"
  log ""
  log "Summary:"
  log " Total Components: $((successful + failed))"
  log " Successful: $successful"
  log " Failed: $failed"
  log " Duration: ${duration_min}m ${duration_sec}s"
  log " Local Backups: $LOCAL_BACKUP_DIR"
  log " Cloud Backups: Restic (Cloudflare R2)"
  log ""

  # Send notifications
  if [[ $failed -eq 0 ]]; then
    success "All backups completed successfully!"
    send_slack "✅ Backup Complete" "All components backed up successfully in ${duration_min}m ${duration_sec}s" "good"
    send_email "Backup Complete" "All backups completed successfully.\n\nDuration: ${duration_min}m ${duration_sec}s\nLocation: $BACKUP_BASE"
  else
    warning "Backup completed with $failed failures"
    send_slack "⚠️ Backup Completed with Errors" "Failed components: $failed\nCheck logs: $LOG_FILE" "warning"
    send_email "Backup Completed with Errors" "Backup completed with $failed failures.\n\nCheck logs: $LOG_FILE"
  fi
}

main "$@"