Изменения:
- Добавлена секция rule_files в prometheus.yml
- Расширены правила алертинга с 6 до 18 алертов
- Снижены пороги для более раннего обнаружения проблем:
  * CPU: warning 70% (было 80%), critical 85% (было 90%)
  * Memory: warning 80% (было 85%), critical 90%
  * Disk: warning 80%, critical 90% (было 90%)
  * ServiceDown: 30s (было 1m)
- Добавлены новые алерты:
  * ContainerDown - падение контейнеров
  * ContainerHighMemory/CPU - перегрузка контейнеров
  * WebsiteDown/Slow - проблемы с веб-сервисами
  * SSLCertificateExpiring - истечение SSL сертификатов
  * PostgreSQLDown/Slow - проблемы с БД

Результат:
- 3 группы алертов: infrastructure (12), webservices (4), database (3)
- Alertmanager настроен на Slack #server-status
- Каждый сбой будет детектироваться в течение 30s-3m

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
78 lines
2.2 KiB
YAML
78 lines
2.2 KiB
YAML
global:
  # Defaults applied to every scrape job that does not set its own values.
  scrape_interval: 30s
  # How often the rules loaded through rule_files are evaluated.
  evaluation_interval: 30s
  scrape_timeout: 10s

# NOTE(review): the original comment here announced a retention-policy
# optimisation, but this file sets no retention option. Retention is commonly
# set with the --storage.tsdb.retention.time / --storage.tsdb.retention.size
# server flags - confirm where it is configured for this deployment.
# Alerting rules are loaded from every .yml file matched by this glob.
rule_files:
- '/etc/prometheus/alerts/*.yml'
# Firing alerts are sent to a single, statically configured Alertmanager.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 'alertmanager:9093'
scrape_configs:
# Scrapes the server listening on localhost:9090.
- job_name: 'prometheus'
  scrape_interval: 30s
  static_configs:
  - targets: ['localhost:9090']
- job_name: 'node-exporter'
  # Single static target: the node-exporter endpoint on port 9100.
  scrape_interval: 30s
  static_configs:
  - targets: ['node-exporter:9100']
- job_name: 'cadvisor'
  scrape_interval: 30s
  static_configs:
  - targets: ['cadvisor:8080']
  metric_relabel_configs:
  # Discard the two metrics named in the regex after each scrape, before the
  # samples are stored. NOTE(review): presumably done to cut series volume -
  # confirm that no rule or dashboard relies on them.
  - source_labels: [__name__]
    regex: '(container_tasks_state|container_memory_failures_total)'
    action: drop
- job_name: 'alertmanager'
  # Scrapes metrics from the alertmanager:9093 endpoint.
  scrape_interval: 30s
  static_configs:
  - targets: ['alertmanager:9093']
- job_name: 'blackbox-ssl'
  # Each listed URL is probed through the exporter's /probe endpoint with the
  # http_2xx module; the relabel rules below do the redirection.
  metrics_path: /probe
  params:
    module: [http_2xx]
  # Was 5m. A 5m probe interval cannot meet the 30s-3m detection window stated
  # for these alerts, and it sits right on Prometheus's default 5m lookback
  # window, so probe series can go stale between scrapes.
  # NOTE(review): 1m is a suggested value - confirm against the alert rules.
  scrape_interval: 1m
  static_configs:
  - targets:
    - https://ai-impress.com
    - https://auth.ai-impress.com
    - https://marketing.ai-impress.com
    - https://n8n.ai-impress.com
    - https://odoo.ai-impress.com
    - https://pgadmin.ai-impress.com
    - https://portainer.ai-impress.com
    - https://rabbitmq.ai-impress.com
    - https://social.ai-impress.com
    - https://status.ai-impress.com
    - https://supabase.ai-impress.com
    - https://traefik.ai-impress.com
    - https://uploads.ai-impress.com
    - https://vault-admin.ai-impress.com
    - https://vault.ai-impress.com
    - https://webhook.ai-impress.com
    - https://wiki.ai-impress.com
    - https://wpp.ai-impress.com
    - https://www.ai-impress.com
  relabel_configs:
  # Hand the listed URL to the exporter as the ?target= query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed URL as the instance label on the resulting series.
  - source_labels: [__param_target]
    target_label: instance
  # Point the actual scrape at the blackbox exporter instead of the URL.
  - target_label: __address__
    replacement: blackbox-exporter:9115
- job_name: 'postgres-exporter'
  # Single static target: the postgres-exporter endpoint on port 9187.
  # Single quotes match the quoting used by the other jobs in this file.
  scrape_interval: 30s
  static_configs:
  - targets: ['postgres-exporter:9187']