Generating Prometheus alerts to webhook - allanrogerr/public GitHub Wiki

Generate a Prometheus scrape configuration template with mc

# Generate a sample Prometheus scrape configuration (including the bearer token)
# for the 'acme' alias, scoped to the bucket-level metrics endpoint.
mc admin prometheus generate acme bucket

Create prometheus config

# Write the Prometheus configuration. The heredoc delimiter is quoted so the
# shell performs no expansion inside the YAML body.
cat << 'EOF' > prometheus.yml
global:
  # How often Prometheus scrapes all targets.
  scrape_interval: 15s
scrape_configs:
- job_name: minio-bucket-job
  # Token produced by `mc admin prometheus generate`.
  # NOTE(review): bearer_token is deprecated since Prometheus 2.26 in favour
  # of the `authorization` block; it still works, but consider migrating.
  bearer_token: 1234abcd
  metrics_path: /minio/v2/metrics/bucket
  scheme: https
  static_configs:
  - targets: ['acme-0-0.lab.min.dev:9000']
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['acme-client.minio.training:10093']
rule_files:
  # Relative paths resolve against the directory holding this config file.
  - alert_rules.yml
EOF

Create alert

The following alert rules are written to alert_rules.yml:
# Write the Prometheus alerting rules for bucket 'files'.
cat << EOF > alert_rules.yml
groups:
- name: example
  rules:
  # Intended to fire when PUT traffic (putobject/putobjectpart) on bucket
  # 'files' — net of 4xx/5xx/in-flight/canceled request rates — stays at or
  # below zero for 1 minute. Each rate() is OR'ed with vector(0) so a
  # missing series counts as zero instead of emptying the whole expression.
  # NOTE(review): in PromQL, comparison operators bind tighter than `or`, so
  # the trailing `OR on() vector(0) <= 0` parses as `... or (vector(0) <= 0)`
  # rather than comparing the full expression to 0 — verify the intended
  # grouping; extra parentheses before `<= 0` may be needed.
  - alert: MinIODataFlowStoppedInBucketFiles
    expr: (  rate(minio_bucket_requests_total{job="minio-bucket-job",bucket="files",api="putobject"}[60s]) OR on() vector(0) +   rate(minio_bucket_requests_total{job="minio-bucket-job",bucket="files",api="putobjectpart"}[60s]) OR on() vector(0)) - (  rate(minio_bucket_requests_4xx_errors_total{job="minio-bucket-job",bucket="files",api="putobject"}[60s]) OR on() vector(0) +   rate(minio_bucket_requests_4xx_errors_total{job="minio-bucket-job",bucket="files",api="putobjectpart"}[60s]) OR on() vector(0) +  rate(minio_bucket_requests_5xx_errors_total{job="minio-bucket-job",bucket="files",api="putobject"}[60s]) OR on() vector(0) +   rate(minio_bucket_requests_5xx_errors_total{job="minio-bucket-job",bucket="files",api="putobjectpart"}[60s]) OR on() vector(0) +  rate(minio_bucket_requests_inflight_total{job="minio-bucket-job",bucket="files",api="putobject"}[60s]) OR on() vector(0) +   rate(minio_bucket_requests_inflight_total{job="minio-bucket-job",bucket="files",api="putobjectpart"}[60s]) OR on() vector(0) +  rate(minio_bucket_requests_canceled_total{job="minio-bucket-job",bucket="files",api="putobject"}[60s]) OR on() vector(0) +   rate(minio_bucket_requests_canceled_total{job="minio-bucket-job",bucket="files",api="putobjectpart"}[60s]) OR on() vector(0)) OR on() vector(0) <= 0
    # The expression must hold for 1 minute before the alert fires.
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Data flow stopped for more than 1m for bucket 'files'"
      description: "Data flow has stopped for more than 1m for 'files'"
EOF

Configure alert

# Write the Alertmanager routing configuration. The heredoc delimiter is
# quoted so the shell performs no expansion inside the YAML body.
cat << 'EOF' > config_alertmanager.yml
# The root route on which each incoming alert enters.
route:
  # The root route must not have any matchers as it is the entry point for
  # all alerts. It needs to have a receiver configured so alerts that do not
  # match any of the sub-routes are sent to someone.
  receiver: 'team-X-webhooks'
  # Wait 10s to batch the initial alerts of a group into one notification.
  group_wait: 10s
  # Minimum interval before notifying about new alerts added to a group.
  group_interval: 1m
  routes:
  - receiver: 'team-X-webhooks'
    # Re-send a still-firing alert every minute (demo-friendly; raise in prod).
    repeat_interval: 1m
    continue: true
receivers:
- name: 'team-X-webhooks'
  webhook_configs:
  - url: 'https://webhook.site/57069658-86a1-4e6d-aa11-eb67c9a06ce3'
EOF

Starting containers

# Recreate the Prometheus, Grafana, and Alertmanager containers.
docker ps -a
# stop/rm fail harmlessly when the container does not exist yet; suppress the
# error noise so first runs look clean (the run commands already do so).
docker stop prometheus 2> /dev/null
docker rm prometheus 2> /dev/null
# Prometheus UI exposed on host port 10091; config and rules bind-mounted
# from the files generated above into the image's default config directory.
docker run -itd \
    --name prometheus \
    -p 10091:9090 \
    -v ~/prometheus.yml:/etc/prometheus/prometheus.yml \
    -v ~/alert_rules.yml:/etc/prometheus/alert_rules.yml \
    prom/prometheus &> /dev/null
docker logs prometheus
docker stop grafana 2> /dev/null
docker rm grafana 2> /dev/null
# Grafana UI exposed on host port 10030.
docker run -itd \
    --name=grafana \
    -p 10030:3000 \
    grafana/grafana-enterprise:10.3.1-ubuntu &> /dev/null
docker logs grafana
docker stop alertmanager 2> /dev/null
docker rm alertmanager 2> /dev/null
# Alertmanager exposed on host port 10093, matching the `alerting:` target in
# the Prometheus config; --config.file points at the bind-mounted config.
docker run -itd \
    --name alertmanager \
    -p 10093:9093 \
    -v ~/config_alertmanager.yml:/etc/alertmanager/config.yml \
    quay.io/prometheus/alertmanager --config.file=/etc/alertmanager/config.yml &> /dev/null
docker logs alertmanager

Observe the alerts on the Prometheus Alerts page, and on the webhook endpoint

http://acme-client.minio.training:10091/
image
⚠️ **GitHub.com Fallback** ⚠️