Generating Prometheus alerts to webhook - allanrogerr/public GitHub Wiki
# Print a sample Prometheus scrape configuration for the bucket metrics of the
# MinIO alias "acme". Copy its values (notably the bearer token) into the
# scrape job below.
mc admin prometheus generate acme bucket

# Write the Prometheus server configuration.
# The delimiter is quoted ('EOF') so the shell copies the body verbatim.
# NOTE: the later `docker run` step bind-mounts ~/prometheus.yml, so run this
# from the home directory.
cat << 'EOF' > prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: minio-bucket-job
    # Presumably a placeholder; replace with the token printed by
    # `mc admin prometheus generate`. Quoted so it always stays a string.
    bearer_token: '1234abcd'
    metrics_path: /minio/v2/metrics/bucket
    scheme: https
    static_configs:
      - targets: ['acme-0-0.lab.min.dev:9000']

# Where Prometheus sends firing alerts (the Alertmanager container is
# published on host port 10093).
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['acme-client.minio.training:10093']

# Resolved relative to the directory containing this configuration file.
rule_files:
  - alert_rules.yml
EOF
alert_rules.yml
---
# Write the alerting rule that fires when no successful PUT traffic reaches
# bucket "files".
# The delimiter is quoted ('EOF') so the shell never expands anything inside
# the rule body (e.g. a future "{{ $labels.bucket }}" annotation template).
# NOTE: the later `docker run` step bind-mounts ~/alert_rules.yml, so run this
# from the home directory.
cat << 'EOF' > alert_rules.yml
groups:
  - name: example
    rules:
      - alert: MinIODataFlowStoppedInBucketFiles
        # Net PUT rate = all PutObject/PutObjectPart requests minus the ones
        # that failed (4xx/5xx), are still in flight, or were canceled.
        #
        # Every term is individually parenthesised because "+", "-" and "<="
        # bind tighter than "or" in PromQL; without the parentheses the
        # "or vector(0)" fallbacks swallow the additions and the trailing
        # comparison, and the expression is never empty (alert always fires).
        # sum() drops all labels so the terms can be added/subtracted and so
        # the label-less vector(0) default applies when a series is absent.
        # The regex matcher covers both APIs in one selector.
        #
        # NOTE(review): minio_bucket_requests_inflight_total looks like a
        # gauge; rate() over it is kept from the original rule - confirm.
        expr: |
          (
              (sum(rate(minio_bucket_requests_total{job="minio-bucket-job",bucket="files",api=~"putobject|putobjectpart"}[60s])) or vector(0))
            -
              (
                  (sum(rate(minio_bucket_requests_4xx_errors_total{job="minio-bucket-job",bucket="files",api=~"putobject|putobjectpart"}[60s])) or vector(0))
                + (sum(rate(minio_bucket_requests_5xx_errors_total{job="minio-bucket-job",bucket="files",api=~"putobject|putobjectpart"}[60s])) or vector(0))
                + (sum(rate(minio_bucket_requests_inflight_total{job="minio-bucket-job",bucket="files",api=~"putobject|putobjectpart"}[60s])) or vector(0))
                + (sum(rate(minio_bucket_requests_canceled_total{job="minio-bucket-job",bucket="files",api=~"putobject|putobjectpart"}[60s])) or vector(0))
              )
          ) <= 0
        # The condition must hold continuously for this long before firing.
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Data flow stopped for more than 1m for bucket 'files'"
          description: "Data flow has stopped for more than 1m for 'files'"
EOF
Configure Alertmanager
# Write the Alertmanager configuration: every alert is delivered to a single
# webhook receiver.
# The delimiter is quoted ('EOF') so the shell copies the body verbatim.
# NOTE: the later `docker run` step bind-mounts ~/config_alertmanager.yml, so
# run this from the home directory.
cat << 'EOF' > config_alertmanager.yml
# The root route on which each incoming alert enters.
route:
  # The root route must not have any matchers as it is the entry point for
  # all alerts. It needs to have a receiver configured so alerts that do not
  # match any of the sub-routes are sent to someone.
  receiver: 'team-X-webhooks'
  group_wait: 10s
  group_interval: 1m
  routes:
    # Single child route without matchers; it delivers to the same receiver
    # as the root and re-sends a still-firing alert every minute.
    - receiver: 'team-X-webhooks'
      repeat_interval: 1m
      continue: true

receivers:
  - name: 'team-X-webhooks'
    webhook_configs:
      - url: https://webhook.site/57069658-86a1-4e6d-aa11-eb67c9a06ce3
EOF
# List every container, running or stopped, before recreating Prometheus.
docker ps --all

# Stop and remove any earlier instance so the name "prometheus" can be reused.
docker stop prometheus
docker rm prometheus

# Start Prometheus detached. Host port 10091 forwards to container port 9090,
# and the configuration and rule files are bind-mounted from the home
# directory. All output of the run command itself is discarded.
docker run --interactive --tty --detach \
  --name prometheus \
  --publish 10091:9090 \
  --volume ~/prometheus.yml:/etc/prometheus/prometheus.yml \
  --volume ~/alert_rules.yml:/etc/prometheus/alert_rules.yml \
  prom/prometheus > /dev/null 2>&1

# Show the container log to check the startup.
docker logs prometheus
# Stop and remove any earlier instance so the name "grafana" can be reused.
docker stop grafana
docker rm grafana

# Start Grafana Enterprise 10.3.1 detached. Host port 10030 forwards to
# container port 3000. All output of the run command itself is discarded.
docker run --interactive --tty --detach \
  --name grafana \
  --publish 10030:3000 \
  grafana/grafana-enterprise:10.3.1-ubuntu > /dev/null 2>&1

# Show the container log to check the startup.
docker logs grafana
# Stop and remove any earlier instance so the name "alertmanager" can be reused.
docker stop alertmanager
docker rm alertmanager

# Start Alertmanager detached. Host port 10093 forwards to container port
# 9093, and the configuration written earlier is bind-mounted from the home
# directory and passed via --config.file. All output of the run command
# itself is discarded.
docker run --interactive --tty --detach \
  --name alertmanager \
  --publish 10093:9093 \
  --volume ~/config_alertmanager.yml:/etc/alertmanager/config.yml \
  quay.io/prometheus/alertmanager --config.file=/etc/alertmanager/config.yml > /dev/null 2>&1

# Show the container log to check the startup.
docker logs alertmanager
Open the Prometheus UI (published on host port 10091) to check the target and the alert state: http://acme-client.minio.training:10091/
