Prometheus rules for Telegraf - krreddy123/Prometheus GitHub Wiki
Prometheus rules for Telegraf
# Alerting rule group for Windows pods, built on the win_proc_* metrics
# (this page documents Prometheus rules for Telegraf). Every rule must hold
# for 5 minutes before firing and is labelled severity=critical.
- name: WINDOWS POD
  rules:
    # Fires when the per-pod average of the processor-time series, divided
    # by the core count and scaled to percent, stays above 80.
    # The divisor 4 is the number of CPU cores; adjust it to match the node.
    # NOTE(review): irate() is intended for counters. If
    # win_proc_Percent_Processor_Time is exported as a gauge, this
    # expression should be revisited - confirm the metric type.
    - alert: Windows pod CPU usage
      expr: (avg(irate(win_proc_Percent_Processor_Time[5m])) by (pod) / 4) * 100 > 80
      for: 5m
      labels:
        severity: critical
      annotations:
        title: Windows pod CPU usage great then 80%.
        description: "Windows pod {{ $labels.pod }} CPU usage on node great then 80%."

    # Fires when the summed working set of a pod exceeds 90 % of the summed
    # container memory limits for the same (pod, namespace) pair.
    # The folded scalar below joins into a single-line expression.
    - alert: Windows pod RAM usage
      expr: >-
        (sum( win_proc_Working_Set ) by (pod, namespace)
        / sum( kube_pod_container_resource_limits_memory_bytes ) by (pod, namespace) )
        * 100 > 90
      for: 5m
      labels:
        severity: critical
      annotations:
        title: "Windows pod RAM usage great then 90 % of it's limit lasts 5 minutes"
        description: "Windows pod {{ $labels.pod }} RAM usage great then 90 % of it's limit lasts 5 minutes."

    # Fires when the thread count summed per host over series whose
    # exported_instance matches "w3wp" exceeds 300.
    # The aggregation keeps only the `host` label, so the description must
    # reference $labels.host; $labels.pod would render as an empty string.
    # NOTE(review): =~"w3wp" is a fully anchored regex, so it only matches
    # the exact value "w3wp" - confirm whether other instance names such as
    # "w3wp#1" should be included.
    - alert: Windows pod IIS thread count
      expr: sum(win_proc_Thread_Count{exported_instance=~"w3wp"}) by (host) > 300
      for: 5m
      labels:
        severity: critical
      annotations:
        title: Windows pod monitoring detected more then 300 threads lasts 5 minutes for application IIS w3svc process
        description: "Windows pod {{ $labels.host }} monitoring detected more then 300 threads lasts 5 minutes for IIS w3svc process"

    # Fires when the total thread count summed per host exceeds 800.
    # As above, only the `host` label survives the aggregation, so the
    # description references $labels.host rather than $labels.pod.
    - alert: Windows pod thread count
      expr: sum(win_proc_Thread_Count) by (host) > 800
      for: 5m
      labels:
        severity: critical
      annotations:
        title: Windows pod monitoring detected more then 800 threads lasts 5 minutes
        description: "Windows pod {{ $labels.host }} monitoring detected more then 800 threads lasts 5 minutes"
-