Prometheus rules for Telegraf - krreddy123/Prometheus GitHub Wiki

Prometheus rules for Telegraf

# Name of this rule group.
# NOTE(review): Prometheus rule files expect groups to be listed under a
# top-level `groups:` key; confirm how this fragment is included.
name: WINDOWS POD

# Alerting rules that belong to this group.
rules:
# Fires when the per-pod CPU expression stays above 80 for 5 minutes.
- alert: Windows pod CPU usage
  # The divisor 4 is the number of CPU cores; adjust it to match the host.
  # NOTE(review): irate() is meant for counters; confirm that
  # win_proc_Percent_Processor_Time is a counter and not a gauge.
  expr: (avg(irate(win_proc_Percent_Processor_Time[5m])) by (pod) / 4) * 100 > 80
  for: 5m
  # labels/annotations must be nested maps; sibling keys such as a bare
  # `severity:` at rule level are not valid rule fields.
  labels:
    severity: critical
  annotations:
    title: "Windows pod CPU usage great then 80%."
    description: "Windows pod {{ $labels.pod }} CPU usage on node great then 80%."
# Fires when a pod's summed working set exceeds 90% of its summed container
# memory limits for 5 minutes.
- alert: Windows pod RAM usage
  # Both sides are aggregated by (pod, namespace) so the division matches
  # series one-to-one on those two labels.
  expr: (sum( win_proc_Working_Set ) by (pod, namespace) / sum( kube_pod_container_resource_limits_memory_bytes ) by (pod, namespace) ) * 100 > 90
  for: 5m
  # labels/annotations must be nested maps; sibling keys such as a bare
  # `severity:` at rule level are not valid rule fields.
  labels:
    severity: critical
  annotations:
    title: "Windows pod RAM usage great then 90 % of it's limit lasts 5 minutes"
    description: "Windows pod {{ $labels.pod }} RAM usage great then 90 % of it's limit lasts 5 minutes."
# Fires when the summed thread count of w3wp processes on a host stays above
# 300 for 5 minutes.
- alert: Windows pod IIS thread count
  # The =~ matcher is fully anchored, so only exported_instance exactly equal
  # to "w3wp" is selected.
  expr: sum(win_proc_Thread_Count{exported_instance=~"w3wp"}) by (host) > 300
  for: 5m
  # labels/annotations must be nested maps; sibling keys such as a bare
  # `severity:` at rule level are not valid rule fields.
  labels:
    severity: critical
  annotations:
    title: "Windows pod monitoring detected more then 300 threads lasts 5 minutes for application IIS w3svc process"
    # The expression aggregates by (host), so `host` is the only label left on
    # the alert; `pod` would render as an empty string.
    description: "Windows pod {{ $labels.host }} monitoring detected more then 300 threads lasts 5 minutes for IIS w3svc process"
# Fires when the summed thread count of all monitored processes on a host
# stays above 800 for 5 minutes.
- alert: Windows pod thread count
  expr: sum(win_proc_Thread_Count) by (host) > 800
  for: 5m
  # labels/annotations must be nested maps; sibling keys such as a bare
  # `severity:` at rule level are not valid rule fields.
  labels:
    severity: critical
  annotations:
    title: "Windows pod monitoring detected more then 800 threads lasts 5 minutes"
    # The expression aggregates by (host), so `host` is the only label left on
    # the alert; `pod` would render as an empty string.
    description: "Windows pod {{ $labels.host }} monitoring detected more then 800 threads lasts 5 minutes"