Skip to content

Commit 87f56da

Browse files
committed
feat(alerts): New KubePdbNotEnoughHealthyPods alert
1 parent 5894150 commit 87f56da

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

alerts/apps_alerts.libsonnet

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ local utils = import '../lib/utils.libsonnet';
55
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
66
kubeJobTimeoutDuration: error 'must provide value for kubeJobTimeoutDuration',
77
kubeDaemonSetRolloutStuckFor: '15m',
8+
kubePdbNotEnoughHealthyPodsFor: '15m',
89
namespaceSelector: null,
910
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
1011
},
@@ -359,6 +360,28 @@ local utils = import '../lib/utils.libsonnet';
359360
'for': '15m',
360361
alert: 'KubeHpaMaxedOut',
361362
},
363+
{
364+
expr: |||
365+
(
366+
kube_poddisruptionbudget_status_desired_healthy{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
367+
-
368+
kube_poddisruptionbudget_status_current_healthy{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
369+
)
370+
> 0
371+
||| % $._config,
372+
labels: {
373+
severity: 'warning',
374+
},
375+
annotations: {
376+
description: 'PDB %s{{ $labels.namespace }}/{{ $labels.poddisruptionbudget }} expects {{ $value }} more healthy pods. The desired number of healthy pods has not been met for at least %s.' % [
377+
utils.ifShowMultiCluster($._config, '{{ $labels.%(clusterLabel)s }}/' % $._config),
378+
$._config.kubePdbNotEnoughHealthyPodsFor,
379+
],
380+
summary: 'PDB does not have enough healthy pods.',
381+
},
382+
'for': $._config.kubePdbNotEnoughHealthyPodsFor,
383+
alert: 'KubePdbNotEnoughHealthyPods',
384+
},
362385
],
363386
},
364387
],

runbook.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ This page collects this repository's alerts and begins the process of describing
119119
+ *Action*: Check the job using `kubectl describe job <job>` and look at the pod logs using `kubectl logs <pod>` for further information.
120120
+ *Runbook*: [Link](https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed/)
121121

122+
##### Alert Name: "KubePdbNotEnoughHealthyPods"
123+
+ *Message*: `PDB {{ $labels.namespace }}/{{ $labels.poddisruptionbudget }} expects {{ $value }} more healthy pods. The desired number of healthy pods has not been met for at least 15m.`
124+
+ *Severity*: warning
125+
+ *Action*: Check the status of the PDB using
126+
`kubectl get poddisruptionbudgets <pdb> -o yaml` and
127+
compare `status.currentHealthy` with `status.desiredHealthy`.
128+
Check the Kubernetes documentation for more information about
129+
[pod disruptions](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/).
130+
+ *Runbook*: [Link](https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepdbnotenoughhealthypods/)
131+
122132
### Group Name: "kubernetes-resources"
123133

124134
##### Alert Name: "KubeCPUOvercommit"

tests/apps_alerts-test.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
rule_files:
2+
- ../prometheus_alerts.yaml
3+
4+
tests:
5+
- interval: 1m
6+
name: KubePdbNotEnoughHealthyPods fires when current healthy pods are less than desired
7+
input_series:
8+
- series: 'kube_poddisruptionbudget_status_desired_healthy{cluster="cluster1", namespace="ns1", poddisruptionbudget="pdb1", job="kube-state-metrics"}'
9+
values: '4x15'
10+
- series: 'kube_poddisruptionbudget_status_current_healthy{cluster="cluster1", namespace="ns1", poddisruptionbudget="pdb1", job="kube-state-metrics"}'
11+
values: '3x15'
12+
alert_rule_test:
13+
- eval_time: 14m
14+
alertname: KubePdbNotEnoughHealthyPods
15+
- eval_time: 15m
16+
alertname: KubePdbNotEnoughHealthyPods
17+
exp_alerts:
18+
- exp_labels:
19+
severity: "warning"
20+
cluster: "cluster1"
21+
namespace: "ns1"
22+
poddisruptionbudget: "pdb1"
23+
job: "kube-state-metrics"
24+
exp_annotations:
25+
description: "PDB ns1/pdb1 expects 1 more healthy pods. The desired number of healthy pods has not been met for at least 15m."
26+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepdbnotenoughhealthypods"
27+
summary: "PDB does not have enough healthy pods."

0 commit comments

Comments
 (0)