Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions alerts/apps_alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ local utils = import '../lib/utils.libsonnet';
kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics',
kubeJobTimeoutDuration: error 'must provide value for kubeJobTimeoutDuration',
kubeDaemonSetRolloutStuckFor: '15m',
kubePdbNotEnoughHealthyPodsFor: '15m',
namespaceSelector: null,
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
},
Expand Down Expand Up @@ -359,6 +360,28 @@ local utils = import '../lib/utils.libsonnet';
'for': '15m',
alert: 'KubeHpaMaxedOut',
},
{
// Alert rule KubePdbNotEnoughHealthyPods: matches when a PodDisruptionBudget's desired
// healthy pod count is greater than its current healthy pod count.
// The expression evaluates (desired_healthy - current_healthy) and keeps only positive
// results, so {{ $value }} in the description is the number of healthy pods still missing.
// Both metric selectors are filled in from $._config (prefixedNamespaceSelector and
// kubeStateMetricsSelector).
expr: |||
(
kube_poddisruptionbudget_status_desired_healthy{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
-
kube_poddisruptionbudget_status_current_healthy{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
)
> 0
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
// First %s: whatever utils.ifShowMultiCluster returns for the '{{ $labels.<clusterLabel> }}/' prefix
// (presumably empty unless multi-cluster display is enabled -- verify in lib/utils.libsonnet).
// Second %s: the configured pending duration, so the message stays in sync with 'for' below.
description: 'PDB %s{{ $labels.namespace }}/{{ $labels.poddisruptionbudget }} expects {{ $value }} more healthy pods. The desired number of healthy pods has not been met for at least %s.' % [
utils.ifShowMultiCluster($._config, '{{ $labels.%(clusterLabel)s }}/' % $._config),
$._config.kubePdbNotEnoughHealthyPodsFor,
],
summary: 'PDB does not have enough healthy pods.',
},
// How long the expression must keep matching before the alert fires; read from
// $._config.kubePdbNotEnoughHealthyPodsFor rather than hard-coded.
'for': $._config.kubePdbNotEnoughHealthyPodsFor,
alert: 'KubePdbNotEnoughHealthyPods',
},
],
},
],
Expand Down
3 changes: 1 addition & 2 deletions rules/rules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@
(import 'apps.libsonnet') +
(import 'kube_scheduler.libsonnet') +
(import 'node.libsonnet') +
(import 'kubelet.libsonnet') +
(import 'windows.libsonnet')
(import 'kubelet.libsonnet')
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This removes the Windows rules from the default build; see this comment, where I previously raised concerns about this change.

6 changes: 6 additions & 0 deletions runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ This page collects this repositories alerts and begins the process of describing
+ *Action*: Check the job using `kubectl describe job <job>` and look at the pod logs using `kubectl logs <pod>` for further information.
+ *Runbook*: [Link](https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed/)

##### Alert Name: "KubePdbNotEnoughHealthyPods"
+ *Message*: `PDB {{ $labels.namespace }}/{{ $labels.poddisruptionbudget }} expects {{ $value }} more healthy pods. The desired number of healthy pods has not been met for at least 15m.`
+ *Severity*: warning
+ *Action*: Check the status of the PDB using `kubectl get poddisruptionbudgets <pdb> -o yaml` and compare `status.currentHealthy` with `status.desiredHealthy`. Check the Kubernetes documentation for more information about [pod disruptions](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/).
+ *Runbook*: [Link](https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepdbnotenoughhealthypods/)

### Group Name: "kubernetes-resources"

##### Alert Name: "KubeCPUOvercommit"
Expand Down
27 changes: 27 additions & 0 deletions tests/apps_alerts-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
rule_files:
- ../prometheus_alerts.yaml

tests:
- interval: 1m
name: KubePdbNotEnoughHealthyPods fires when current healthy pods are less than desired
input_series:
- series: 'kube_poddisruptionbudget_status_desired_healthy{cluster="cluster1", namespace="ns1", poddisruptionbudget="pdb1", job="kube-state-metrics"}'
values: '4x15'
- series: 'kube_poddisruptionbudget_status_current_healthy{cluster="cluster1", namespace="ns1", poddisruptionbudget="pdb1", job="kube-state-metrics"}'
values: '3x15'
alert_rule_test:
- eval_time: 14m
alertname: KubePdbNotEnoughHealthyPods
- eval_time: 15m
alertname: KubePdbNotEnoughHealthyPods
exp_alerts:
- exp_labels:
severity: "warning"
cluster: "cluster1"
namespace: "ns1"
poddisruptionbudget: "pdb1"
job: "kube-state-metrics"
exp_annotations:
description: "PDB ns1/pdb1 expects 1 more healthy pods. The desired number of healthy pods has not been met for at least 15m."
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepdbnotenoughhealthypods"
summary: "PDB does not have enough healthy pods."