@@ -22,6 +22,8 @@ local utils = import '../lib/utils.libsonnet';
2222 {
2323 expr: |||
2424 kube_node_status_condition{%(kubeStateMetricsSelector)s,condition="Ready",status="true"} == 0
25+ and on (%(clusterLabel)s, node)
26+ kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
2527 ||| % $._config,
2628 labels: {
2729 severity: 'warning' ,
@@ -30,7 +32,7 @@ local utils = import '../lib/utils.libsonnet';
3032 description: '{{ $labels.node }} has been unready for more than 15 minutes%s.' % [
3133 utils.ifShowMultiCluster($._config, ' on cluster {{ $labels.%(clusterLabel)s }}' % $._config),
3234 ],
33- summary: 'Node is not ready.' ,
35+ summary: 'Schedulable Node is not ready.' ,
3436 },
3537 'for' : '15m' ,
3638 alert: 'KubeNodeNotReady' ,
@@ -85,6 +87,8 @@ local utils = import '../lib/utils.libsonnet';
8587 alert: 'KubeNodeReadinessFlapping' ,
8688 expr: |||
8789 sum(changes(kube_node_status_condition{%(kubeStateMetricsSelector)s,status="true",condition="Ready"}[15m])) by (%(clusterLabel)s, node) > 2
90+ and on (%(clusterLabel)s, node)
91+ kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
8892 ||| % $._config,
8993 'for' : '15m' ,
9094 labels: {
@@ -94,7 +98,7 @@ local utils = import '../lib/utils.libsonnet';
9498 description: 'The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes%s.' % [
9599 utils.ifShowMultiCluster($._config, ' on cluster {{ $labels.%(clusterLabel)s }}' % $._config),
96100 ],
97- summary: 'Node readiness status is flapping.' ,
101+ summary: 'Schedulable Node readiness status is flapping.' ,
98102 },
99103 },
100104 {
0 commit comments