Skip to content

Commit 9ceec88

Browse files
TheRealNoob and skl authored
feat: filter NodeReadiness alerts on uncordoned status (kubernetes-monitoring#1012)
* feat: filter NodeReadiness alerts on uncordoned status

  Signed-off-by: TheRealNoob <[email protected]>

* add tests

  Signed-off-by: TheRealNoob <[email protected]>

* Update tests.yaml

* Update tests.yaml

---------

Signed-off-by: TheRealNoob <[email protected]>
Co-authored-by: Stephen Lang <[email protected]>
1 parent af5e898 commit 9ceec88

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

alerts/kubelet.libsonnet

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ local utils = import '../lib/utils.libsonnet';
2222
{
2323
expr: |||
2424
kube_node_status_condition{%(kubeStateMetricsSelector)s,condition="Ready",status="true"} == 0
25+
and on (%(clusterLabel)s, node)
26+
kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
2527
||| % $._config,
2628
labels: {
2729
severity: 'warning',
@@ -85,6 +87,8 @@ local utils = import '../lib/utils.libsonnet';
8587
alert: 'KubeNodeReadinessFlapping',
8688
expr: |||
8789
sum(changes(kube_node_status_condition{%(kubeStateMetricsSelector)s,status="true",condition="Ready"}[15m])) by (%(clusterLabel)s, node) > 2
90+
and on (%(clusterLabel)s, node)
91+
kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
8892
||| % $._config,
8993
'for': '15m',
9094
labels: {

tests.yaml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,8 +570,49 @@ tests:
570570

571571
- interval: 1m
572572
input_series:
573+
# node=minikube is uncordoned so we expect the alert to fire
573574
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}'
575+
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
576+
# Only cluster and node take part in the alert's "and on" join; labelled like the minikube2 fixture of this metric (no status label)
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics"}'
577+
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
578+
# node=minikube2 is cordoned so we expect the alert to not fire
579+
# status="true" is required for this series to match the KubeNodeNotReady selector; without it the cordon filter is never exercised
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}'
580+
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
581+
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}'
582+
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
583+
alert_rule_test:
584+
- eval_time: 18m
585+
alertname: KubeNodeNotReady
586+
exp_alerts:
587+
- exp_labels:
588+
cluster: kubernetes
589+
node: minikube
590+
severity: warning
591+
condition: Ready
592+
endpoint: https-main
593+
instance: 10.0.2.15:10250
594+
job: kube-state-metrics
595+
namespace: monitoring
596+
pod: kube-state-metrics-b894d84cc-d6htw
597+
service: kube-state-metrics
598+
status: "true"
599+
exp_annotations:
600+
summary: "Node is not ready."
601+
description: 'minikube has been unready for more than 15 minutes.'
602+
runbook_url: 'https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready'
603+
604+
- interval: 1m
605+
input_series:
606+
# node=minikube is uncordoned so we expect the alert to fire
607+
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}'
608+
values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1'
609+
# Only cluster and node take part in the alert's "and on" join; labelled like the minikube2 fixture of this metric (no status label)
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics"}'
610+
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
611+
# node=minikube2 is cordoned so we expect the alert to not fire
612+
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}'
574613
values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1'
614+
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}'
615+
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
575616
alert_rule_test:
576617
- eval_time: 18m
577618
alertname: KubeNodeReadinessFlapping

0 commit comments

Comments
 (0)