diff --git a/alerts/kubelet.libsonnet b/alerts/kubelet.libsonnet index 1bc4ea558..e09170f8e 100644 --- a/alerts/kubelet.libsonnet +++ b/alerts/kubelet.libsonnet @@ -22,6 +22,8 @@ local utils = import '../lib/utils.libsonnet'; { expr: ||| kube_node_status_condition{%(kubeStateMetricsSelector)s,condition="Ready",status="true"} == 0 + and on (%(clusterLabel)s, node) + kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0 ||| % $._config, labels: { severity: 'warning', @@ -85,6 +87,8 @@ local utils = import '../lib/utils.libsonnet'; alert: 'KubeNodeReadinessFlapping', expr: ||| sum(changes(kube_node_status_condition{%(kubeStateMetricsSelector)s,status="true",condition="Ready"}[15m])) by (%(clusterLabel)s, node) > 2 + and on (%(clusterLabel)s, node) + kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0 ||| % $._config, 'for': '15m', labels: { diff --git a/tests.yaml b/tests.yaml index b73aca723..3dd5920ea 100644 --- a/tests.yaml +++ b/tests.yaml @@ -570,8 +570,49 @@ tests: - interval: 1m input_series: + # node=minikube is uncordoned so we expect the alert to fire - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + - series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + # node=minikube2 is cordoned so we expect the alert to not fire + - series: 
'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + - series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}' + values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' + alert_rule_test: + - eval_time: 18m + alertname: KubeNodeNotReady + exp_alerts: + - exp_labels: + cluster: kubernetes + node: minikube + severity: warning + condition: Ready + endpoint: https-main + instance: 10.0.2.15:10250 + job: kube-state-metrics + namespace: monitoring + pod: kube-state-metrics-b894d84cc-d6htw + service: kube-state-metrics + status: "true" + exp_annotations: + summary: "Node is not ready." + description: 'minikube has been unready for more than 15 minutes.' 
+ runbook_url: 'https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready' + +- interval: 1m + input_series: + # node=minikube is uncordoned so we expect the alert to fire + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' + - series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + # node=minikube2 is cordoned so we expect the alert to not fire + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}' values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' + - series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}' + values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 18m alertname: KubeNodeReadinessFlapping