Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions alerts/kubelet.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ local utils = import '../lib/utils.libsonnet';
{
// Selects nodes whose Ready condition series (status="true") has value 0, i.e. the node is not Ready,
// and keeps only those that also have kube_node_spec_unschedulable == 0 for the same cluster label and node.
// Nodes with kube_node_spec_unschedulable == 1 (cordoned) are therefore filtered out by the `and on (...)` join.
expr: |||
kube_node_status_condition{%(kubeStateMetricsSelector)s,condition="Ready",status="true"} == 0
and on (%(clusterLabel)s, node)
kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
||| % $._config,
labels: {
severity: 'warning',
Expand Down Expand Up @@ -85,6 +87,8 @@ local utils = import '../lib/utils.libsonnet';
alert: 'KubeNodeReadinessFlapping',
// Counts, per cluster label and node, how often the Ready (status="true") series changed value in the last 15m
// and matches when that count exceeds 2. The `and on (...)` join then keeps only nodes that have
// kube_node_spec_unschedulable == 0, so nodes with unschedulable == 1 (cordoned) are excluded.
expr: |||
sum(changes(kube_node_status_condition{%(kubeStateMetricsSelector)s,status="true",condition="Ready"}[15m])) by (%(clusterLabel)s, node) > 2
and on (%(clusterLabel)s, node)
kube_node_spec_unschedulable{%(kubeStateMetricsSelector)s} == 0
||| % $._config,
'for': '15m',
labels: {
Expand Down
41 changes: 41 additions & 0 deletions tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,49 @@ tests:

# KubeNodeNotReady fixtures: both nodes report Ready{status="true"} == 0 for the whole window;
# they differ only in kube_node_spec_unschedulable, so the cordon filter alone decides which one alerts.
- interval: 1m
input_series:
# node=minikube is uncordoned (unschedulable == 0) so we expect the alert to fire
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
# The rule joins this metric on (cluster, node) only; no status label is set, matching the minikube2 fixture below.
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
# node=minikube2 is cordoned (unschedulable == 1) so we expect the alert to not fire.
# status="true" must be present on this series: the rule selects condition="Ready",status="true",
# so without the label minikube2 would never match and the cordon filter would go untested.
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shorthand syntax suggestion that you might like to know, for next time:

Suggested change
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
values: '1x19'

docs

Which would translate to:

'1x19' is shorthand for '1+0x19': the series starts at 1 and is followed by 19 further samples, each incremented by 0.

# Expect exactly one KubeNodeNotReady alert at 18m: the one for node=minikube.
# The cordoned node (minikube2) must not appear in exp_alerts.
alert_rule_test:
- eval_time: 18m
alertname: KubeNodeNotReady
exp_alerts:
# exp_labels carries the labels of the kube_node_status_condition series plus the rule's severity.
- exp_labels:
cluster: kubernetes
node: minikube
severity: warning
condition: Ready
endpoint: https-main
instance: 10.0.2.15:10250
job: kube-state-metrics
namespace: monitoring
pod: kube-state-metrics-b894d84cc-d6htw
service: kube-state-metrics
# Quoted so the label value stays the string "true" rather than a YAML boolean.
status: "true"
exp_annotations:
summary: "Node is not ready."
description: 'minikube has been unready for more than 15 minutes.'
# No trailing double quote inside the string: a stray quote would become part of the expected URL.
runbook_url: 'https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready'

# KubeNodeReadinessFlapping fixtures: both nodes have the same flapping Ready{status="true"} series;
# they differ only in kube_node_spec_unschedulable, so the cordon filter alone decides which one alerts.
- interval: 1m
input_series:
# node=minikube is uncordoned (unschedulable == 0) so we expect the alert to fire
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}'
values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1'
# The rule joins this metric on (cluster, node) only; no status label is set, matching the minikube2 fixture below.
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
# node=minikube2 is cordoned (unschedulable == 1) so we expect the alert to not fire
- series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics",status="true"}'
values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1'
- series: 'kube_node_spec_unschedulable{endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube2",pod="kube-state-metrics-b894d84cc-f5e9f",service="kube-state-metrics"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
alert_rule_test:
- eval_time: 18m
alertname: KubeNodeReadinessFlapping
Expand Down
Loading