Skip to content

Commit 4ff562d

Browse files
authored
Use kube_statefulset_replicas for KubeStatefulSetReplicasMismatch (#1026)
* Use `kube_statefulset_replicas` for `KubeStatefulSetReplicasMismatch` According to [0] and [1], `kube_statefulset_status_replicas` returns "the number of Pods created by the StatefulSet controller". Therefore this alert only fires when the StatefulSet controller is able to create the pods but they do not become ready. In cases where the controller is unable to create a pod (e.g. invalid pod spec), this alert does not fire. This is contrary to the expectation that the alert fires whenever the StatefulSet does not match the configured number of replicas. [0]: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#statefulsetstatus-v1-apps [1]: https://github.com/kubernetes/kube-state-metrics/blob/2a95d4649b2fea55799032fb9c0b571c4ba7f776/internal/store/statefulset.go#L75 * Add tests for KubeStatefulSetReplicasMismatch
1 parent e272675 commit 4ff562d

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

alerts/apps_alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ local utils = import '../lib/utils.libsonnet';
121121
(
122122
kube_statefulset_status_replicas_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
123123
!=
124-
kube_statefulset_status_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
124+
kube_statefulset_replicas{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
125125
) and (
126126
changes(kube_statefulset_status_replicas_updated{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[10m])
127127
==

tests/tests.yaml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,3 +1323,62 @@ tests:
13231323
description: 'Cluster has overcommitted memory resource requests for Namespaces.'
13241324
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit"
13251325
summary: "Cluster has overcommitted memory resource requests."
1326+
1327+
# Verify KubeStatefulSetReplicasMismatch fires when no replicas could be created
1328+
- interval: 1m
1329+
name: StatefulSet replicas not created
1330+
input_series:
1331+
- series: 'kube_statefulset_replicas{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1332+
values: '3x15'
1333+
- series: 'kube_statefulset_status_replicas{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1334+
values: '0x15'
1335+
- series: 'kube_statefulset_status_replicas_ready{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1336+
values: '0x15'
1337+
- series: 'kube_statefulset_status_replicas_updated{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1338+
values: '0x15'
1339+
alert_rule_test:
1340+
- eval_time: 14m
1341+
alertname: KubeStatefulSetReplicasMismatch
1342+
- eval_time: 15m
1343+
alertname: KubeStatefulSetReplicasMismatch
1344+
exp_alerts:
1345+
- exp_labels:
1346+
severity: "warning"
1347+
job: "kube-state-metrics"
1348+
cluster: "kubernetes"
1349+
namespace: "test"
1350+
statefulset: "sts"
1351+
exp_annotations:
1352+
description: "StatefulSet test/sts has not matched the expected number of replicas for longer than 15 minutes."
1353+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
1354+
summary: "StatefulSet has not matched the expected number of replicas."
1355+
1356+
# Verify KubeStatefulSetReplicasMismatch fires when replicas could be created but are not ready
1357+
- interval: 1m
1358+
name: StatefulSet replicas created but not ready
1359+
input_series:
1360+
- series: 'kube_statefulset_replicas{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1361+
values: '3x15'
1362+
- series: 'kube_statefulset_status_replicas{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1363+
values: '3x15'
1364+
- series: 'kube_statefulset_status_replicas_ready{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1365+
values: '0x15'
1366+
- series: 'kube_statefulset_status_replicas_updated{cluster="kubernetes",namespace="test",job="kube-state-metrics",statefulset="sts"}'
1367+
values: '0x15'
1368+
alert_rule_test:
1369+
- eval_time: 14m
1370+
alertname: KubeStatefulSetReplicasMismatch
1371+
- eval_time: 15m
1372+
alertname: KubeStatefulSetReplicasMismatch
1373+
exp_alerts:
1374+
- exp_labels:
1375+
severity: "warning"
1376+
job: "kube-state-metrics"
1377+
cluster: "kubernetes"
1378+
namespace: "test"
1379+
statefulset: "sts"
1380+
exp_annotations:
1381+
description: "StatefulSet test/sts has not matched the expected number of replicas for longer than 15 minutes."
1382+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
1383+
summary: "StatefulSet has not matched the expected number of replicas."
1384+

0 commit comments

Comments
 (0)