Skip to content

Commit 3cb7958

Browse files
authored
Merge pull request #889 from chess-knight/fix/apiserver_cluster_label
Add missing cluster labels and aggregations for apiserver alerts
2 parents e2239f0 + e80710c commit 3cb7958

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

alerts/kube_apiserver.libsonnet

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@ local utils = import '../lib/utils.libsonnet';
44
_config+:: {
55
kubeApiserverSelector: error 'must provide selector for kube-apiserver',
66

7-
kubeAPILatencyWarningSeconds: 1,
8-
97
certExpirationWarningSeconds: 7 * 24 * 3600,
108
certExpirationCriticalSeconds: 1 * 24 * 3600,
119
},
@@ -18,13 +16,16 @@ local utils = import '../lib/utils.libsonnet';
1816
{
1917
alert: 'KubeAPIErrorBudgetBurn',
2018
expr: |||
21-
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
22-
and
23-
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
19+
sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
20+
and on(%s)
21+
sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
2422
||| % [
23+
$._config.clusterLabel,
2524
w.long,
2625
w.factor,
2726
(1 - $._config.SLOs.apiserver.target),
27+
$._config.clusterLabel,
28+
$._config.clusterLabel,
2829
w.short,
2930
w.factor,
3031
(1 - $._config.SLOs.apiserver.target),
@@ -49,7 +50,7 @@ local utils = import '../lib/utils.libsonnet';
4950
{
5051
alert: 'KubeClientCertificateExpiration',
5152
expr: |||
52-
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
53+
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
5354
||| % $._config,
5455
'for': '5m',
5556
labels: {
@@ -63,7 +64,7 @@ local utils = import '../lib/utils.libsonnet';
6364
{
6465
alert: 'KubeClientCertificateExpiration',
6566
expr: |||
66-
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
67+
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
6768
||| % $._config,
6869
'for': '5m',
6970
labels: {
@@ -108,7 +109,7 @@ local utils = import '../lib/utils.libsonnet';
108109
{
109110
alert: 'KubeAPITerminatedRequests',
110111
expr: |||
111-
sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
112+
sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum by(%(clusterLabel)s) (rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
112113
||| % $._config,
113114
labels: {
114115
severity: 'warning',

tests.yaml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1125,9 +1125,9 @@ tests:
11251125

11261126
- interval: 1m
11271127
input_series:
1128-
- series: 'apiserver_request_terminations_total{job="kube-apiserver",apiserver="kube-apiserver"}'
1128+
- series: 'apiserver_request_terminations_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
11291129
values: '1+1x10'
1130-
- series: 'apiserver_request_total{job="kube-apiserver",apiserver="kube-apiserver"}'
1130+
- series: 'apiserver_request_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
11311131
values: '1+2x10'
11321132
alert_rule_test:
11331133
- eval_time: 5m # alert hasn't fired
@@ -1137,6 +1137,7 @@ tests:
11371137
exp_alerts:
11381138
- exp_labels:
11391139
severity: warning
1140+
cluster: "kubernetes"
11401141
exp_annotations:
11411142
summary: "The kubernetes apiserver has terminated 33.33% of its incoming requests."
11421143
description: "The kubernetes apiserver has terminated 33.33% of its incoming requests."

0 commit comments

Comments
 (0)