Skip to content

Commit 275f05d

Browse files
committed
bindata/alerts/slo: fix burnrate calculation
Calculate the request burn rate based on the total number of read+write requests instead of separately calculating the burn rate for each request type. This used to cause an erroneous result when summing up the read and write burn rates together as it wouldn't account for the proportion of failures amongst all requests. Signed-off-by: Damien Grisonnet <[email protected]>
1 parent 37df1b1 commit 275f05d

File tree

2 files changed

+57
-50
lines changed

2 files changed

+57
-50
lines changed

bindata/assets/alerts/kube-apiserver-slos-basic.yaml

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
1414
summary: The API server is burning too much error budget.
1515
expr: |
16-
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
16+
sum:apiserver_request:burnrate1h > (14.40 * 0.01000)
1717
and
18-
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
18+
sum:apiserver_request:burnrate5m > (14.40 * 0.01000)
1919
for: 2m
2020
labels:
2121
long: 1h
@@ -28,9 +28,9 @@ spec:
2828
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
2929
summary: The API server is burning too much error budget.
3030
expr: |
31-
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
31+
sum:apiserver_request:burnrate6h > (6.00 * 0.01000)
3232
and
33-
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
33+
sum:apiserver_request:burnrate30m > (6.00 * 0.01000)
3434
for: 15m
3535
labels:
3636
long: 6h
@@ -61,11 +61,9 @@ spec:
6161
# errors
6262
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
6363
)
64-
/
65-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
6664
labels:
6765
verb: read
68-
record: apiserver_request:burnrate5m
66+
record: apiserver_request:burn5m
6967
- expr: |
7068
(
7169
(
@@ -88,11 +86,9 @@ spec:
8886
# errors
8987
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
9088
)
91-
/
92-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
9389
labels:
9490
verb: read
95-
record: apiserver_request:burnrate30m
91+
record: apiserver_request:burn30m
9692
- expr: |
9793
(
9894
(
@@ -115,11 +111,9 @@ spec:
115111
# errors
116112
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
117113
)
118-
/
119-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
120114
labels:
121115
verb: read
122-
record: apiserver_request:burnrate1h
116+
record: apiserver_request:burn1h
123117
- expr: |
124118
(
125119
(
@@ -142,11 +136,9 @@ spec:
142136
# errors
143137
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
144138
)
145-
/
146-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
147139
labels:
148140
verb: read
149-
record: apiserver_request:burnrate6h
141+
record: apiserver_request:burn6h
150142
- expr: |
151143
(
152144
(
@@ -158,11 +150,9 @@ spec:
158150
+
159151
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
160152
)
161-
/
162-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
163153
labels:
164154
verb: write
165-
record: apiserver_request:burnrate1h
155+
record: apiserver_request:burn1h
166156
- expr: |
167157
(
168158
(
@@ -174,11 +164,9 @@ spec:
174164
+
175165
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
176166
)
177-
/
178-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
179167
labels:
180168
verb: write
181-
record: apiserver_request:burnrate30m
169+
record: apiserver_request:burn30m
182170
- expr: |
183171
(
184172
(
@@ -190,11 +178,9 @@ spec:
190178
+
191179
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
192180
)
193-
/
194-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
195181
labels:
196182
verb: write
197-
record: apiserver_request:burnrate5m
183+
record: apiserver_request:burn5m
198184
- expr: |
199185
(
200186
(
@@ -206,11 +192,29 @@ spec:
206192
+
207193
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
208194
)
209-
/
210-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
211195
labels:
212196
verb: write
213-
record: apiserver_request:burnrate6h
197+
record: apiserver_request:burn6h
198+
- expr: |
199+
sum(apiserver_request:burn5m)
200+
/
201+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[5m]))
202+
record: sum:apiserver_request:burnrate5m
203+
- expr: |
204+
sum(apiserver_request:burn30m)
205+
/
206+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[30m]))
207+
record: sum:apiserver_request:burnrate30m
208+
- expr: |
209+
sum(apiserver_request:burn1h)
210+
/
211+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[1h]))
212+
record: sum:apiserver_request:burnrate1h
213+
- expr: |
214+
sum(apiserver_request:burn6h)
215+
/
216+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[6h]))
217+
record: sum:apiserver_request:burnrate6h
214218
- expr: |
215219
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
216220
labels:

bindata/assets/alerts/kube-apiserver-slos-extended.yaml

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
1414
summary: The API server is burning too much error budget.
1515
expr: |
16-
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
16+
sum:apiserver_request:burnrate1d > (3.00 * 0.01000)
1717
and
18-
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
18+
sum:apiserver_request:burnrate2h > (3.00 * 0.01000)
1919
for: 1h
2020
labels:
2121
long: 1d
@@ -28,9 +28,9 @@ spec:
2828
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
2929
summary: The API server is burning too much error budget.
3030
expr: |
31-
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
31+
sum:apiserver_request:burnrate3d > (1.00 * 0.01000)
3232
and
33-
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
33+
sum:apiserver_request:burnrate6h > (1.00 * 0.01000)
3434
for: 3h
3535
labels:
3636
long: 3d
@@ -61,11 +61,9 @@ spec:
6161
# errors
6262
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
6363
)
64-
/
65-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
6664
labels:
6765
verb: read
68-
record: apiserver_request:burnrate2h
66+
record: apiserver_request:burn2h
6967
- expr: |
7068
(
7169
(
@@ -88,11 +86,9 @@ spec:
8886
# errors
8987
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
9088
)
91-
/
92-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
9389
labels:
9490
verb: read
95-
record: apiserver_request:burnrate1d
91+
record: apiserver_request:burn1d
9692
- expr: |
9793
(
9894
(
@@ -115,11 +111,9 @@ spec:
115111
# errors
116112
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
117113
)
118-
/
119-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
120114
labels:
121115
verb: read
122-
record: apiserver_request:burnrate3d
116+
record: apiserver_request:burn3d
123117
- expr: |
124118
(
125119
(
@@ -131,11 +125,9 @@ spec:
131125
+
132126
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
133127
)
134-
/
135-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
136128
labels:
137129
verb: write
138-
record: apiserver_request:burnrate1d
130+
record: apiserver_request:burn1d
139131
- expr: |
140132
(
141133
(
@@ -147,11 +139,9 @@ spec:
147139
+
148140
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
149141
)
150-
/
151-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
152142
labels:
153143
verb: write
154-
record: apiserver_request:burnrate2h
144+
record: apiserver_request:burn2h
155145
- expr: |
156146
(
157147
(
@@ -163,8 +153,21 @@ spec:
163153
+
164154
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
165155
)
166-
/
167-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
168156
labels:
169157
verb: write
170-
record: apiserver_request:burnrate3d
158+
record: apiserver_request:burn3d
159+
- expr: |
160+
sum(apiserver_request:burn2h)
161+
/
162+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[2h]))
163+
record: sum:apiserver_request:burnrate2h
164+
- expr: |
165+
sum(apiserver_request:burn1d)
166+
/
167+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[1d]))
168+
record: sum:apiserver_request:burnrate1d
169+
- expr: |
170+
sum(apiserver_request:burn3d)
171+
/
172+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[3d]))
173+
record: sum:apiserver_request:burnrate3d

0 commit comments

Comments
 (0)