Skip to content

Commit f90c0d9

Browse files
Merge pull request #1744 from dgrisonnet/improve-burnrate
OCPBUGS-49764: bindata/alerts/slo: improve burnrate calculation
2 parents 1537626 + 275f05d commit f90c0d9

File tree

2 files changed

+57
-50
lines changed

2 files changed

+57
-50
lines changed

bindata/assets/alerts/kube-apiserver-slos-basic.yaml

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
1414
summary: The API server is burning too much error budget.
1515
expr: |
16-
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
16+
sum:apiserver_request:burnrate1h > (14.40 * 0.01000)
1717
and
18-
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
18+
sum:apiserver_request:burnrate5m > (14.40 * 0.01000)
1919
for: 2m
2020
labels:
2121
long: 1h
@@ -28,9 +28,9 @@ spec:
2828
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
2929
summary: The API server is burning too much error budget.
3030
expr: |
31-
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
31+
sum:apiserver_request:burnrate6h > (6.00 * 0.01000)
3232
and
33-
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
33+
sum:apiserver_request:burnrate30m > (6.00 * 0.01000)
3434
for: 15m
3535
labels:
3636
long: 6h
@@ -61,11 +61,9 @@ spec:
6161
# errors
6262
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
6363
)
64-
/
65-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
6664
labels:
6765
verb: read
68-
record: apiserver_request:burnrate5m
66+
record: apiserver_request:burn5m
6967
- expr: |
7068
(
7169
(
@@ -88,11 +86,9 @@ spec:
8886
# errors
8987
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
9088
)
91-
/
92-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
9389
labels:
9490
verb: read
95-
record: apiserver_request:burnrate30m
91+
record: apiserver_request:burn30m
9692
- expr: |
9793
(
9894
(
@@ -115,11 +111,9 @@ spec:
115111
# errors
116112
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
117113
)
118-
/
119-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
120114
labels:
121115
verb: read
122-
record: apiserver_request:burnrate1h
116+
record: apiserver_request:burn1h
123117
- expr: |
124118
(
125119
(
@@ -142,11 +136,9 @@ spec:
142136
# errors
143137
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
144138
)
145-
/
146-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
147139
labels:
148140
verb: read
149-
record: apiserver_request:burnrate6h
141+
record: apiserver_request:burn6h
150142
- expr: |
151143
(
152144
(
@@ -158,11 +150,9 @@ spec:
158150
+
159151
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
160152
)
161-
/
162-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
163153
labels:
164154
verb: write
165-
record: apiserver_request:burnrate1h
155+
record: apiserver_request:burn1h
166156
- expr: |
167157
(
168158
(
@@ -174,11 +164,9 @@ spec:
174164
+
175165
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
176166
)
177-
/
178-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
179167
labels:
180168
verb: write
181-
record: apiserver_request:burnrate30m
169+
record: apiserver_request:burn30m
182170
- expr: |
183171
(
184172
(
@@ -190,11 +178,9 @@ spec:
190178
+
191179
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
192180
)
193-
/
194-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
195181
labels:
196182
verb: write
197-
record: apiserver_request:burnrate5m
183+
record: apiserver_request:burn5m
198184
- expr: |
199185
(
200186
(
@@ -206,11 +192,29 @@ spec:
206192
+
207193
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
208194
)
209-
/
210-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
211195
labels:
212196
verb: write
213-
record: apiserver_request:burnrate6h
197+
record: apiserver_request:burn6h
198+
- expr: |
199+
sum(apiserver_request:burn5m)
200+
/
201+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[5m]))
202+
record: sum:apiserver_request:burnrate5m
203+
- expr: |
204+
sum(apiserver_request:burn30m)
205+
/
206+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[30m]))
207+
record: sum:apiserver_request:burnrate30m
208+
- expr: |
209+
sum(apiserver_request:burn1h)
210+
/
211+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[1h]))
212+
record: sum:apiserver_request:burnrate1h
213+
- expr: |
214+
sum(apiserver_request:burn6h)
215+
/
216+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[6h]))
217+
record: sum:apiserver_request:burnrate6h
214218
- expr: |
215219
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
216220
labels:

bindata/assets/alerts/kube-apiserver-slos-extended.yaml

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
1414
summary: The API server is burning too much error budget.
1515
expr: |
16-
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
16+
sum:apiserver_request:burnrate1d > (3.00 * 0.01000)
1717
and
18-
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
18+
sum:apiserver_request:burnrate2h > (3.00 * 0.01000)
1919
for: 1h
2020
labels:
2121
long: 1d
@@ -28,9 +28,9 @@ spec:
2828
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/KubeAPIErrorBudgetBurn.md
2929
summary: The API server is burning too much error budget.
3030
expr: |
31-
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
31+
sum:apiserver_request:burnrate3d > (1.00 * 0.01000)
3232
and
33-
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
33+
sum:apiserver_request:burnrate6h > (1.00 * 0.01000)
3434
for: 3h
3535
labels:
3636
long: 3d
@@ -61,11 +61,9 @@ spec:
6161
# errors
6262
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
6363
)
64-
/
65-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
6664
labels:
6765
verb: read
68-
record: apiserver_request:burnrate2h
66+
record: apiserver_request:burn2h
6967
- expr: |
7068
(
7169
(
@@ -88,11 +86,9 @@ spec:
8886
# errors
8987
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
9088
)
91-
/
92-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
9389
labels:
9490
verb: read
95-
record: apiserver_request:burnrate1d
91+
record: apiserver_request:burn1d
9692
- expr: |
9793
(
9894
(
@@ -115,11 +111,9 @@ spec:
115111
# errors
116112
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
117113
)
118-
/
119-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
120114
labels:
121115
verb: read
122-
record: apiserver_request:burnrate3d
116+
record: apiserver_request:burn3d
123117
- expr: |
124118
(
125119
(
@@ -131,11 +125,9 @@ spec:
131125
+
132126
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
133127
)
134-
/
135-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
136128
labels:
137129
verb: write
138-
record: apiserver_request:burnrate1d
130+
record: apiserver_request:burn1d
139131
- expr: |
140132
(
141133
(
@@ -147,11 +139,9 @@ spec:
147139
+
148140
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
149141
)
150-
/
151-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
152142
labels:
153143
verb: write
154-
record: apiserver_request:burnrate2h
144+
record: apiserver_request:burn2h
155145
- expr: |
156146
(
157147
(
@@ -163,8 +153,21 @@ spec:
163153
+
164154
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
165155
)
166-
/
167-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
168156
labels:
169157
verb: write
170-
record: apiserver_request:burnrate3d
158+
record: apiserver_request:burn3d
159+
- expr: |
160+
sum(apiserver_request:burn2h)
161+
/
162+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[2h]))
163+
record: sum:apiserver_request:burnrate2h
164+
- expr: |
165+
sum(apiserver_request:burn1d)
166+
/
167+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[1d]))
168+
record: sum:apiserver_request:burnrate1d
169+
- expr: |
170+
sum(apiserver_request:burn3d)
171+
/
172+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE"}[3d]))
173+
record: sum:apiserver_request:burnrate3d

0 commit comments

Comments
 (0)