Skip to content

Commit 730ff1e

Browse files
Merge pull request #1742 from dgrisonnet/slo-health-endpoints
OCPBUGS-49763: Disregard health endpoints in the burn rate alerts
2 parents c3e5c90 + 0eb743e commit 730ff1e

File tree

2 files changed

+56
-56
lines changed

2 files changed

+56
-56
lines changed

bindata/assets/alerts/kube-apiserver-slos-basic.yaml

Lines changed: 32 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -43,23 +43,23 @@ spec:
4343
(
4444
(
4545
# too slow
46-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))
46+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[5m]))
4747
-
4848
(
4949
(
50-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(.0)?"}[5m]))
50+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[5m]))
5151
or
5252
vector(0)
5353
)
5454
+
55-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(.0)?"}[5m]))
55+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[5m]))
5656
+
57-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(.0)?"}[5m]))
57+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[5m]))
5858
)
5959
)
6060
+
6161
# errors
62-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
62+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[5m]))
6363
)
6464
labels:
6565
verb: read
@@ -68,23 +68,23 @@ spec:
6868
(
6969
(
7070
# too slow
71-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m]))
71+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[30m]))
7272
-
7373
(
7474
(
75-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(.0)?"}[30m]))
75+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[30m]))
7676
or
7777
vector(0)
7878
)
7979
+
80-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(.0)?"}[30m]))
80+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[30m]))
8181
+
82-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(.0)?"}[30m]))
82+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[30m]))
8383
)
8484
)
8585
+
8686
# errors
87-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
87+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[30m]))
8888
)
8989
labels:
9090
verb: read
@@ -93,23 +93,23 @@ spec:
9393
(
9494
(
9595
# too slow
96-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h]))
96+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[1h]))
9797
-
9898
(
9999
(
100-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(.0)?"}[1h]))
100+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[1h]))
101101
or
102102
vector(0)
103103
)
104104
+
105-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(.0)?"}[1h]))
105+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[1h]))
106106
+
107-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(.0)?"}[1h]))
107+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[1h]))
108108
)
109109
)
110110
+
111111
# errors
112-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
112+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[1h]))
113113
)
114114
labels:
115115
verb: read
@@ -118,23 +118,23 @@ spec:
118118
(
119119
(
120120
# too slow
121-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h]))
121+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[6h]))
122122
-
123123
(
124124
(
125-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(.0)?"}[6h]))
125+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[6h]))
126126
or
127127
vector(0)
128128
)
129129
+
130-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(.0)?"}[6h]))
130+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[6h]))
131131
+
132-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(.0)?"}[6h]))
132+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[6h]))
133133
)
134134
)
135135
+
136136
# errors
137-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
137+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[6h]))
138138
)
139139
labels:
140140
verb: read
@@ -143,12 +143,12 @@ spec:
143143
(
144144
(
145145
# too slow
146-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h]))
146+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[1h]))
147147
-
148-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(.0)?"}[1h]))
148+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[1h]))
149149
)
150150
+
151-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
151+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[1h]))
152152
)
153153
labels:
154154
verb: write
@@ -157,12 +157,12 @@ spec:
157157
(
158158
(
159159
# too slow
160-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m]))
160+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[30m]))
161161
-
162-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(.0)?"}[30m]))
162+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[30m]))
163163
)
164164
+
165-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
165+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[30m]))
166166
)
167167
labels:
168168
verb: write
@@ -171,12 +171,12 @@ spec:
171171
(
172172
(
173173
# too slow
174-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))
174+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[5m]))
175175
-
176-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(.0)?"}[5m]))
176+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[5m]))
177177
)
178178
+
179-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
179+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[5m]))
180180
)
181181
labels:
182182
verb: write
@@ -185,12 +185,12 @@ spec:
185185
(
186186
(
187187
# too slow
188-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h]))
188+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[6h]))
189189
-
190-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(.0)?"}[6h]))
190+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[6h]))
191191
)
192192
+
193-
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
193+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[6h]))
194194
)
195195
labels:
196196
verb: write

0 commit comments

Comments
 (0)