You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
MON-4129: slos: accomodate for Prometheus v3 "le" normalization
ensure all series involved in the different queries change
during the integer->float transition so that rate calculation remains consistent across all series.
If apiserver_request_sli_duration_seconds_bucket{le="1"} had a last value of 15 and
then apiserver_request_sli_duration_seconds_bucket{le=~"1.0"} reappeared with 20, the
rate calculated over a range where both {le="1"} and {le="1.0"} overlap will not
account for the 20−15=5 difference, as the two series are distinct. But
apiserver_request_sli_duration_seconds_count's rate will still take
that 5 jump into account as the series doesn't change.
Replace apiserver_request_sli_duration_seconds_count with
apiserver_request_sli_duration_seconds_bucket{le=~"60(.0)?"}
since they should be equal given that the timeout is 60s and cannot be customized.
This change is temporary to avoid silencing alerts or having to reset/forget historical integer buckets during the transition.
Later, we'll revert back to using apiserver_request_sli_duration_seconds_count.
Copy file name to clipboardExpand all lines: bindata/assets/alerts/kube-apiserver-slos-basic.yaml
+24-24Lines changed: 24 additions & 24 deletions
Original file line number
Diff line number
Diff line change
@@ -43,18 +43,18 @@ spec:
43
43
(
44
44
(
45
45
# too slow
46
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[5m]))
46
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[5m]))
47
47
-
48
48
(
49
49
(
50
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le="1"}[5m]))
50
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[5m]))
51
51
or
52
52
vector(0)
53
53
)
54
54
+
55
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le="5"}[5m]))
55
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[5m]))
56
56
+
57
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le="30"}[5m]))
57
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[5m]))
58
58
)
59
59
)
60
60
+
@@ -68,18 +68,18 @@ spec:
68
68
(
69
69
(
70
70
# too slow
71
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[30m]))
71
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[30m]))
72
72
-
73
73
(
74
74
(
75
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le="1"}[30m]))
75
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[30m]))
76
76
or
77
77
vector(0)
78
78
)
79
79
+
80
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le="5"}[30m]))
80
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[30m]))
81
81
+
82
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le="30"}[30m]))
82
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[30m]))
83
83
)
84
84
)
85
85
+
@@ -93,18 +93,18 @@ spec:
93
93
(
94
94
(
95
95
# too slow
96
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[1h]))
96
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[1h]))
97
97
-
98
98
(
99
99
(
100
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le="1"}[1h]))
100
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[1h]))
101
101
or
102
102
vector(0)
103
103
)
104
104
+
105
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le="5"}[1h]))
105
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[1h]))
106
106
+
107
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le="30"}[1h]))
107
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[1h]))
108
108
)
109
109
)
110
110
+
@@ -118,18 +118,18 @@ spec:
118
118
(
119
119
(
120
120
# too slow
121
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[6h]))
121
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[6h]))
122
122
-
123
123
(
124
124
(
125
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le="1"}[6h]))
125
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope=~"resource|",le=~"1(.0)?"}[6h]))
126
126
or
127
127
vector(0)
128
128
)
129
129
+
130
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le="5"}[6h]))
130
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="namespace",le=~"5(.0)?"}[6h]))
131
131
+
132
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le="30"}[6h]))
132
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",scope="cluster",le=~"30(.0)?"}[6h]))
133
133
)
134
134
)
135
135
+
@@ -143,9 +143,9 @@ spec:
143
143
(
144
144
(
145
145
# too slow
146
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[1h]))
146
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[1h]))
147
147
-
148
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le="1"}[1h]))
148
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[1h]))
149
149
)
150
150
+
151
151
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[1h]))
@@ -157,9 +157,9 @@ spec:
157
157
(
158
158
(
159
159
# too slow
160
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[30m]))
160
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[30m]))
161
161
-
162
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le="1"}[30m]))
162
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[30m]))
163
163
)
164
164
+
165
165
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[30m]))
@@ -171,9 +171,9 @@ spec:
171
171
(
172
172
(
173
173
# too slow
174
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[5m]))
174
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[5m]))
175
175
-
176
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le="1"}[5m]))
176
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[5m]))
177
177
)
178
178
+
179
179
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[5m]))
@@ -185,9 +185,9 @@ spec:
185
185
(
186
186
(
187
187
# too slow
188
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz"}[6h]))
188
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"60(.0)?"}[6h]))
189
189
-
190
-
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le="1"}[6h]))
190
+
sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward|/healthz|/livez|/readyz",le=~"1(.0)?"}[6h]))
191
191
)
192
192
+
193
193
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"/healthz|/livez|/readyz",code=~"5.."}[6h]))
0 commit comments