Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 3aa5830

Browse files
committed
Fix scaling dashboard to work on multi-zone ingesters
Signed-off-by: Marco Pracucci <[email protected]>
1 parent 8c2d6c0 commit 3aa5830

File tree

2 files changed

+48
-16
lines changed

2 files changed

+48
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
* [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308
3636
* [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329
3737
* [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335
38+
* [BUGFIX] Fixed scaling dashboard to work correctly when a Cortex service deployment spans multiple zones (each per-zone deployment name is expected to end with the `-zone-[a-z]` suffix). #365
3839

3940
## 1.9.0 / 2021-05-18
4041

cortex-mixin/recording_rules.libsonnet

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,21 @@ local utils = import 'mixin-utils/utils.libsonnet';
6969
rules: [
7070
{
7171
// Convenience rule to get the number of replicas for both a deployment and a statefulset.
72+
// Multi-zone deployments are grouped together by removing the "-zone-X" suffix.
7273
record: 'cluster_namespace_deployment:actual_replicas:count',
7374
expr: |||
74-
sum by (cluster, namespace, deployment) (kube_deployment_spec_replicas)
75-
or
7675
sum by (cluster, namespace, deployment) (
77-
label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*)")
76+
label_replace(
77+
kube_deployment_spec_replicas,
78+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
79+
)
80+
)
81+
or
82+
sum by (cluster, namespace, deployment) (
83+
label_replace(
84+
label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*)"),
85+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
86+
)
7887
)
7988
|||,
8089
},
@@ -188,7 +197,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
188197
expr: |||
189198
ceil(
190199
(sum by (cluster, namespace) (
191-
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester"}
200+
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"}
192201
) / 4)
193202
/
194203
avg by (cluster, namespace) (
@@ -199,18 +208,23 @@ local utils = import 'mixin-utils/utils.libsonnet';
199208
},
200209
{
201210
// Convenience rule to get the CPU utilization for both a deployment and a statefulset.
211+
// Multi-zone deployments are grouped together by removing the "-zone-X" suffix.
202212
record: 'cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate',
203213
expr: |||
204214
sum by (cluster, namespace, deployment) (
205215
label_replace(
206-
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
207-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
216+
label_replace(
217+
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
218+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
219+
),
220+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
208221
)
209222
)
210223
|||,
211224
},
212225
{
213226
// Convenience rule to get the CPU request for both a deployment and a statefulset.
227+
// Multi-zone deployments are grouped together by removing the "-zone-X" suffix.
214228
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum',
215229
expr: |||
216230
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
@@ -223,8 +237,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
223237
(
224238
sum by (cluster, namespace, deployment) (
225239
label_replace(
226-
kube_pod_container_resource_requests_cpu_cores,
227-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
240+
label_replace(
241+
kube_pod_container_resource_requests_cpu_cores,
242+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
243+
),
244+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
228245
)
229246
)
230247
)
@@ -234,8 +251,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
234251
(
235252
sum by (cluster, namespace, deployment) (
236253
label_replace(
237-
kube_pod_container_resource_requests{resource="cpu"},
238-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
254+
label_replace(
255+
kube_pod_container_resource_requests{resource="cpu"},
256+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
257+
),
258+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
239259
)
240260
)
241261
)
@@ -261,18 +281,23 @@ local utils = import 'mixin-utils/utils.libsonnet';
261281
},
262282
{
263283
// Convenience rule to get the Memory utilization for both a deployment and a statefulset.
284+
// Multi-zone deployments are grouped together by removing the "-zone-X" suffix.
264285
record: 'cluster_namespace_deployment:container_memory_usage_bytes:sum',
265286
expr: |||
266287
sum by (cluster, namespace, deployment) (
267288
label_replace(
268-
container_memory_usage_bytes,
269-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
289+
label_replace(
290+
container_memory_usage_bytes,
291+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
292+
),
293+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
270294
)
271295
)
272296
|||,
273297
},
274298
{
275299
// Convenience rule to get the Memory request for both a deployment and a statefulset.
300+
// Multi-zone deployments are grouped together by removing the "-zone-X" suffix.
276301
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum',
277302
expr: |||
278303
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
@@ -285,8 +310,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
285310
(
286311
sum by (cluster, namespace, deployment) (
287312
label_replace(
288-
kube_pod_container_resource_requests_memory_bytes,
289-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
313+
label_replace(
314+
kube_pod_container_resource_requests_memory_bytes,
315+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
316+
),
317+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
290318
)
291319
)
292320
)
@@ -296,8 +324,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
296324
(
297325
sum by (cluster, namespace, deployment) (
298326
label_replace(
299-
kube_pod_container_resource_requests{resource="memory"},
300-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
327+
label_replace(
328+
kube_pod_container_resource_requests{resource="memory"},
329+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
330+
),
331+
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
301332
)
302333
)
303334
)

0 commit comments

Comments
 (0)