Skip to content

Commit 6997e03

Browse files
committed
fixup! Prefer kube-scheduler's resource metrics to kube-state-metrics'
Refactor kube_pod_status_phase, since statuses other than "Pending" or "Running" are excluded or deprecated. Signed-off-by: Pranshu Srivastava <[email protected]>
1 parent 67a87ba commit 6997e03

File tree

4 files changed

+10
-50
lines changed

4 files changed

+10
-50
lines changed

DESIGN.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Jsonnet offers the ability to parameterise configuration, allowing for basic cus
5454
alert: "KubePodNotReady",
5555
expr: |||
5656
sum by (namespace, pod) (
57-
kube_pod_status_phase{%(kubeStateMetricsSelector)s, phase!~"Running|Succeeded"}
57+
kube_pod_status_phase{%(kubeStateMetricsSelector)s, phase!~"Running"}
5858
) > %(allowedNotReadyPods)s
5959
||| % $._config,
6060
"for": "1h",

alerts/apps_alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
expr: |||
3434
sum by (namespace, pod, %(clusterLabel)s) (
3535
max by(namespace, pod, %(clusterLabel)s) (
36-
kube_pod_status_phase{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, phase=~"Pending|Unknown|Failed"}
36+
kube_pod_status_phase{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, phase=~"Pending"}
3737
) * on(namespace, pod, %(clusterLabel)s) group_left(owner_kind) topk by(namespace, pod, %(clusterLabel)s) (
3838
1, max by(namespace, pod, owner_kind, %(clusterLabel)s) (kube_pod_owner{owner_kind!="Job"})
3939
)

rules/apps.libsonnet

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,7 @@
8686
{
8787
record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests',
8888
expr: |||
89-
(kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s)
90-
group_left() max by (namespace, pod, %(clusterLabel)s) (
91-
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
92-
)
89+
(kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s})
9390
||| % $._config,
9491
},
9592
{
@@ -99,8 +96,6 @@
9996
sum by (namespace, pod, %(clusterLabel)s) (
10097
max by (namespace, pod, container, %(clusterLabel)s) (
10198
kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}
102-
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
103-
kube_pod_status_phase{phase=~"Pending|Running"} == 1
10499
)
105100
)
106101
)
@@ -114,10 +109,7 @@
114109
{
115110
record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests',
116111
expr: |||
117-
(kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s)
118-
group_left() max by (namespace, pod, %(clusterLabel)s) (
119-
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
120-
)
112+
(kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s})
121113
||| % $._config,
122114
},
123115
{
@@ -127,8 +119,6 @@
127119
sum by (namespace, pod, %(clusterLabel)s) (
128120
max by (namespace, pod, container, %(clusterLabel)s) (
129121
kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}
130-
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
131-
kube_pod_status_phase{phase=~"Pending|Running"} == 1
132122
)
133123
)
134124
)
@@ -142,10 +132,7 @@
142132
{
143133
record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits',
144134
expr: |||
145-
(kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s)
146-
group_left() max by (namespace, pod, %(clusterLabel)s) (
147-
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
148-
)
135+
(kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s})
149136
||| % $._config,
150137
},
151138
{
@@ -155,8 +142,6 @@
155142
sum by (namespace, pod, %(clusterLabel)s) (
156143
max by (namespace, pod, container, %(clusterLabel)s) (
157144
kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}
158-
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
159-
kube_pod_status_phase{phase=~"Pending|Running"} == 1
160145
)
161146
)
162147
)
@@ -170,10 +155,7 @@
170155
{
171156
record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits',
172157
expr: |||
173-
(kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s)
174-
group_left() max by (namespace, pod, %(clusterLabel)s) (
175-
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
176-
)
158+
(kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s})
177159
||| % $._config,
178160
},
179161
{
@@ -183,8 +165,6 @@
183165
sum by (namespace, pod, %(clusterLabel)s) (
184166
max by (namespace, pod, container, %(clusterLabel)s) (
185167
kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}
186-
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
187-
kube_pod_status_phase{phase=~"Pending|Running"} == 1
188168
)
189169
)
190170
)

tests.yaml

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -461,23 +461,13 @@ tests:
461461
- eval_time: 0m
462462
expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum
463463
exp_samples:
464-
- value: 0.15
464+
- value: 0.3
465465
labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
466-
- eval_time: 0m
467-
expr: namespace_memory:kube_pod_container_resource_requests:sum
468-
exp_samples:
469-
- value: 1.0e+9
470-
labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
471466
- eval_time: 1m
472467
expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum
473468
exp_samples:
474-
- value: 0.15
469+
- value: 0.3
475470
labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
476-
- eval_time: 1m
477-
expr: namespace_memory:kube_pod_container_resource_requests:sum
478-
exp_samples:
479-
- value: 1.0e+9
480-
labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
481471

482472
- interval: 1m
483473
input_series:
@@ -506,23 +496,13 @@ tests:
506496
- eval_time: 0m
507497
expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum
508498
exp_samples:
509-
- value: 0.15
499+
- value: 0.3
510500
labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
511-
- eval_time: 0m
512-
expr: namespace_memory:kube_pod_container_resource_requests:sum
513-
exp_samples:
514-
- value: 1.0e+9
515-
labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
516501
- eval_time: 1m
517502
expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum
518503
exp_samples:
519-
- value: 0.15
504+
- value: 0.3
520505
labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
521-
- eval_time: 1m
522-
expr: namespace_memory:kube_pod_container_resource_requests:sum
523-
exp_samples:
524-
- value: 1.0e+9
525-
labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
526506

527507
- interval: 1m
528508
input_series:

0 commit comments

Comments
 (0)