Skip to content

Commit 0c7e6e0

Browse files
authored
fix(dashboards): deduplicate container queries (#1130)
1 parent 4a030d9 commit 0c7e6e0

File tree

8 files changed

+73
-73
lines changed

8 files changed

+73
-73
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ dev-port-forward:
4343
kubectl --context kind-kubernetes-mixin wait --for=condition=Ready pods -l app=lgtm --timeout=300s
4444
kubectl --context kind-kubernetes-mixin port-forward service/lgtm 3000:3000 4317:4317 4318:4318 9090:9090
4545

46-
dev-reload: clean-alerts clean-rules generate lint
46+
dev-reload: clean-dashboards clean-alerts clean-rules generate lint
4747
@cp -v prometheus_alerts.yaml scripts/provisioning/prometheus/ && \
4848
cp -v prometheus_rules.yaml scripts/provisioning/prometheus/ && \
4949
kubectl --context kind-kubernetes-mixin apply -f scripts/lgtm.yaml && \

dashboards/resources/cluster.libsonnet

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ local var = g.dashboard.variable;
129129
+ tsPanel.queryOptions.withTargets([
130130
prometheus.new(
131131
'${datasource}',
132-
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config
132+
'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace)' % $._config
133133
)
134134
+ prometheus.withLegendFormat('__auto'),
135135
]),
@@ -144,23 +144,23 @@ local var = g.dashboard.variable;
144144
+ prometheus.withInstant(true)
145145
+ prometheus.withFormat('table'),
146146

147-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
147+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace)' % $._config)
148148
+ prometheus.withInstant(true)
149149
+ prometheus.withFormat('table'),
150150

151151
prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
152152
+ prometheus.withInstant(true)
153153
+ prometheus.withFormat('table'),
154154

155-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
155+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
156156
+ prometheus.withInstant(true)
157157
+ prometheus.withFormat('table'),
158158

159159
prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
160160
+ prometheus.withInstant(true)
161161
+ prometheus.withFormat('table'),
162162

163-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
163+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
164164
+ prometheus.withInstant(true)
165165
+ prometheus.withFormat('table'),
166166
])
@@ -246,7 +246,7 @@ local var = g.dashboard.variable;
246246
+ tsPanel.queryOptions.withTargets([
247247
prometheus.new(
248248
'${datasource}',
249-
'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config
249+
'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace)' % $._config
250250
)
251251
+ prometheus.withLegendFormat('__auto'),
252252
]),
@@ -261,23 +261,23 @@ local var = g.dashboard.variable;
261261
+ prometheus.withInstant(true)
262262
+ prometheus.withFormat('table'),
263263

264-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config)
264+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace)' % $._config)
265265
+ prometheus.withInstant(true)
266266
+ prometheus.withFormat('table'),
267267

268268
prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
269269
+ prometheus.withInstant(true)
270270
+ prometheus.withFormat('table'),
271271

272-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
272+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
273273
+ prometheus.withInstant(true)
274274
+ prometheus.withFormat('table'),
275275

276276
prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
277277
+ prometheus.withInstant(true)
278278
+ prometheus.withFormat('table'),
279279

280-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
280+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
281281
+ prometheus.withInstant(true)
282282
+ prometheus.withFormat('table'),
283283
])

dashboards/resources/multi-cluster.libsonnet

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -99,27 +99,27 @@ local var = g.dashboard.variable;
9999
cpuUsage: [
100100
tsPanel.new('CPU Usage')
101101
+ tsPanel.queryOptions.withTargets([
102-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s)' % $._config)
102+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (%(clusterLabel)s)' % $._config)
103103
+ prometheus.withLegendFormat('__auto'),
104104
]),
105105
],
106106

107107
cpuQuota: [
108108
g.panel.table.new('CPU Quota')
109109
+ g.panel.table.queryOptions.withTargets([
110-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s)' % $._config)
110+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (%(clusterLabel)s)' % $._config)
111111
+ prometheus.withInstant(true)
112112
+ prometheus.withFormat('table'),
113113
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
114114
+ prometheus.withInstant(true)
115115
+ prometheus.withFormat('table'),
116-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
116+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
117117
+ prometheus.withInstant(true)
118118
+ prometheus.withFormat('table'),
119119
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
120120
+ prometheus.withInstant(true)
121121
+ prometheus.withFormat('table'),
122-
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
122+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
123123
+ prometheus.withInstant(true)
124124
+ prometheus.withFormat('table'),
125125
])
@@ -197,7 +197,7 @@ local var = g.dashboard.variable;
197197
+ tsPanel.standardOptions.withUnit('bytes')
198198
+ tsPanel.queryOptions.withTargets([
199199
// Not using container_memory_usage_bytes here because that includes page cache
200-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config)
200+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, container!=""})) by (%(clusterLabel)s)' % $._config)
201201
+ prometheus.withLegendFormat('__auto'),
202202
]),
203203
],
@@ -206,19 +206,19 @@ local var = g.dashboard.variable;
206206
g.panel.table.new('Memory Requests by Cluster')
207207
+ g.panel.table.standardOptions.withUnit('bytes')
208208
+ g.panel.table.queryOptions.withTargets([
209-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config)
209+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, container!=""})) by (%(clusterLabel)s)' % $._config)
210210
+ prometheus.withInstant(true)
211211
+ prometheus.withFormat('table'),
212212
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
213213
+ prometheus.withInstant(true)
214214
+ prometheus.withFormat('table'),
215-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
215+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, container!=""})) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
216216
+ prometheus.withInstant(true)
217217
+ prometheus.withFormat('table'),
218218
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
219219
+ prometheus.withInstant(true)
220220
+ prometheus.withFormat('table'),
221-
prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
221+
prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, container!=""})) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config)
222222
+ prometheus.withInstant(true)
223223
+ prometheus.withFormat('table'),
224224
])

0 commit comments

Comments (0)