@@ -1,4 +1,5 @@
 local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
+local queries = import './queries/cluster-queries.libsonnet';
 
 local prometheus = g.query.prometheus;
 local stat = g.panel.stat;
@@ -80,47 +81,47 @@ local var = g.dashboard.variable;
     statPanel(
       'CPU Utilisation',
       'percentunit',
-      'cluster:node_cpu:ratio_rate5m{%(clusterLabel)s="$cluster"}' % $._config
+      queries.cpuUtilisation($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
 
     statPanel(
       'CPU Requests Commitment',
       'percentunit',
-      'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config
+      queries.cpuRequestsCommitment($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
 
     statPanel(
       'CPU Limits Commitment',
       'percentunit',
-      'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config
+      queries.cpuLimitsCommitment($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
 
     statPanel(
       'Memory Utilisation',
       'percentunit',
-      '1 - sum(:node_memory_MemAvailable_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(node_memory_MemTotal_bytes{%(nodeExporterSelector)s,%(clusterLabel)s="$cluster"})' % $._config
+      queries.memoryUtilisation($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
 
     statPanel(
       'Memory Requests Commitment',
       'percentunit',
-      'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config
+      queries.memoryRequestsCommitment($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
 
     statPanel(
       'Memory Limits Commitment',
       'percentunit',
-      'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config
+      queries.memoryLimitsCommitment($._config)
     )
     + stat.gridPos.withW(4)
     + stat.gridPos.withH(3),
@@ -129,38 +130,38 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace)' % $._config
+        queries.cpuUsageByNamespace($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
 
     table.new('CPU Quota')
     + table.queryOptions.withTargets([
-      prometheus.new('${datasource}', 'sum(kube_pod_owner{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.podsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'count(avg(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.workloadsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.cpuUsageByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.cpuRequestsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.cpuUsageVsRequests($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.cpuLimitsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.cpuUsageVsLimits($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
     ])
@@ -246,38 +247,38 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace)' % $._config
+        queries.memoryUsageByNamespace($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
 
     table.new('Memory Requests by Namespace')
     + table.queryOptions.withTargets([
-      prometheus.new('${datasource}', 'sum(kube_pod_owner{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.podsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'count(avg(namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.workloadsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.memoryUsageByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.memoryRequestsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.memoryUsageVsRequests($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.memoryLimitsByNamespace($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.memoryUsageVsLimits($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
     ])
@@ -396,27 +397,27 @@ local var = g.dashboard.variable;
 
     table.new('Current Network Usage')
     + table.queryOptions.withTargets([
-      prometheus.new('${datasource}', 'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkReceiveBandwidth($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkTransmitBandwidth($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkReceivePackets($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkTransmitPackets($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkReceivePacketsDropped($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config)
+      prometheus.new('${datasource}', queries.networkTransmitPacketsDropped($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
     ])
@@ -510,7 +511,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.networkReceiveBandwidth($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -520,7 +521,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.networkTransmitBandwidth($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -530,7 +531,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'avg(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.avgContainerReceiveBandwidth($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -540,7 +541,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'avg(irate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.avgContainerTransmitBandwidth($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -550,7 +551,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(irate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.rateOfReceivedPackets($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -560,7 +561,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(irate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.rateOfTransmittedPackets($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -570,7 +571,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(irate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.rateOfReceivedPacketsDropped($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -580,7 +581,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum(irate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s=~".+"}[%(grafanaIntervalVar)s])) by (namespace)' % $._config
+        queries.rateOfTransmittedPacketsDropped($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -590,7 +591,7 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'ceil(sum by(namespace) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s])))' % $._config
+        queries.iopsReadsWrites($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
@@ -600,34 +601,34 @@ local var = g.dashboard.variable;
     + tsPanel.queryOptions.withTargets([
       prometheus.new(
         '${datasource}',
-        'sum by(namespace) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config
+        queries.throughputReadWrite($._config)
       )
       + prometheus.withLegendFormat('__auto'),
     ]),
 
     table.new('Current Storage IO')
     + table.queryOptions.withTargets([
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.iopsReads($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.iopsWrites($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.iopsReadsWritesCombined($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.throughputRead($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.throughputWrite($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
 
-      prometheus.new('${datasource}', 'sum by(namespace) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace!=""}[%(grafanaIntervalVar)s]))' % $._config)
+      prometheus.new('${datasource}', queries.throughputReadWriteCombined($._config))
       + prometheus.withInstant(true)
       + prometheus.withFormat('table'),
     ])