diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index cbe3e1736..fea1d4eaf 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -1,9 +1,11 @@ +local defaultQueries = import './queries/namespace.libsonnet'; +local defaultVariables = import './variables/namespace.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + local prometheus = g.query.prometheus; local stat = g.panel.stat; local table = g.panel.table; local timeSeries = g.panel.timeSeries; -local var = g.dashboard.variable; { local statPanel(title, unit, query) = @@ -34,48 +36,15 @@ local var = g.dashboard.variable; grafanaDashboards+:: { 'k8s-resources-namespace.json': - local variables = { - datasource: - var.datasource.new('datasource', 'prometheus') - + var.datasource.withRegex($._config.datasourceFilterRegex) - + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() - + var.datasource.generalOptions.withLabel('Data source') - + { - current: { - selected: true, - text: $._config.datasourceName, - value: $._config.datasourceName, - }, - }, + // Allow overriding queries via $._queries.namespace (declared as a visible or hidden field), otherwise use the imported defaults + local queries = if std.objectHasAll($, '_queries') && std.objectHasAll($._queries, 'namespace') + then $._queries.namespace + else defaultQueries; - cluster: - var.query.new('cluster') - + var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - $._config.clusterLabel, - 'up{%(kubeStateMetricsSelector)s}' % $._config, - ) - + var.query.generalOptions.withLabel('cluster') - + var.query.refresh.onTime() - + ( - if $._config.showMultiCluster - then var.query.generalOptions.showOnDashboard.withLabelAndValue() - else var.query.generalOptions.showOnDashboard.withNothing() - ) - + var.query.withSort(type='alphabetical'), - - namespace: - var.query.new('namespace') - + 
var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - 'namespace', - 'kube_namespace_status_phase{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}' % $._config, - ) - + var.query.generalOptions.withLabel('namespace') - + var.query.refresh.onTime() - + var.query.generalOptions.showOnDashboard.withLabelAndValue() - + var.query.withSort(type='alphabetical'), - }; + // Allow overriding variables via $._variables.namespace (declared as a visible or hidden field), otherwise use the imported defaults + local variables = if std.objectHasAll($, '_variables') && std.objectHasAll($._variables, 'namespace') + then $._variables.namespace($._config) + else defaultVariables.namespace($._config); local links = { pod: { @@ -91,7 +60,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from requests)', 'percentunit', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + queries.cpuUtilisationFromRequests($._config) ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -99,7 +68,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from limits)', 'percentunit', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + queries.cpuUtilisationFromLimits($._config) ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -107,7 +76,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from requests)', 'percentunit', - 'sum(max by (%(clusterLabel)s, 
%(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + queries.memoryUtilisationFromRequests($._config) ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -115,7 +84,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from limits)', 'percentunit', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + queries.memoryUtilisationFromLimits($._config) ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -125,19 +94,19 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config + queries.cpuUsageByPod($._config) ) + prometheus.withLegendFormat('__auto'), prometheus.new( '${datasource}', - 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"}))' % $._config + queries.cpuQuotaRequests($._config) ) + prometheus.withLegendFormat('quota - requests'), prometheus.new( '${datasource}', - 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"}))' % $._config + queries.cpuQuotaLimits($._config) ) + prometheus.withLegendFormat('quota - limits'), ]) @@ -197,19 +166,19 @@ local var = 
g.dashboard.variable; table.new('CPU Quota') + table.gridPos.withW(24) + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageByPod($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.cpuRequestsByPod($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageVsRequests($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.cpuLimitsByPod($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, 
container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageVsLimits($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -287,19 +256,19 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""})) by (pod)' % $._config + queries.memoryUsageByPod($._config) ) + prometheus.withLegendFormat('__auto'), prometheus.new( '${datasource}', - 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.memory"}))' % $._config + queries.memoryQuotaRequests($._config) ) + prometheus.withLegendFormat('quota - requests'), prometheus.new( '${datasource}', - 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.memory"}))' % $._config + queries.memoryQuotaLimits($._config) ) + prometheus.withLegendFormat('quota - limits'), ]) @@ -360,28 +329,28 @@ local var = g.dashboard.variable; + table.gridPos.withW(24) + table.standardOptions.withUnit('bytes') + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageByPod($._config)) + prometheus.withInstant(true) + 
prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryRequestsByPod($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageVsRequests($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryLimitsByPod($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageVsLimits($._config)) + prometheus.withInstant(true) + 
prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageRSS($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageCache($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageSwap($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -468,27 +437,27 @@ local var = g.dashboard.variable; table.new('Current Network Usage') + table.gridPos.withW(24) + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkReceiveBandwidth($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkTransmitBandwidth($._config)) + prometheus.withInstant(true) + 
prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkReceivePackets($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkTransmitPackets($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkReceivePacketsDropped($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config) + prometheus.new('${datasource}', queries.networkTransmitPacketsDropped($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -582,7 +551,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkReceiveBandwidthTimeSeries($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -592,7 +561,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( 
'${datasource}', - 'sum(rate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkTransmitBandwidthTimeSeries($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -602,7 +571,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.rateOfReceivedPackets($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -612,7 +581,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.rateOfTransmittedPackets($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -622,7 +591,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.rateOfReceivedPacketsDropped($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -632,7 +601,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.rateOfTransmittedPacketsDropped($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -642,7 +611,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'ceil(sum by(pod) 
(rate(container_fs_reads_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s])))' % $._config + queries.iopsReadsWrites($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -652,7 +621,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum by(pod) (rate(container_fs_reads_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config + queries.throughputReadWrite($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -660,27 +629,27 @@ local var = g.dashboard.variable; table.new('Current Storage IO') + table.gridPos.withW(24) + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReads($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageWrites($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, 
%(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadsPlusWrites($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageWriteBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadPlusWriteBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index fdd0320db..1d93e4779 100644 --- a/dashboards/resources/pod.libsonnet +++ 
b/dashboards/resources/pod.libsonnet @@ -1,8 +1,10 @@ +local defaultQueries = import './queries/pod.libsonnet'; +local defaultVariables = import './variables/pod.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + local prometheus = g.query.prometheus; local table = g.panel.table; local timeSeries = g.panel.timeSeries; -local var = g.dashboard.variable; { local tsPanel = @@ -23,70 +25,15 @@ local var = g.dashboard.variable; grafanaDashboards+:: { 'k8s-resources-pod.json': - local variables = { - datasource: - var.datasource.new('datasource', 'prometheus') - + var.datasource.withRegex($._config.datasourceFilterRegex) - + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() - + var.datasource.generalOptions.withLabel('Data source') - + { - current: { - selected: true, - text: $._config.datasourceName, - value: $._config.datasourceName, - }, - }, + // Allow overriding queries via $._queries.pod (declared as a visible or hidden field), otherwise use the imported defaults + local queries = if std.objectHasAll($, '_queries') && std.objectHasAll($._queries, 'pod') + then $._queries.pod + else defaultQueries; - cluster: - var.query.new('cluster') - + var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - $._config.clusterLabel, - 'up{%(kubeStateMetricsSelector)s}' % $._config, - ) - + var.query.generalOptions.withLabel('cluster') - + var.query.refresh.onTime() - + ( - if $._config.showMultiCluster - then var.query.generalOptions.showOnDashboard.withLabelAndValue() - else var.query.generalOptions.showOnDashboard.withNothing() - ) - + var.query.withSort(type='alphabetical'), - - namespace: - var.query.new('namespace') - + var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - 'namespace', - 'kube_namespace_status_phase{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}' % $._config, - ) - + var.query.generalOptions.withLabel('namespace') - + var.query.refresh.onTime() - + 
var.query.generalOptions.showOnDashboard.withLabelAndValue() - + var.query.withSort(type='alphabetical'), - - pod: - var.query.new('pod') - + var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - 'pod', - 'kube_pod_info{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, - ) - + var.query.generalOptions.withLabel('pod') - + var.query.refresh.onTime() - + var.query.generalOptions.showOnDashboard.withLabelAndValue() - + var.query.withSort(type='alphabetical'), - }; - - local cpuRequestsQuery = ||| - sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} - ) - ||| % $._config; - - local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits'); - local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu', 'memory'); - local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu', 'memory'); + // Allow overriding variables via $._variables.pod (declared as a visible or hidden field), otherwise use the imported defaults + local variables = if std.objectHasAll($, '_variables') && std.objectHasAll($._variables, 'pod') + then $._variables.pod($._config) + else defaultVariables.pod($._config); local panels = [ tsPanel.new('CPU Usage') @@ -94,14 +41,14 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster", container!=""})) by (container)' % $._config + queries.cpuUsageByContainer($._config) ) + prometheus.withLegendFormat('__auto'), - prometheus.new('${datasource}', cpuRequestsQuery) + prometheus.new('${datasource}', queries.cpuRequests($._config)) + prometheus.withLegendFormat('requests'), - prometheus.new('${datasource}', cpuLimitsQuery) + prometheus.new('${datasource}', 
queries.cpuLimits($._config)) + prometheus.withLegendFormat('limits'), ]) + tsPanel.standardOptions.withOverrides([ @@ -163,7 +110,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(increase(container_cpu_cfs_throttled_periods_total{%(cadvisorSelector)s, namespace="$namespace", pod="$pod", container!="", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (container) /sum(increase(container_cpu_cfs_periods_total{%(cadvisorSelector)s, namespace="$namespace", pod="$pod", container!="", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (container)' % $._config + queries.cpuThrottling($._config) ) + prometheus.withLegendFormat('__auto'), ]) @@ -208,19 +155,19 @@ local var = g.dashboard.variable; table.new('CPU Quota') + table.gridPos.withW(24) + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageByContainer($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.cpuRequestsByContainer($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container) / sum(max by 
(%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageVsRequests($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.cpuLimitsByContainer($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.cpuUsageVsLimits($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -286,14 +233,14 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""})) by (container)' % $._config + queries.memoryUsageWSS($._config) ) + prometheus.withLegendFormat('__auto'), - prometheus.new('${datasource}', memRequestsQuery) + prometheus.new('${datasource}', 
queries.memoryRequests($._config)) + prometheus.withLegendFormat('requests'), - prometheus.new('${datasource}', memLimitsQuery) + prometheus.new('${datasource}', queries.memoryLimits($._config)) + prometheus.withLegendFormat('limits'), ]) + tsPanel.standardOptions.withOverrides([ @@ -353,28 +300,28 @@ local var = g.dashboard.variable; + table.gridPos.withW(24) + table.standardOptions.withUnit('bytes') + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageWSS($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryRequestsByContainer($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageVsRequests($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, 
container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryLimitsByContainer($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageVsLimits($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageRSS($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageCache($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", 
container != "", container != "POD"})) by (container)' % $._config) + prometheus.new('${datasource}', queries.memoryUsageSwap($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -452,7 +399,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkReceiveBandwidth($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -462,7 +409,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkTransmitBandwidth($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -472,7 +419,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkReceivePackets($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -482,7 +429,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkTransmitPackets($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -492,7 +439,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 
'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkReceivePacketsDropped($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -502,7 +449,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % $._config + queries.networkTransmitPacketsDropped($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -512,12 +459,12 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'ceil(sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])))' % $._config + queries.iopsPodReads($._config) ) + prometheus.withLegendFormat('Reads'), prometheus.new( '${datasource}', - 'ceil(sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster",namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])))' % $._config + queries.iopsPodWrites($._config) ) + prometheus.withLegendFormat('Writes'), ]), @@ -527,12 +474,12 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s]))' % $._config + queries.throughputPodRead($._config) ) + prometheus.withLegendFormat('Reads'), prometheus.new( '${datasource}', - 'sum by(pod) 
(rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s]))' % $._config + queries.throughputPodWrite($._config) ) + prometheus.withLegendFormat('Writes'), ]), @@ -542,7 +489,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'ceil(sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s])))' % $._config + queries.iopsContainersCombined($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -552,7 +499,7 @@ local var = g.dashboard.variable; + tsPanel.queryOptions.withTargets([ prometheus.new( '${datasource}', - 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config + queries.throughputContainersCombined($._config) ) + prometheus.withLegendFormat('__auto'), ]), @@ -560,27 +507,27 @@ local var = g.dashboard.variable; table.new('Current Storage IO') + table.gridPos.withW(24) + table.queryOptions.withTargets([ - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReads($._config)) + 
prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_writes_total{%(cadvisorSelector)s,%(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageWrites($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadsPlusWrites($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageWriteBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, 
%(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % $._config) + prometheus.new('${datasource}', queries.storageReadPlusWriteBytes($._config)) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/queries/namespace.libsonnet b/dashboards/resources/queries/namespace.libsonnet new file mode 100644 index 000000000..931a242e6 --- /dev/null +++ b/dashboards/resources/queries/namespace.libsonnet @@ -0,0 +1,133 @@ +{ + // CPU Utilization Stat Queries + cpuUtilisationFromRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % config, + + cpuUtilisationFromLimits(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % config, + + memoryUtilisationFromRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % config, + + memoryUtilisationFromLimits(config):: + 
'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % config, + + // CPU Usage TimeSeries Queries + cpuUsageByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + cpuQuotaRequests(config):: + 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"}))' % config, + + cpuQuotaLimits(config):: + 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"}))' % config, + + // CPU Quota Table Queries + cpuRequestsByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + cpuUsageVsRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + cpuLimitsByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + cpuUsageVsLimits(config):: + 'sum(max by 
(%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + // Memory Usage TimeSeries Queries + memoryUsageByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""})) by (pod)' % config, + + memoryQuotaRequests(config):: + 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="requests.memory"}))' % config, + + memoryQuotaLimits(config):: + 'scalar(max(kube_resourcequota{%(clusterLabel)s="$cluster", namespace="$namespace", type="hard",resource="limits.memory"}))' % config, + + // Memory Quota Table Queries + memoryRequestsByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + memoryUsageVsRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + memoryLimitsByPod(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", 
namespace="$namespace"})) by (pod)' % config, + + memoryUsageVsLimits(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""})) by (pod) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"})) by (pod)' % config, + + memoryUsageRSS(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % config, + + memoryUsageCache(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % config, + + memoryUsageSwap(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""})) by (pod)' % config, + + // Network Table Queries + networkReceiveBandwidth(config):: + 'sum(rate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitBandwidth(config):: + 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkReceivePackets(config):: + 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitPackets(config):: + 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, 
%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkReceivePacketsDropped(config):: + 'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitPacketsDropped(config):: + 'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + // Network TimeSeries Queries (using different functions - rate vs irate) + networkReceiveBandwidthTimeSeries(config):: + 'sum(rate(container_network_receive_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitBandwidthTimeSeries(config):: + 'sum(rate(container_network_transmit_bytes_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + rateOfReceivedPackets(config):: + 'sum(irate(container_network_receive_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + rateOfTransmittedPackets(config):: + 'sum(irate(container_network_transmit_packets_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + rateOfReceivedPacketsDropped(config):: + 'sum(irate(container_network_receive_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + rateOfTransmittedPacketsDropped(config):: + 'sum(irate(container_network_transmit_packets_dropped_total{%(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + // Storage TimeSeries Queries + iopsReadsWrites(config):: + 'ceil(sum by(pod) 
(rate(container_fs_reads_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s])))' % config, + + throughputReadWrite(config):: + 'sum by(pod) (rate(container_fs_reads_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(containerfsSelector)s, %(diskDeviceSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + // Storage Table Queries + storageReads(config):: + 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + storageWrites(config):: + 'sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + storageReadsPlusWrites(config):: + 'sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + storageReadBytes(config):: + 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + storageWriteBytes(config):: + 'sum by(pod) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, 
%(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, + + storageReadPlusWriteBytes(config):: + 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}[%(grafanaIntervalVar)s]))' % config, +} diff --git a/dashboards/resources/queries/pod.libsonnet b/dashboards/resources/queries/pod.libsonnet new file mode 100644 index 000000000..e11bbcd34 --- /dev/null +++ b/dashboards/resources/queries/pod.libsonnet @@ -0,0 +1,133 @@ +{ + // CPU Queries + cpuUsageByContainer(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster", container!=""})) by (container)' % config, + + cpuRequests(config):: + ||| + sum( + kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + ) + ||| % config, + + cpuLimits(config):: + ||| + sum( + kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + ) + ||| % config, + + cpuThrottling(config):: + 'sum(increase(container_cpu_cfs_throttled_periods_total{%(cadvisorSelector)s, namespace="$namespace", pod="$pod", container!="", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (container) /sum(increase(container_cpu_cfs_periods_total{%(cadvisorSelector)s, namespace="$namespace", pod="$pod", container!="", %(clusterLabel)s="$cluster"}[%(grafanaIntervalVar)s])) by (container)' % config, + + // CPU Quota Table Queries + 
cpuRequestsByContainer(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % config, + + cpuUsageVsRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % config, + + cpuLimitsByContainer(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % config, + + cpuUsageVsLimits(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""})) by (container)' % config, + + // Memory Queries + memoryUsageWSS(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""})) by (container)' % config, + + memoryRequests(config):: + ||| + sum( + kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, 
%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="memory"} + ) + ||| % config, + + memoryLimits(config):: + ||| + sum( + kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="memory"} + ) + ||| % config, + + // Memory Quota Table Queries + memoryRequestsByContainer(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % config, + + memoryUsageVsRequests(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % config, + + memoryLimitsByContainer(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % config, + + memoryUsageVsLimits(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""})) by (container) / sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"})) by (container)' % config, + + memoryUsageRSS(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, 
container)(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"})) by (container)' % config, + + memoryUsageCache(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"})) by (container)' % config, + + memoryUsageSwap(config):: + 'sum(max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"})) by (container)' % config, + + // Network Queries + networkReceiveBandwidth(config):: + 'sum(irate(container_network_receive_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitBandwidth(config):: + 'sum(rate(container_network_transmit_bytes_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkReceivePackets(config):: + 'sum(rate(container_network_receive_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitPackets(config):: + 'sum(rate(container_network_transmit_packets_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkReceivePacketsDropped(config):: + 'sum(rate(container_network_receive_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + networkTransmitPacketsDropped(config):: + 
'sum(rate(container_network_transmit_packets_dropped_total{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])) by (pod)' % config, + + // Storage Queries - Pod Level + iopsPodReads(config):: + 'ceil(sum by(pod) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])))' % config, + + iopsPodWrites(config):: + 'ceil(sum by(pod) (rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster",namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s])))' % config, + + throughputPodRead(config):: + 'sum by(pod) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s]))' % config, + + throughputPodWrite(config):: + 'sum by(pod) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod=~"$pod"}[%(grafanaIntervalVar)s]))' % config, + + // Storage Queries - Container Level + iopsContainersCombined(config):: + 'ceil(sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s])))' % config, + + throughputContainersCombined(config):: + 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + 
rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + // Storage Table Queries + storageReads(config):: + 'sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + storageWrites(config):: + 'sum by(container) (rate(container_fs_writes_total{%(cadvisorSelector)s,%(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + storageReadsPlusWrites(config):: + 'sum by(container) (rate(container_fs_reads_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + rate(container_fs_writes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + storageReadBytes(config):: + 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + storageWriteBytes(config):: + 'sum by(container) (rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, + + storageReadPlusWriteBytes(config):: + 'sum by(container) (rate(container_fs_reads_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]) + 
rate(container_fs_writes_bytes_total{%(cadvisorSelector)s, %(diskDeviceSelector)s, %(containerfsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[%(grafanaIntervalVar)s]))' % config, +} diff --git a/dashboards/resources/variables/cluster.libsonnet b/dashboards/resources/variables/cluster.libsonnet index c670ea07d..d2126c844 100644 --- a/dashboards/resources/variables/cluster.libsonnet +++ b/dashboards/resources/variables/cluster.libsonnet @@ -1,37 +1,10 @@ -local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -local var = g.dashboard.variable; +local common = import './common.libsonnet'; { - // Cluster dashboard variables - // Returns both datasource and cluster variables - cluster(config):: { - datasource: - var.datasource.new('datasource', 'prometheus') - + var.datasource.withRegex(config.datasourceFilterRegex) - + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() - + var.datasource.generalOptions.withLabel('Data source') - + { - current: { - selected: true, - text: config.datasourceName, - value: config.datasourceName, - }, - }, - - cluster: - var.query.new('cluster') - + var.query.withDatasourceFromVariable(self.datasource) - + var.query.queryTypes.withLabelValues( - config.clusterLabel, - 'up{%(cadvisorSelector)s}' % config, - ) - + var.query.generalOptions.withLabel('cluster') - + var.query.refresh.onTime() - + ( - if config.showMultiCluster - then var.query.generalOptions.showOnDashboard.withLabelAndValue() - else var.query.generalOptions.showOnDashboard.withNothing() - ) - + var.query.withSort(type='alphabetical'), - }, + cluster(config):: + local datasource = common.datasource(config); + { + datasource: datasource, + cluster: common.cluster(config, datasource, 'up{%(cadvisorSelector)s}'), + }, } diff --git a/dashboards/resources/variables/common.libsonnet b/dashboards/resources/variables/common.libsonnet new file mode 100644 index 000000000..4926fd9e2 --- /dev/null +++ 
b/dashboards/resources/variables/common.libsonnet
@@ -0,0 +1,70 @@
+local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
+local var = g.dashboard.variable;
+
+// Shared builders for dashboard template variables. Each function returns one
+// grafonnet variable; the per-dashboard files combine them into an object.
+{
+  // Datasource variable 'datasource', filtered by config.datasourceFilterRegex
+  // and labelled 'Data source', with `current` set to config.datasourceName.
+  datasource(config)::
+    var.datasource.new('datasource', 'prometheus')
+    + var.datasource.withRegex(config.datasourceFilterRegex)
+    + var.datasource.generalOptions.showOnDashboard.withLabelAndValue()
+    + var.datasource.generalOptions.withLabel('Data source')
+    + {
+      current: {
+        selected: true,
+        text: config.datasourceName,
+        value: config.datasourceName,
+      },
+    },
+
+  // Query variable 'cluster': values of the config.clusterLabel label taken
+  // from `selectorTemplate % config`. `selectorTemplate` is a format string
+  // whose %(name)s placeholders are resolved against config, e.g.
+  // 'up{%(cadvisorSelector)s}'. Uses showOnDashboard.withLabelAndValue() when
+  // config.showMultiCluster is set, showOnDashboard.withNothing() otherwise.
+  cluster(config, datasourceVar, selectorTemplate)::
+    var.query.new('cluster')
+    + var.query.withDatasourceFromVariable(datasourceVar)
+    + var.query.queryTypes.withLabelValues(
+      config.clusterLabel,
+      selectorTemplate % config,
+    )
+    + var.query.generalOptions.withLabel('cluster')
+    + var.query.refresh.onTime()
+    + (
+      if config.showMultiCluster
+      then var.query.generalOptions.showOnDashboard.withLabelAndValue()
+      else var.query.generalOptions.showOnDashboard.withNothing()
+    )
+    + var.query.withSort(type='alphabetical'),
+
+  // Query variable 'namespace': values of the `namespace` label on
+  // kube_namespace_status_phase, filtered to the selected $cluster.
+  namespace(config, datasourceVar)::
+    var.query.new('namespace')
+    + var.query.withDatasourceFromVariable(datasourceVar)
+    + var.query.queryTypes.withLabelValues(
+      'namespace',
+      'kube_namespace_status_phase{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster"}' % config,
+    )
+    + var.query.generalOptions.withLabel('namespace')
+    + var.query.refresh.onTime()
+    + var.query.generalOptions.showOnDashboard.withLabelAndValue()
+    + var.query.withSort(type='alphabetical'),
+
+  // Query variable 'pod': values of the `pod` label on kube_pod_info, filtered
+  // to the selected $cluster and $namespace.
+  pod(config, datasourceVar)::
+    var.query.new('pod')
+    + var.query.withDatasourceFromVariable(datasourceVar)
+    + var.query.queryTypes.withLabelValues(
+      'pod',
+      'kube_pod_info{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace"}' % config,
+    )
+    + var.query.generalOptions.withLabel('pod')
+    + var.query.refresh.onTime()
+    + var.query.generalOptions.showOnDashboard.withLabelAndValue()
+    + var.query.withSort(type='alphabetical'),
+}
diff --git a/dashboards/resources/variables/namespace.libsonnet b/dashboards/resources/variables/namespace.libsonnet
new file mode 100644
index 000000000..091173361
--- /dev/null
+++ b/dashboards/resources/variables/namespace.libsonnet
@@ -0,0 +1,12 @@
+local common = import './common.libsonnet';
+
+{
+  // Namespace-level variable set: datasource, cluster, namespace.
+  // Dependent variables take `self.datasource` (late-bound) so that an
+  // override of the `datasource` field is what they are built from.
+  namespace(config):: {
+    datasource: common.datasource(config),
+    cluster: common.cluster(config, self.datasource, 'up{%(kubeStateMetricsSelector)s}'),
+    namespace: common.namespace(config, self.datasource),
+  },
+}
diff --git a/dashboards/resources/variables/pod.libsonnet b/dashboards/resources/variables/pod.libsonnet
new file mode 100644
index 000000000..37f7204bd
--- /dev/null
+++ b/dashboards/resources/variables/pod.libsonnet
@@ -0,0 +1,13 @@
+local common = import './common.libsonnet';
+
+{
+  // Pod-level variable set: datasource, cluster, namespace, pod.
+  // Dependent variables take `self.datasource` (late-bound) so that an
+  // override of the `datasource` field is what they are built from.
+  pod(config):: {
+    datasource: common.datasource(config),
+    cluster: common.cluster(config, self.datasource, 'up{%(kubeStateMetricsSelector)s}'),
+    namespace: common.namespace(config, self.datasource),
+    pod: common.pod(config, self.datasource),
+  },
+}