Skip to content
8 changes: 4 additions & 4 deletions dashboards/resources/cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ local var = g.dashboard.variable;
+ tsPanel.queryOptions.withTargets([
prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

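The recording rule added in rules/apps.libsonnet is named node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m, but every dashboard query changed in this PR references node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate (without the 5m suffix). As written, the dashboards would query a series that no rule records, so the two names need to be made consistent.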

)
+ prometheus.withLegendFormat('__auto'),
]),
Expand All @@ -144,23 +144,23 @@ local var = g.dashboard.variable;
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),

prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),

prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),

prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),

prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),

prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/multi-cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -99,27 +99,27 @@ local var = g.dashboard.variable;
cpuUsage: [
tsPanel.new('CPU Usage')
+ tsPanel.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config)
+ prometheus.withLegendFormat('__auto'),
]),
],

cpuQuota: [
g.panel.table.new('CPU Quota')
+ g.panel.table.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
Expand Down
12 changes: 6 additions & 6 deletions dashboards/resources/namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@ local var = g.dashboard.variable;
statPanel(
'CPU Utilisation (from requests)',
'percentunit',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config
)
+ stat.gridPos.withW(6)
+ stat.gridPos.withH(3),

statPanel(
'CPU Utilisation (from limits)',
'percentunit',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config
)
+ stat.gridPos.withW(6)
+ stat.gridPos.withH(3),
Expand All @@ -125,7 +125,7 @@ local var = g.dashboard.variable;
+ tsPanel.queryOptions.withTargets([
prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config
)
+ prometheus.withLegendFormat('__auto'),

Expand Down Expand Up @@ -197,19 +197,19 @@ local var = g.dashboard.variable;
table.new('CPU Quota')
+ table.gridPos.withW(24)
+ table.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/node.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ local var = g.dashboard.variable;

prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
)
+ prometheus.withLegendFormat('{{pod}}'),
])
Expand All @@ -105,19 +105,19 @@ local var = g.dashboard.variable;

table.new('CPU Quota')
+ table.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/pod.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ local var = g.dashboard.variable;
+ tsPanel.queryOptions.withTargets([
prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster", container!=""}) by (container)' % $._config
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster", container!=""}) by (container)' % $._config
)
+ prometheus.withLegendFormat('__auto'),

Expand Down Expand Up @@ -208,19 +208,19 @@ local var = g.dashboard.variable;
table.new('CPU Quota')
+ table.gridPos.withW(24)
+ table.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/workload-namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ local var = g.dashboard.variable;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type=~"$type"}
) by (workload, workload_type)
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/workload.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ local var = g.dashboard.variable;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"}
) by (pod)
Expand Down
10 changes: 10 additions & 0 deletions rules/apps.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
// Reduces cardinality of this timeseries by #cores, which makes it
// more useable in dashboards. Also, allows us to do things like
// quantile_over_time(...) which would otherwise not be possible.
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m',
expr: |||
sum by (%(clusterLabel)s, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m])
) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info{node!=""})
)
||| % $._config,
},
{
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate',
expr: |||
sum by (%(clusterLabel)s, namespace, pod, container) (
Expand Down
Loading
Loading