Skip to content

Commit 0720d4d

Browse files
committed
Can drilldown from cluster level to either pod or workload views
1 parent 3b28d8e commit 0720d4d

File tree

2 files changed

+107
-88
lines changed

2 files changed

+107
-88
lines changed

config.libsonnet

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,86 @@
11
{
2-
_config+:: {
3-
// Selectors are inserted between {} in Prometheus queries.
4-
cadvisorSelector: 'job="cadvisor"',
5-
kubeletSelector: 'job="kubelet"',
6-
kubeStateMetricsSelector: 'job="kube-state-metrics"',
7-
nodeExporterSelector: 'job="node-exporter"',
8-
notKubeDnsSelector: 'job!="kube-dns"',
9-
kubeSchedulerSelector: 'job="kube-scheduler"',
10-
kubeControllerManagerSelector: 'job="kube-controller-manager"',
11-
kubeApiserverSelector: 'job="kube-apiserver"',
12-
podLabel: 'pod',
13-
namespaceSelector: null,
14-
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
15-
hostNetworkInterfaceSelector: 'device!~"veth.+"',
16-
hostMountpointSelector: 'mountpoint="/"',
17-
wmiExporterSelector: 'job="wmi-exporter"',
2+
_config+:: {
3+
// Selectors are inserted between {} in Prometheus queries.
4+
cadvisorSelector: 'job="cadvisor"',
5+
kubeletSelector: 'job="kubelet"',
6+
kubeStateMetricsSelector: 'job="kube-state-metrics"',
7+
nodeExporterSelector: 'job="node-exporter"',
8+
notKubeDnsSelector: 'job!="kube-dns"',
9+
kubeSchedulerSelector: 'job="kube-scheduler"',
10+
kubeControllerManagerSelector: 'job="kube-controller-manager"',
11+
kubeApiserverSelector: 'job="kube-apiserver"',
12+
podLabel: 'pod',
13+
namespaceSelector: null,
14+
prefixedNamespaceSelector: if self.namespaceSelector != null then self.namespaceSelector + ',' else '',
15+
hostNetworkInterfaceSelector: 'device!~"veth.+"',
16+
hostMountpointSelector: 'mountpoint="/"',
17+
wmiExporterSelector: 'job="wmi-exporter"',
1818

19-
// We build alerts for the presence of all these jobs.
20-
jobs: {
21-
Kubelet: $._config.kubeletSelector,
22-
KubeScheduler: $._config.kubeSchedulerSelector,
23-
KubeControllerManager: $._config.kubeControllerManagerSelector,
24-
KubeAPI: $._config.kubeApiserverSelector,
25-
},
19+
// We build alerts for the presence of all these jobs.
20+
jobs: {
21+
Kubelet: $._config.kubeletSelector,
22+
KubeScheduler: $._config.kubeSchedulerSelector,
23+
KubeControllerManager: $._config.kubeControllerManagerSelector,
24+
KubeAPI: $._config.kubeApiserverSelector,
25+
},
2626

27-
// Grafana dashboard IDs are necessary for stable links for dashboards
28-
grafanaDashboardIDs: {
29-
'k8s-resources-multicluster.json': '1gBgaexoVZ4TpBNAt2eGRsc4LNjNhdjcZd6cqU6S',
30-
'k8s-resources-cluster.json': 'ZnbvYbcXkob7GLqcDPLTj1ZL4MRX87tOh8xdr831',
31-
'k8s-resources-namespace.json': 'XaY4UCP3J51an4ikqtkUGBSjLpDW4pg39xe2FuxP',
32-
'k8s-resources-pod.json': 'wU56sdGSNYZTL3eO0db3pONtVmTvsyV7w8aadbYF',
33-
'k8s-multicluster-rsrc-use.json': 'NJ9AlnsObVgj9uKiJMeAqfzMi1wihOMupcsDhlhR',
34-
'k8s-cluster-rsrc-use.json': 'uXQldxzqUNgIOUX6FyZNvqgP2vgYb78daNu4GiDc',
35-
'k8s-node-rsrc-use.json': 'E577CMUOwmPsxVVqM9lj40czM1ZPjclw7hGa7OT7',
36-
'nodes.json': 'kcb9C2QDe4IYcjiTOmYyfhsImuzxRcvwWC3YLJPS',
37-
'pods.json': 'AMK9hS0rSbSz7cKjPHcOtk6CGHFjhSHwhbQ3sedK',
38-
'statefulset.json': 'dPiBt0FRG5BNYo0XJ4L0Meoc7DWs9eL40c1CRc1g',
39-
'k8s-resources-windows-cluster.json': '4d08557fd9391b100730f2494bccac68',
40-
'k8s-resources-windows-namespace.json': '490b402361724ab1d4c45666c1fa9b6f',
41-
'k8s-resources-windows-pod.json': '40597a704a610e936dc6ed374a7ce023',
42-
'k8s-windows-cluster-rsrc-use.json': '53a43377ec9aaf2ff64dfc7a1f539334',
43-
'k8s-windows-node-rsrc-use.json': '96e7484b0bb53b74fbc2bcb7723cd40b',
44-
'k8s-resources-workloads-namespace.json': 'L29WgMrccBDauPs3Xsti3fwaKjMB6fReufWj6Gl1',
45-
'k8s-resources-workload.json': 'hZCNbUPfUqjc95N3iumVsaEVHXzaBr3IFKRFvUJf',
46-
},
27+
// Grafana dashboard IDs are necessary for stable links for dashboards
28+
grafanaDashboardIDs: {
29+
'k8s-resources-multicluster.json': '1gBgaexoVZ4TpBNAt2eGRsc4LNjNhdjcZd6cqU6S',
30+
'k8s-resources-cluster.json': 'ZnbvYbcXkob7GLqcDPLTj1ZL4MRX87tOh8xdr831',
31+
'k8s-resources-namespace.json': 'XaY4UCP3J51an4ikqtkUGBSjLpDW4pg39xe2FuxP',
32+
'k8s-resources-pod.json': 'wU56sdGSNYZTL3eO0db3pONtVmTvsyV7w8aadbYF',
33+
'k8s-multicluster-rsrc-use.json': 'NJ9AlnsObVgj9uKiJMeAqfzMi1wihOMupcsDhlhR',
34+
'k8s-cluster-rsrc-use.json': 'uXQldxzqUNgIOUX6FyZNvqgP2vgYb78daNu4GiDc',
35+
'k8s-node-rsrc-use.json': 'E577CMUOwmPsxVVqM9lj40czM1ZPjclw7hGa7OT7',
36+
'nodes.json': 'kcb9C2QDe4IYcjiTOmYyfhsImuzxRcvwWC3YLJPS',
37+
'pods.json': 'AMK9hS0rSbSz7cKjPHcOtk6CGHFjhSHwhbQ3sedK',
38+
'statefulset.json': 'dPiBt0FRG5BNYo0XJ4L0Meoc7DWs9eL40c1CRc1g',
39+
'k8s-resources-windows-cluster.json': '4d08557fd9391b100730f2494bccac68',
40+
'k8s-resources-windows-namespace.json': '490b402361724ab1d4c45666c1fa9b6f',
41+
'k8s-resources-windows-pod.json': '40597a704a610e936dc6ed374a7ce023',
42+
'k8s-windows-cluster-rsrc-use.json': '53a43377ec9aaf2ff64dfc7a1f539334',
43+
'k8s-windows-node-rsrc-use.json': '96e7484b0bb53b74fbc2bcb7723cd40b',
44+
'k8s-resources-workloads-namespace.json': 'L29WgMrccBDauPs3Xsti3fwaKjMB6fReufWj6Gl1',
45+
'k8s-resources-workload.json': 'hZCNbUPfUqjc95N3iumVsaEVHXzaBr3IFKRFvUJf',
46+
},
4747

48-
// Config for the Grafana dashboards in the Kubernetes Mixin
49-
grafanaK8s: {
50-
dashboardNamePrefix: 'Kubernetes / ',
51-
dashboardTags: ['kubernetes-mixin'],
48+
// Config for the Grafana dashboards in the Kubernetes Mixin
49+
grafanaK8s: {
50+
dashboardNamePrefix: 'Kubernetes / ',
51+
dashboardTags: ['kubernetes-mixin'],
5252

53-
// For links between grafana dashboards, you need to tell us if your grafana
54-
// servers under some non-root path.
55-
linkPrefix: '',
56-
},
53+
// For links between grafana dashboards, you need to tell us if your grafana
54+
// is served under some non-root path.
55+
linkPrefix: '',
56+
},
5757

58-
// We alert when the aggregate (CPU, Memory) quota for all namespaces is
59-
// greater than the amount of the resources in the cluster. We do however
60-
// allow you to overcommit if you wish.
61-
namespaceOvercommitFactor: 1.5,
62-
kubeletPodLimit: 110,
63-
certExpirationWarningSeconds: 7 * 24 * 3600,
64-
certExpirationCriticalSeconds: 1 * 24 * 3600,
65-
cpuThrottlingPercent: 25,
66-
cpuThrottlingSelector: '',
58+
// We alert when the aggregate (CPU, Memory) quota for all namespaces is
59+
// greater than the amount of the resources in the cluster. We do however
60+
// allow you to overcommit if you wish.
61+
namespaceOvercommitFactor: 1.5,
62+
kubeletPodLimit: 110,
63+
certExpirationWarningSeconds: 7 * 24 * 3600,
64+
certExpirationCriticalSeconds: 1 * 24 * 3600,
65+
cpuThrottlingPercent: 25,
66+
cpuThrottlingSelector: '',
6767

68-
// We alert when a disk is expected to fill up in four days. Depending on
69-
// the data-set it might be useful to change the sampling-time for the
70-
// prediction
71-
volumeFullPredictionSampleTime: '6h',
68+
// We alert when a disk is expected to fill up in four days. Depending on
69+
// the data-set it might be useful to change the sampling-time for the
70+
// prediction
71+
volumeFullPredictionSampleTime: '6h',
7272

7373

74-
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
75-
showMultiCluster: false,
76-
clusterLabel: 'cluster',
74+
// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
75+
showMultiCluster: false,
76+
clusterLabel: 'cluster',
7777

78-
// This list of filesystem is referenced in various expressions.
79-
fstypes: ['ext[234]', 'btrfs', 'xfs', 'zfs'],
80-
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
78+
// This list of filesystem types is referenced in various expressions.
79+
fstypes: ['ext[234]', 'btrfs', 'xfs', 'zfs'],
80+
fstypeSelector: 'fstype=~"%s"' % std.join('|', self.fstypes),
8181

82-
// This list of disk device names is referenced in various expressions.
83-
diskDevices: ['nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+'],
84-
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
85-
},
82+
// This list of disk device names is referenced in various expressions.
83+
diskDevices: ['nvme.+', 'rbd.+', 'sd.+', 'vd.+', 'xvd.+', 'dm-.+'],
84+
diskDeviceSelector: 'device=~"%s"' % std.join('|', self.diskDevices),
85+
},
8686
}

dashboards/resources.libsonnet

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,27 @@ local g = import 'grafana-builder/grafana.libsonnet';
77
namespace: {
88
alias: 'Namespace',
99
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
10+
linkTooltip: 'Drill down to pods',
11+
},
12+
'Value #A': {
13+
alias: 'Pods',
14+
linkTooltip: 'Drill down to pods',
15+
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
16+
decimals: 0,
17+
},
18+
'Value #B': {
19+
alias: 'Workloads',
20+
linkTooltip: 'Drill down to workloads',
21+
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-workloads-namespace.json') },
22+
decimals: 0,
1023
},
1124
};
1225

26+
local podWorkloadColumns = [
27+
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
28+
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
29+
];
30+
1331
g.dashboard(
1432
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
1533
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
@@ -57,18 +75,18 @@ local g = import 'grafana-builder/grafana.libsonnet';
5775
g.row('CPU Quota')
5876
.addPanel(
5977
g.panel('CPU Quota') +
60-
g.tablePanel([
78+
g.tablePanel(podWorkloadColumns + [
6179
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6280
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6381
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6482
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6583
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6684
], tableStyles {
67-
'Value #A': { alias: 'CPU Usage' },
68-
'Value #B': { alias: 'CPU Requests' },
69-
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
70-
'Value #D': { alias: 'CPU Limits' },
71-
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
85+
'Value #C': { alias: 'CPU Usage' },
86+
'Value #D': { alias: 'CPU Requests' },
87+
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
88+
'Value #F': { alias: 'CPU Limits' },
89+
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
7290
})
7391
)
7492
)
@@ -86,19 +104,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
86104
g.row('Memory Requests')
87105
.addPanel(
88106
g.panel('Requests by Namespace') +
89-
g.tablePanel([
107+
g.tablePanel(podWorkloadColumns + [
90108
// Not using container_memory_usage_bytes here because that includes page cache
91109
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace)' % $._config,
92110
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
93111
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
94112
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
95113
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
96114
], tableStyles {
97-
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
98-
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
99-
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
100-
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
101-
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
115+
'Value #C': { alias: 'CPU Usage' },
116+
'Value #D': { alias: 'Memory Usage', unit: 'bytes' },
117+
'Value #E': { alias: 'Memory Requests', unit: 'bytes' },
118+
'Value #F': { alias: 'Memory Requests %', unit: 'percentunit' },
119+
'Value #G': { alias: 'Memory Limits', unit: 'bytes' },
120+
'Value #H': { alias: 'Memory Limits %', unit: 'percentunit' },
102121
})
103122
)
104123
) + { tags: $._config.grafanaK8s.dashboardTags },
@@ -112,7 +131,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
112131
};
113132

114133
g.dashboard(
115-
'%(dashboardNamePrefix)sCompute Resources / Namespace' % $._config.grafanaK8s,
134+
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
116135
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
117136
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
118137
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
@@ -221,7 +240,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
221240
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
222241

223242
g.dashboard(
224-
'%(dashboardNamePrefix)sCompute Resources / Workloads by Namespace' % $._config.grafanaK8s,
243+
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
225244
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
226245
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
227246
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')

0 commit comments

Comments
 (0)