Skip to content

Commit 3991660

Browse files
authored
Merge pull request #153 from splunk/workload-dashboards
Workload dashboards
2 parents 0669b54 + 0720d4d commit 3991660

File tree

5 files changed

+289
-16
lines changed

5 files changed

+289
-16
lines changed

.circleci/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ FROM alpine:3.7
44
RUN apk --no-cache add alpine-sdk git openssl-dev
55

66
RUN git clone https://github.com/google/jsonnet && \
7-
git -C jsonnet checkout v0.10.0 && \
7+
git -C jsonnet checkout v0.12.1 && \
88
make -C jsonnet LDFLAGS=-static
99

1010
FROM circleci/golang:1.10.3-stretch

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 2
22
jobs:
33
build:
44
docker:
5-
- image: tomwilkie/kubernetes-mixin-build:dont-commit-binary
5+
- image: csmarchbanks/kubernetes-mixin-build:jsonnet-0.12.1
66

77
working_directory: /go/src/github.com/kubernetes-monitoring/kubernetes-mixin
88
steps:

config.libsonnet

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
'k8s-resources-windows-pod.json': '40597a704a610e936dc6ed374a7ce023',
4343
'k8s-windows-cluster-rsrc-use.json': '53a43377ec9aaf2ff64dfc7a1f539334',
4444
'k8s-windows-node-rsrc-use.json': '96e7484b0bb53b74fbc2bcb7723cd40b',
45+
'k8s-resources-workloads-namespace.json': 'L29WgMrccBDauPs3Xsti3fwaKjMB6fReufWj6Gl1',
46+
'k8s-resources-workload.json': 'hZCNbUPfUqjc95N3iumVsaEVHXzaBr3IFKRFvUJf',
4547
},
4648

4749
// Config for the Grafana dashboards in the Kubernetes Mixin

dashboards/resources.libsonnet

Lines changed: 239 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,27 @@ local g = import 'grafana-builder/grafana.libsonnet';
77
namespace: {
88
alias: 'Namespace',
99
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
10+
linkTooltip: 'Drill down to pods',
11+
},
12+
'Value #A': {
13+
alias: 'Pods',
14+
linkTooltip: 'Drill down to pods',
15+
link: '%(prefix)s/d/%(uid)s/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-namespace.json') },
16+
decimals: 0,
17+
},
18+
'Value #B': {  // Workload-count column; each cell links to the per-namespace workloads dashboard.
19+
alias: 'Workloads',
20+
linkTooltip: 'Drill down to workloads',
21+
link: '%(prefix)s/d/%(uid)s/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1' % { prefix: $._config.grafanaK8s.linkPrefix, uid: $._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json'] },  // uid is read from the same config entry the target dashboard uses for its own uid
22+
decimals: 0,
1023
},
1124
};
1225

26+
local podWorkloadColumns = [
27+
'count(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
28+
'count(avg(mixin_pod_workload{%(clusterLabel)s="$cluster"}) by (workload, namespace)) by (namespace)' % $._config,
29+
];
30+
1331
g.dashboard(
1432
'%(dashboardNamePrefix)sCompute Resources / Cluster' % $._config.grafanaK8s,
1533
uid=($._config.grafanaDashboardIDs['k8s-resources-cluster.json']),
@@ -57,18 +75,18 @@ local g = import 'grafana-builder/grafana.libsonnet';
5775
g.row('CPU Quota')
5876
.addPanel(
5977
g.panel('CPU Quota') +
60-
g.tablePanel([
78+
g.tablePanel(podWorkloadColumns + [
6179
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6280
'sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6381
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6482
'sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6583
'sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
6684
], tableStyles {
67-
'Value #A': { alias: 'CPU Usage' },
68-
'Value #B': { alias: 'CPU Requests' },
69-
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
70-
'Value #D': { alias: 'CPU Limits' },
71-
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
85+
'Value #C': { alias: 'CPU Usage' },
86+
'Value #D': { alias: 'CPU Requests' },
87+
'Value #E': { alias: 'CPU Requests %', unit: 'percentunit' },
88+
'Value #F': { alias: 'CPU Limits' },
89+
'Value #G': { alias: 'CPU Limits %', unit: 'percentunit' },
7290
})
7391
)
7492
)
@@ -86,19 +104,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
86104
g.row('Memory Requests')
87105
.addPanel(
88106
g.panel('Requests by Namespace') +
89-
g.tablePanel([
107+
g.tablePanel(podWorkloadColumns + [
90108
// Not using container_memory_usage_bytes here because that includes page cache
91109
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace)' % $._config,
92110
'sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
93111
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
94112
'sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
95113
'sum(container_memory_rss{%(clusterLabel)s="$cluster", container_name!=""}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
96114
], tableStyles {
97-
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
98-
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
99-
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
100-
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
101-
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
115+
'Value #C': { alias: 'Memory Usage', unit: 'bytes' },  // Value #A/#B are the two podWorkloadColumns queries; the five memory queries are #C..#G
116+
'Value #D': { alias: 'Memory Requests', unit: 'bytes' },
117+
'Value #E': { alias: 'Memory Requests %', unit: 'percentunit' },
118+
'Value #F': { alias: 'Memory Limits', unit: 'bytes' },
119+
'Value #G': { alias: 'Memory Limits %', unit: 'percentunit' },
102121
})
103122
)
104123
) + { tags: $._config.grafanaK8s.dashboardTags },
@@ -112,7 +131,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
112131
};
113132

114133
g.dashboard(
115-
'%(dashboardNamePrefix)sCompute Resources / Namespace' % $._config.grafanaK8s,
134+
'%(dashboardNamePrefix)sCompute Resources / Namespace (Pods)' % $._config.grafanaK8s,
116135
uid=($._config.grafanaDashboardIDs['k8s-resources-namespace.json']),
117136
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
118137
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
@@ -179,6 +198,212 @@ local g = import 'grafana-builder/grafana.libsonnet';
179198
)
180199
) + { tags: $._config.grafanaK8s.dashboardTags },
181200

201+
'k8s-resources-workloads-namespace.json':
202+
local tableStyles = {  // Base column styles extended by both the CPU Quota and Memory Quota tables of this dashboard.
203+
workload: {
204+
alias: 'Workload',
205+
link: '%(prefix)s/d/%(uid)s/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2' % { prefix: $._config.grafanaK8s.linkPrefix, uid: $._config.grafanaDashboardIDs['k8s-resources-workload.json'] },  // uid is read from the same config entry the workload dashboard uses for its own uid
206+
},
207+
workload_type: {
208+
alias: 'Workload Type',
209+
},
210+
};
211+
212+
local cpuUsageQuery = |||
213+
sum(
214+
label_replace(
215+
namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"},
216+
"pod", "$1", "pod_name", "(.*)"
217+
) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}
218+
) by (workload, workload_type)
219+
||| % $._config;
220+
221+
local cpuRequestsQuery = |||
222+
sum(
223+
kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}
224+
* on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}
225+
) by (workload, workload_type)
226+
||| % $._config;
227+
228+
local podCountQuery = 'count(mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (workload, workload_type)' % $._config;
229+
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
230+
231+
local memUsageQuery = |||
232+
sum(
233+
label_replace(
234+
container_memory_usage_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container_name!=""},
235+
"pod", "$1", "pod_name", "(.*)"
236+
) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}
237+
) by (workload, workload_type)
238+
||| % $._config;
239+
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
240+
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
241+
242+
g.dashboard(
243+
'%(dashboardNamePrefix)sCompute Resources / Namespace (Workloads)' % $._config.grafanaK8s,
244+
uid=($._config.grafanaDashboardIDs['k8s-resources-workloads-namespace.json']),
245+
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
246+
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
247+
.addRow(
248+
g.row('CPU Usage')
249+
.addPanel(
250+
g.panel('CPU Usage') +
251+
g.queryPanel(cpuUsageQuery, '{{workload}} - {{workload_type}}') +
252+
g.stack,
253+
)
254+
)
255+
.addRow(
256+
g.row('CPU Quota')
257+
.addPanel(
258+
g.panel('CPU Quota') +
259+
g.tablePanel([
260+
podCountQuery,
261+
cpuUsageQuery,
262+
cpuRequestsQuery,
263+
cpuUsageQuery + '/' + cpuRequestsQuery,
264+
cpuLimitsQuery,
265+
cpuUsageQuery + '/' + cpuLimitsQuery,
266+
], tableStyles {
267+
'Value #A': { alias: 'Running Pods', decimals: 0 },
268+
'Value #B': { alias: 'CPU Usage' },
269+
'Value #C': { alias: 'CPU Requests' },
270+
'Value #D': { alias: 'CPU Requests %', unit: 'percentunit' },
271+
'Value #E': { alias: 'CPU Limits' },
272+
'Value #F': { alias: 'CPU Limits %', unit: 'percentunit' },
273+
})
274+
)
275+
)
276+
.addRow(
277+
g.row('Memory Usage')
278+
.addPanel(
279+
g.panel('Memory Usage') +
280+
g.queryPanel(memUsageQuery, '{{workload}} - {{workload_type}}') +
281+
g.stack +
282+
{ yaxes: g.yaxes('bytes') },
283+
)
284+
)
285+
.addRow(
286+
g.row('Memory Quota')
287+
.addPanel(
288+
g.panel('Memory Quota') +
289+
g.tablePanel([
290+
podCountQuery,
291+
memUsageQuery,
292+
memRequestsQuery,
293+
memUsageQuery + '/' + memRequestsQuery,
294+
memLimitsQuery,
295+
memUsageQuery + '/' + memLimitsQuery,
296+
], tableStyles {
297+
'Value #A': { alias: 'Running Pods', decimals: 0 },
298+
'Value #B': { alias: 'Memory Usage', unit: 'bytes' },
299+
'Value #C': { alias: 'Memory Requests', unit: 'bytes' },
300+
'Value #D': { alias: 'Memory Requests %', unit: 'percentunit' },
301+
'Value #E': { alias: 'Memory Limits', unit: 'bytes' },
302+
'Value #F': { alias: 'Memory Limits %', unit: 'percentunit' },
303+
})
304+
)
305+
) + { tags: $._config.grafanaK8s.dashboardTags },
306+
307+
'k8s-resources-workload.json':
308+
local tableStyles = {
309+
pod: {
310+
alias: 'Pod',
311+
link: '%(prefix)s/d/%(uid)s/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-pod.json') },
312+
},
313+
};
314+
315+
local cpuUsageQuery = |||
316+
sum(
317+
label_replace(
318+
namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"},
319+
"pod", "$1", "pod_name", "(.*)"
320+
) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
321+
) by (pod)
322+
||| % $._config;
323+
324+
local cpuRequestsQuery = |||
325+
sum(
326+
kube_pod_container_resource_requests_cpu_cores{%(clusterLabel)s="$cluster", namespace="$namespace"}
327+
* on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
328+
) by (pod)
329+
||| % $._config;
330+
331+
local cpuLimitsQuery = std.strReplace(cpuRequestsQuery, 'requests', 'limits');
332+
333+
local memUsageQuery = |||
334+
sum(
335+
label_replace(
336+
container_memory_usage_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container_name!=""},
337+
"pod", "$1", "pod_name", "(.*)"
338+
) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
339+
) by (pod)
340+
||| % $._config;
341+
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu_cores', 'memory_bytes');
342+
local memLimitsQuery = std.strReplace(cpuLimitsQuery, 'cpu_cores', 'memory_bytes');
343+
344+
g.dashboard(
345+
'%(dashboardNamePrefix)sCompute Resources / Workload' % $._config.grafanaK8s,
346+
uid=($._config.grafanaDashboardIDs['k8s-resources-workload.json']),
347+
).addTemplate('cluster', 'kube_pod_info', $._config.clusterLabel, hide=if $._config.showMultiCluster then 0 else 2)
348+
.addTemplate('namespace', 'kube_pod_info{%(clusterLabel)s="$cluster"}' % $._config, 'namespace')
349+
.addTemplate('workload', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace"}' % $._config, 'workload')
350+
.addTemplate('type', 'mixin_pod_workload{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload"}' % $._config, 'workload_type')
351+
.addRow(
352+
g.row('CPU Usage')
353+
.addPanel(
354+
g.panel('CPU Usage') +
355+
g.queryPanel(cpuUsageQuery, '{{pod}}') +
356+
g.stack,
357+
)
358+
)
359+
.addRow(
360+
g.row('CPU Quota')
361+
.addPanel(
362+
g.panel('CPU Quota') +
363+
g.tablePanel([
364+
cpuUsageQuery,
365+
cpuRequestsQuery,
366+
cpuUsageQuery + '/' + cpuRequestsQuery,
367+
cpuLimitsQuery,
368+
cpuUsageQuery + '/' + cpuLimitsQuery,
369+
], tableStyles {
370+
'Value #A': { alias: 'CPU Usage' },
371+
'Value #B': { alias: 'CPU Requests' },
372+
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
373+
'Value #D': { alias: 'CPU Limits' },
374+
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
375+
})
376+
)
377+
)
378+
.addRow(
379+
g.row('Memory Usage')
380+
.addPanel(
381+
g.panel('Memory Usage') +
382+
g.queryPanel(memUsageQuery, '{{pod}}') +
383+
g.stack +
384+
{ yaxes: g.yaxes('bytes') },
385+
)
386+
)
387+
.addRow(
388+
g.row('Memory Quota')
389+
.addPanel(
390+
g.panel('Memory Quota') +
391+
g.tablePanel([
392+
memUsageQuery,
393+
memRequestsQuery,
394+
memUsageQuery + '/' + memRequestsQuery,
395+
memLimitsQuery,
396+
memUsageQuery + '/' + memLimitsQuery,
397+
], tableStyles {
398+
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
399+
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
400+
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
401+
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
402+
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
403+
})
404+
)
405+
) + { tags: $._config.grafanaK8s.dashboardTags },
406+
182407
'k8s-resources-pod.json':
183408
local tableStyles = {
184409
container: {
@@ -339,7 +564,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
339564
g.panel('Memory Usage (w/o cache)') +
340565
// Not using container_memory_usage_bytes here because that includes page cache
341566
g.queryPanel('sum(container_memory_rss{container_name!=""}) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) +
342-
{ fill: 0, linewidth: 2, yaxes: g.yaxes('decbytes') },
567+
{ fill: 0, linewidth: 2, yaxes: g.yaxes('bytes') },
343568
)
344569
)
345570
.addRow(

rules/rules.libsonnet

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,52 @@
6767
)
6868
||| % $._config,
6969
},
70+
// Workload aggregation for deployments: pods owned by a ReplicaSet are mapped to the owner of that ReplicaSet.
71+
{
72+
record: 'mixin_pod_workload',
73+
expr: |||
74+
sum(
75+
label_replace(
76+
label_replace(
77+
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
78+
"replicaset", "$1", "owner_name", "(.*)"
79+
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{%(kubeStateMetricsSelector)s},
80+
"workload", "$1", "owner_name", "(.*)"
81+
)
82+
) by (namespace, workload, pod)
83+
||| % $._config,
84+
labels: {
85+
workload_type: 'deployment',
86+
},
87+
},
88+
{
89+
record: 'mixin_pod_workload',
90+
expr: |||
91+
sum(
92+
label_replace(
93+
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="DaemonSet"},
94+
"workload", "$1", "owner_name", "(.*)"
95+
)
96+
) by (namespace, workload, pod)
97+
||| % $._config,
98+
labels: {
99+
workload_type: 'daemonset',
100+
},
101+
},
102+
{
103+
record: 'mixin_pod_workload',
104+
expr: |||
105+
sum(
106+
label_replace(
107+
kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="StatefulSet"},
108+
"workload", "$1", "owner_name", "(.*)"
109+
)
110+
) by (namespace, workload, pod)
111+
||| % $._config,
112+
labels: {
113+
workload_type: 'statefulset',
114+
},
115+
},
70116
],
71117
},
72118
{

0 commit comments

Comments
 (0)