|
1 | | -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; |
2 | | -local dashboard = grafana.dashboard; |
3 | | -local row = grafana.row; |
4 | | -local prometheus = grafana.prometheus; |
5 | | -local template = grafana.template; |
6 | | -local graphPanel = grafana.graphPanel; |
7 | | -local promgrafonnet = import 'github.com/kubernetes-monitoring/kubernetes-mixin/lib/promgrafonnet/promgrafonnet.libsonnet'; |
8 | | -local gauge = promgrafonnet.gauge; |
9 | | - |
10 | 1 | { |
| 2 | + local nodemixin = import '../lib/prom-mixin.libsonnet', |
11 | 3 | grafanaDashboards+:: { |
12 | | - 'nodes.json': |
13 | | - local idleCPU = |
14 | | - graphPanel.new( |
15 | | - 'CPU Usage', |
16 | | - datasource='$datasource', |
17 | | - span=6, |
18 | | - format='percentunit', |
19 | | - max=1, |
20 | | - min=0, |
21 | | - stack=true, |
22 | | - ) |
23 | | - .addTarget(prometheus.target( |
24 | | - ||| |
25 | | - ( |
26 | | - (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval]))) |
27 | | - / ignoring(cpu) group_left |
28 | | - count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}) |
29 | | - ) |
30 | | - ||| % $._config, |
31 | | - legendFormat='{{cpu}}', |
32 | | - intervalFactor=5, |
33 | | - )); |
34 | | - |
35 | | - local systemLoad = |
36 | | - graphPanel.new( |
37 | | - 'Load Average', |
38 | | - datasource='$datasource', |
39 | | - span=6, |
40 | | - format='short', |
41 | | - min=0, |
42 | | - fill=0, |
43 | | - ) |
44 | | - .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) |
45 | | - .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) |
46 | | - .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='15m load average')) |
47 | | - .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % $._config, legendFormat='logical cores')); |
48 | | - |
49 | | - local memoryGraph = |
50 | | - graphPanel.new( |
51 | | - 'Memory Usage', |
52 | | - datasource='$datasource', |
53 | | - span=9, |
54 | | - format='bytes', |
55 | | - stack=true, |
56 | | - min=0, |
57 | | - ) |
58 | | - .addTarget(prometheus.target( |
59 | | - ||| |
60 | | - ( |
61 | | - node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"} |
62 | | - - |
63 | | - node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"} |
64 | | - - |
65 | | - node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"} |
66 | | - - |
67 | | - node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"} |
68 | | - ) |
69 | | - ||| % $._config, legendFormat='memory used' |
70 | | - )) |
71 | | - .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory buffers')) |
72 | | - .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached')) |
73 | | - .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free')); |
74 | | - |
75 | | - // TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%. |
76 | | - // This needs to be added upstream in the promgrafonnet library and then changed here. |
77 | | - // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. |
78 | | - local memoryGauge = gauge.new( |
79 | | - 'Memory Usage', |
80 | | - ||| |
81 | | - 100 - |
82 | | - ( |
83 | | - avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) |
84 | | - / |
85 | | - avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}) |
86 | | - * 100 |
87 | | - ) |
88 | | - ||| % $._config, |
89 | | - ).withLowerBeingBetter(); |
90 | | - |
91 | | - local diskIO = |
92 | | - graphPanel.new( |
93 | | - 'Disk I/O', |
94 | | - datasource='$datasource', |
95 | | - span=6, |
96 | | - min=0, |
97 | | - fill=0, |
98 | | - ) |
99 | | - // TODO: Does it make sense to have those three in the same panel? |
100 | | - .addTarget(prometheus.target( |
101 | | - 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, |
102 | | - legendFormat='{{device}} read', |
103 | | - )) |
104 | | - .addTarget(prometheus.target( |
105 | | - 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, |
106 | | - legendFormat='{{device}} written', |
107 | | - )) |
108 | | - .addTarget(prometheus.target( |
109 | | - 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, |
110 | | - legendFormat='{{device}} io time', |
111 | | - )) + |
112 | | - { |
113 | | - seriesOverrides: [ |
114 | | - { |
115 | | - alias: '/ read| written/', |
116 | | - yaxis: 1, |
117 | | - }, |
118 | | - { |
119 | | - alias: '/ io time/', |
120 | | - yaxis: 2, |
121 | | - }, |
122 | | - ], |
123 | | - yaxes: [ |
124 | | - self.yaxe(format='bytes'), |
125 | | - self.yaxe(format='s'), |
126 | | - ], |
127 | | - }; |
128 | | - |
129 | | - // TODO: Somehow partition this by device while excluding read-only devices. |
130 | | - local diskSpaceUsage = |
131 | | - graphPanel.new( |
132 | | - 'Disk Space Usage', |
133 | | - datasource='$datasource', |
134 | | - span=6, |
135 | | - format='bytes', |
136 | | - min=0, |
137 | | - fill=1, |
138 | | - stack=true, |
139 | | - ) |
140 | | - .addTarget(prometheus.target( |
141 | | - ||| |
142 | | - sum( |
143 | | - max by (device) ( |
144 | | - node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} |
145 | | - - |
146 | | - node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} |
147 | | - ) |
148 | | - ) |
149 | | - ||| % $._config, |
150 | | - legendFormat='used', |
151 | | - )) |
152 | | - .addTarget(prometheus.target( |
153 | | - ||| |
154 | | - sum( |
155 | | - max by (device) ( |
156 | | - node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} |
157 | | - ) |
158 | | - ) |
159 | | - ||| % $._config, |
160 | | - legendFormat='available', |
161 | | - )) + |
162 | | - { |
163 | | - seriesOverrides: [ |
164 | | - { |
165 | | - alias: 'used', |
166 | | - color: '#E0B400', |
167 | | - }, |
168 | | - { |
169 | | - alias: 'available', |
170 | | - color: '#73BF69', |
171 | | - }, |
172 | | - ], |
173 | | - }; |
174 | | - |
175 | | - local networkReceived = |
176 | | - graphPanel.new( |
177 | | - 'Network Received', |
178 | | - datasource='$datasource', |
179 | | - span=6, |
180 | | - format='bytes', |
181 | | - min=0, |
182 | | - fill=0, |
183 | | - ) |
184 | | - .addTarget(prometheus.target( |
185 | | - 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config, |
186 | | - legendFormat='{{device}}', |
187 | | - )); |
188 | | - |
189 | | - local networkTransmitted = |
190 | | - graphPanel.new( |
191 | | - 'Network Transmitted', |
192 | | - datasource='$datasource', |
193 | | - span=6, |
194 | | - format='bytes', |
195 | | - min=0, |
196 | | - fill=0, |
197 | | - ) |
198 | | - .addTarget(prometheus.target( |
199 | | - 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config, |
200 | | - legendFormat='{{device}}', |
201 | | - )); |
202 | | - |
203 | | - dashboard.new( |
204 | | - '%sNodes' % $._config.dashboardNamePrefix, |
205 | | - time_from='now-1h', |
206 | | - tags=($._config.dashboardTags), |
207 | | - timezone='utc', |
208 | | - refresh='30s', |
209 | | - graphTooltip='shared_crosshair' |
210 | | - ) |
211 | | - .addTemplate( |
212 | | - { |
213 | | - current: { |
214 | | - text: 'default', |
215 | | - value: 'default', |
216 | | - }, |
217 | | - hide: 0, |
218 | | - label: 'Data Source', |
219 | | - name: 'datasource', |
220 | | - options: [], |
221 | | - query: 'prometheus', |
222 | | - refresh: 1, |
223 | | - regex: '', |
224 | | - type: 'datasource', |
225 | | - }, |
226 | | - ) |
227 | | - .addTemplate( |
228 | | - template.new( |
229 | | - 'instance', |
230 | | - '$datasource', |
231 | | - 'label_values(node_exporter_build_info{%(nodeExporterSelector)s}, instance)' % $._config, |
232 | | - refresh='time', |
233 | | - ) |
234 | | - ) |
235 | | - .addRow( |
236 | | - row.new() |
237 | | - .addPanel(idleCPU) |
238 | | - .addPanel(systemLoad) |
239 | | - ) |
240 | | - .addRow( |
241 | | - row.new() |
242 | | - .addPanel(memoryGraph) |
243 | | - .addPanel(memoryGauge) |
244 | | - ) |
245 | | - .addRow( |
246 | | - row.new() |
247 | | - .addPanel(diskIO) |
248 | | - .addPanel(diskSpaceUsage) |
249 | | - ) |
250 | | - .addRow( |
251 | | - row.new() |
252 | | - .addPanel(networkReceived) |
253 | | - .addPanel(networkTransmitted) |
254 | | - ), |
| 4 | + 'nodes.json': nodemixin.new(config=$._config, platform='Linux').dashboard, |
| 5 | + 'nodes-darwin.json': nodemixin.new(config=$._config, platform='Darwin').dashboard, |
255 | 6 | }, |
256 | 7 | } |
0 commit comments