|
8 | 8 | // This rule gives the number of Windows nodes.
|
9 | 9 | record: 'node:windows_node:sum',
|
10 | 10 | expr: |||
|
11 |
| - count ( |
| 11 | + count by (%(clusterLabel)s) ( |
12 | 12 | windows_system_system_up_time{%(windowsExporterSelector)s}
|
13 | 13 | )
|
14 | 14 | ||| % $._config,
|
|
17 | 17 | // This rule gives the number of CPUs per node.
|
18 | 18 | record: 'node:windows_node_num_cpu:sum',
|
19 | 19 | expr: |||
|
20 |
| - count by (instance) (sum by (instance, core) ( |
| 20 | + count by (%(clusterLabel)s, instance) (sum by (%(clusterLabel)s, instance, core) ( |
21 | 21 | windows_cpu_time_total{%(windowsExporterSelector)s}
|
22 | 22 | ))
|
23 | 23 | ||| % $._config,
|
|
26 | 26 | // CPU utilisation is the fraction of CPU time that is not idle.
|
27 | 27 | record: ':windows_node_cpu_utilisation:avg1m',
|
28 | 28 | expr: |||
|
29 |
| - 1 - avg(rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) |
| 29 | + 1 - avg by (%(clusterLabel)s) (rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) |
30 | 30 | ||| % $._config,
|
31 | 31 | },
|
32 | 32 | {
|
33 | 33 | // CPU utilisation is the fraction of CPU time that is not idle.
|
34 | 34 | record: 'node:windows_node_cpu_utilisation:avg1m',
|
35 | 35 | expr: |||
|
36 |
| - 1 - avg by (instance) ( |
| 36 | + 1 - avg by (%(clusterLabel)s, instance) ( |
37 | 37 | rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])
|
38 | 38 | )
|
39 | 39 | ||| % $._config,
|
|
42 | 42 | record: ':windows_node_memory_utilisation:',
|
43 | 43 | expr: |||
|
44 | 44 | 1 -
|
45 |
| - sum(windows_memory_available_bytes{%(windowsExporterSelector)s}) |
| 45 | + sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s}) |
46 | 46 | /
|
47 |
| - sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) |
| 47 | + sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) |
48 | 48 | ||| % $._config,
|
49 | 49 | },
|
50 | 50 | // Add separate rules for Free & Total, so we can aggregate across clusters
|
51 | 51 | // in dashboards.
|
52 | 52 | {
|
53 | 53 | record: ':windows_node_memory_MemFreeCached_bytes:sum',
|
54 | 54 | expr: |||
|
55 |
| - sum(windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) |
| 55 | + sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) |
56 | 56 | ||| % $._config,
|
57 | 57 | },
|
58 | 58 | {
|
|
64 | 64 | {
|
65 | 65 | record: ':windows_node_memory_MemTotal_bytes:sum',
|
66 | 66 | expr: |||
|
67 |
| - sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) |
| 67 | + sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) |
68 | 68 | ||| % $._config,
|
69 | 69 | },
|
70 | 70 | {
|
71 | 71 | // Available memory per node
|
72 | 72 | // SINCE 2018-02-08
|
73 | 73 | record: 'node:windows_node_memory_bytes_available:sum',
|
74 | 74 | expr: |||
|
75 |
| - sum by (instance) ( |
| 75 | + sum by (%(clusterLabel)s, instance) ( |
76 | 76 | (windows_memory_available_bytes{%(windowsExporterSelector)s})
|
77 | 77 | )
|
78 | 78 | ||| % $._config,
|
|
81 | 81 | // Total memory per node
|
82 | 82 | record: 'node:windows_node_memory_bytes_total:sum',
|
83 | 83 | expr: |||
|
84 |
| - sum by (instance) ( |
| 84 | + sum by (%(clusterLabel)s, instance) ( |
85 | 85 | windows_os_visible_memory_bytes{%(windowsExporterSelector)s}
|
86 | 86 | )
|
87 | 87 | ||| % $._config,
|
|
111 | 111 | // Disk utilisation (seconds spent, by irate() it's bound by 1 second)
|
112 | 112 | record: ':windows_node_disk_utilisation:avg_irate',
|
113 | 113 | expr: |||
|
114 |
| - avg(irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + |
| 114 | + avg by (%(clusterLabel)s) (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + |
115 | 115 | irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m])
|
116 | 116 | )
|
117 | 117 | ||| % $._config,
|
|
120 | 120 | // Disk utilisation (seconds spent, by irate() it's bound by 1 second)
|
121 | 121 | record: 'node:windows_node_disk_utilisation:avg_irate',
|
122 | 122 | expr: |||
|
123 |
| - avg by (instance) ( |
| 123 | + avg by (%(clusterLabel)s, instance) ( |
124 | 124 | (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) +
|
125 | 125 | irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m]))
|
126 | 126 | )
|
|
129 | 129 | {
|
130 | 130 | record: 'node:windows_node_filesystem_usage:',
|
131 | 131 | expr: |||
|
132 |
| - max by (instance,volume)( |
| 132 | + max by (%(clusterLabel)s,instance,volume)( |
133 | 133 | (windows_logical_disk_size_bytes{%(windowsExporterSelector)s}
|
134 | 134 | - windows_logical_disk_free_bytes{%(windowsExporterSelector)s})
|
135 | 135 | / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}
|
|
139 | 139 | {
|
140 | 140 | record: 'node:windows_node_filesystem_avail:',
|
141 | 141 | expr: |||
|
142 |
| - max by (instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) |
| 142 | + max by (%(clusterLabel)s, instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) |
143 | 143 | ||| % $._config,
|
144 | 144 | },
|
145 | 145 | {
|
146 | 146 | record: ':windows_node_net_utilisation:sum_irate',
|
147 | 147 | expr: |||
|
148 |
| - sum(irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) |
| 148 | + sum by (%(clusterLabel)s) (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) |
149 | 149 | ||| % $._config,
|
150 | 150 | },
|
151 | 151 | {
|
152 | 152 | record: 'node:windows_node_net_utilisation:sum_irate',
|
153 | 153 | expr: |||
|
154 |
| - sum by (instance) ( |
| 154 | + sum by (%(clusterLabel)s, instance) ( |
155 | 155 | (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m]))
|
156 | 156 | )
|
157 | 157 | ||| % $._config,
|
158 | 158 | },
|
159 | 159 | {
|
160 | 160 | record: ':windows_node_net_saturation:sum_irate',
|
161 | 161 | expr: |||
|
162 |
| - sum(irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + |
163 |
| - sum(irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) |
| 162 | + sum by (%(clusterLabel)s) (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + |
| 163 | + sum by (%(clusterLabel)s) (irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) |
164 | 164 | ||| % $._config,
|
165 | 165 | },
|
166 | 166 | {
|
167 | 167 | record: 'node:windows_node_net_saturation:sum_irate',
|
168 | 168 | expr: |||
|
169 |
| - sum by (instance) ( |
| 169 | + sum by (%(clusterLabel)s, instance) ( |
170 | 170 | (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m]) +
|
171 | 171 | irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m]))
|
172 | 172 | )
|
|
180 | 180 | {
|
181 | 181 | record: 'windows_pod_container_available',
|
182 | 182 | expr: |||
|
183 |
| - windows_container_available{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 183 | + windows_container_available{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
184 | 184 | ||| % $._config,
|
185 | 185 | },
|
186 | 186 | {
|
187 | 187 | record: 'windows_container_total_runtime',
|
188 | 188 | expr: |||
|
189 |
| - windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 189 | + windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
190 | 190 | ||| % $._config,
|
191 | 191 | },
|
192 | 192 | {
|
193 | 193 | record: 'windows_container_memory_usage',
|
194 | 194 | expr: |||
|
195 |
| - windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 195 | + windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
196 | 196 | ||| % $._config,
|
197 | 197 | },
|
198 | 198 | {
|
199 | 199 | record: 'windows_container_private_working_set_usage',
|
200 | 200 | expr: |||
|
201 |
| - windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 201 | + windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
202 | 202 | ||| % $._config,
|
203 | 203 | },
|
204 | 204 | {
|
205 | 205 | record: 'windows_container_network_received_bytes_total',
|
206 | 206 | expr: |||
|
207 |
| - windows_container_network_receive_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 207 | + windows_container_network_receive_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
208 | 208 | ||| % $._config,
|
209 | 209 | },
|
210 | 210 | {
|
211 | 211 | record: 'windows_container_network_transmitted_bytes_total',
|
212 | 212 | expr: |||
|
213 |
| - windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace) |
| 213 | + windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) |
214 | 214 | ||| % $._config,
|
215 | 215 | },
|
216 | 216 | {
|
217 | 217 | record: 'kube_pod_windows_container_resource_memory_request',
|
218 | 218 | expr: |||
|
219 |
| - max by (namespace, pod, container) ( |
| 219 | + max by (%(clusterLabel)s, namespace, pod, container) ( |
220 | 220 | kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}
|
221 |
| - ) * on(container,pod,namespace) (windows_pod_container_available) |
| 221 | + ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) |
222 | 222 | ||| % $._config,
|
223 | 223 | },
|
224 | 224 | {
|
225 | 225 | record: 'kube_pod_windows_container_resource_memory_limit',
|
226 | 226 | expr: |||
|
227 |
| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available) |
| 227 | + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) |
228 | 228 | ||| % $._config,
|
229 | 229 | },
|
230 | 230 | {
|
231 | 231 | record: 'kube_pod_windows_container_resource_cpu_cores_request',
|
232 | 232 | expr: |||
|
233 |
| - max by (namespace, pod, container) ( |
| 233 | + max by (%(clusterLabel)s, namespace, pod, container) ( |
234 | 234 | kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}
|
235 |
| - ) * on(container,pod,namespace) (windows_pod_container_available) |
| 235 | + ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) |
236 | 236 | ||| % $._config,
|
237 | 237 | },
|
238 | 238 | {
|
239 | 239 | record: 'kube_pod_windows_container_resource_cpu_cores_limit',
|
240 | 240 | expr: |||
|
241 |
| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available) |
| 241 | + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) |
242 | 242 | ||| % $._config,
|
243 | 243 | },
|
244 | 244 | {
|
245 | 245 | record: 'namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate',
|
246 | 246 | expr: |||
|
247 |
| - sum by (namespace, pod, container) ( |
| 247 | + sum by (%(clusterLabel)s, namespace, pod, container) ( |
248 | 248 | rate(windows_container_total_runtime{}[5m])
|
249 | 249 | )
|
250 | 250 | ||| % $._config,
|
|
0 commit comments