Skip to content

Commit 0ab459a

Browse files
authored
in_node_exporter_metrics: add support for thermal_zone. (#7522)
--------- Signed-off-by: Phillip Whelan <[email protected]>
1 parent a1faa4f commit 0ab459a

File tree

9 files changed

+480
-6
lines changed

9 files changed

+480
-6
lines changed

plugins/in_node_exporter_metrics/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ set(src
1616
ne_utils.c
1717
ne_config.c
1818
ne_systemd.c
19+
ne_thermalzone.c
1920
ne.c
2021
)
2122

plugins/in_node_exporter_metrics/ne.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "ne_systemd.h"
4444
#include "ne_processes.h"
4545
#include "ne_nvme.h"
46+
#include "ne_thermalzone.h"
4647

4748
/*
4849
* Update the metrics, this function is invoked every time 'scrape_interval'
@@ -192,6 +193,7 @@ static int in_ne_init(struct flb_input_instance *in,
192193
mk_list_add(&systemd_collector._head, &ctx->collectors);
193194
mk_list_add(&processes_collector._head, &ctx->collectors);
194195
mk_list_add(&nvme_collector._head, &ctx->collectors);
196+
mk_list_add(&thermalzone_collector._head, &ctx->collectors);
195197

196198
mk_list_foreach(head, &ctx->collectors) {
197199
coll = mk_list_entry(head, struct flb_ne_collector, _head);
@@ -402,6 +404,11 @@ static struct flb_config_map config_map[] = {
402404
0, FLB_FALSE, 0,
403405
"scrape interval to collect processes metrics from the node."
404406
},
407+
{
408+
FLB_CONFIG_MAP_TIME, "collector.thermalzone.scrape_interval", "0",
409+
0, FLB_FALSE, 0,
410+
"scrape interval to collect thermal zone metrics from the node."
411+
},
405412

406413
{
407414
FLB_CONFIG_MAP_TIME, "collector.nvme.scrape_interval", "0",

plugins/in_node_exporter_metrics/ne.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
/* Default enabled metrics */
3434

3535
#ifdef __linux__
36-
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme"
36+
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme,thermal_zone"
3737
#elif __APPLE__
3838
#define NE_DEFAULT_ENABLED_METRICS "cpu,loadavg,meminfo,diskstats,uname,netdev"
3939
#endif
@@ -206,6 +206,11 @@ struct flb_ne {
206206

207207
/* nvme */
208208
struct cmt_gauge *nvme_info;
209+
210+
/* thermal zone */
211+
struct cmt_gauge *thermalzone_temp;
212+
struct cmt_gauge *cooling_device_cur_state;
213+
struct cmt_gauge *cooling_device_max_state;
209214
};
210215

211216
struct flb_ne_collector {
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2023 The Fluent Bit Authors
6+
*
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
#ifdef __linux__
21+
#include "ne_thermalzone_linux.c"
22+
#else
23+
24+
#include "ne.h"
25+
26+
struct flb_ne_collector thermalzone_collector = {
27+
.name = "thermal_zone",
28+
.cb_init = NULL,
29+
.cb_update = NULL,
30+
.cb_exit = NULL
31+
};
32+
#endif
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2023 The Fluent Bit Authors
6+
*
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
#ifndef FLB_IN_NE_THERMALZONE_H
21+
#define FLB_IN_NE_THERMALZONE_H
22+
23+
#include "ne.h"
24+
25+
extern struct flb_ne_collector thermalzone_collector;
26+
27+
#endif
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2+
3+
/* Fluent Bit
4+
* ==========
5+
* Copyright (C) 2015-2022 The Fluent Bit Authors
6+
*
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
#include <fluent-bit/flb_info.h>
21+
#include <fluent-bit/flb_input_plugin.h>
22+
23+
#include "ne.h"
24+
#include "ne_utils.h"
25+
#include "ne_thermalzone_linux.h"
26+
27+
#include <unistd.h>
28+
29+
/*
30+
* See kernel documentation for a description:
31+
* https://www.kernel.org/doc/html/latest/driver-api/thermal/sysfs-api.html
32+
*
33+
* Make sure to consult the documentation that matches the running kernel; older versions are here:
34+
* https://github.com/torvalds/linux/tree/master/Documentation
35+
*/
36+
/*
37+
* Thermal zone stats, reads /sys/class/thermal/thermal_zone*
38+
* ----------------------------------------------------------
39+
*/
40+
41+
static int ne_thermalzone_init(struct flb_ne *ctx)
42+
{
43+
ctx->thermalzone_temp = cmt_gauge_create(ctx->cmt, "node", "thermal_zone", "temp",
44+
"Zone temperature in Celsius",
45+
2, (char *[]) {"zone", "type"});
46+
if (!ctx->thermalzone_temp) {
47+
flb_plg_error(ctx->ins, "could not initialize thermal zone metrics");
48+
return -1;
49+
}
50+
51+
ctx->cooling_device_cur_state = cmt_gauge_create(ctx->cmt,
52+
"node", "cooling_device", "cur_state",
53+
"Current throttle state of the cooling device",
54+
2, (char *[]) {"name", "type"});
55+
if (!ctx->cooling_device_cur_state) {
56+
flb_plg_error(ctx->ins, "could not initialize cooling device cur_state metric");
57+
return -1;
58+
}
59+
60+
ctx->cooling_device_max_state = cmt_gauge_create(ctx->cmt,
61+
"node", "cooling_device", "max_state",
62+
"Maximum throttle state of the cooling device",
63+
2, (char *[]) {"name", "type"});
64+
if (!ctx->cooling_device_max_state) {
65+
flb_plg_error(ctx->ins, "could not initialize cooling device max_state metric");
66+
return -1;
67+
}
68+
69+
return 0;
70+
}
71+
72+
static int ne_thermalzone_update_thermal_zones(struct flb_ne *ctx)
73+
{
74+
uint64_t tstamp;
75+
int ret;
76+
uint64_t temp = 0;
77+
struct mk_list *head;
78+
struct mk_list list;
79+
struct flb_slist_entry *entry;
80+
flb_sds_t type;
81+
flb_sds_t full_path_sysfs;
82+
int path_sysfs_len;
83+
char *num;
84+
85+
tstamp = cfl_time_now();
86+
87+
ret = ne_utils_path_scan(ctx, ctx->path_sysfs, THERMAL_ZONE_PATTERN, NE_SCAN_DIR, &list);
88+
if (ret != 0) {
89+
return -1;
90+
}
91+
92+
if (mk_list_size(&list) == 0) {
93+
return 0;
94+
}
95+
96+
full_path_sysfs = flb_sds_create_size(strlen(THERMAL_ZONE_BASE) +
97+
strlen(ctx->path_sysfs) + 8);
98+
if (full_path_sysfs == NULL) {
99+
flb_slist_destroy(&list);
100+
return -1;
101+
}
102+
path_sysfs_len = strlen(ctx->path_sysfs);
103+
if (ctx->path_sysfs[strlen(ctx->path_sysfs)-1] == '/') {
104+
path_sysfs_len--;
105+
}
106+
/* Set the full_path to the sysfs path */
107+
if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
108+
flb_slist_destroy(&list);
109+
flb_sds_destroy(full_path_sysfs);
110+
return -1;
111+
}
112+
/* Concatenate the base for all thermalzone objects */
113+
if (flb_sds_cat_safe(&full_path_sysfs, THERMAL_ZONE_BASE,
114+
strlen(THERMAL_ZONE_BASE)) < 0) {
115+
flb_slist_destroy(&list);
116+
flb_sds_destroy(full_path_sysfs);
117+
return -1;
118+
}
119+
120+
/* Process entries */
121+
mk_list_foreach(head, &list) {
122+
entry = mk_list_entry(head, struct flb_slist_entry, _head);
123+
124+
/* Core ID */
125+
ret = ne_utils_file_read_uint64(ctx->path_sysfs,
126+
entry->str,
127+
"temp", NULL,
128+
&temp);
129+
if (ret != 0) {
130+
continue;
131+
}
132+
133+
ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
134+
if (ret != 0) {
135+
flb_plg_error(ctx->ins, "unable to get type for zone: %s", entry->str);
136+
continue;
137+
}
138+
139+
if (strncmp(entry->str, full_path_sysfs, strlen(full_path_sysfs)) == 0) {
140+
num = &entry->str[strlen(full_path_sysfs)];
141+
} else {
142+
num = entry->str;
143+
}
144+
145+
cmt_gauge_set(ctx->thermalzone_temp, tstamp, ((double) temp)/1000.0,
146+
2, (char *[]) {num, type});
147+
148+
flb_sds_destroy(type);
149+
}
150+
151+
flb_slist_destroy(&list);
152+
flb_sds_destroy(full_path_sysfs);
153+
154+
return 0;
155+
}
156+
157+
static int ne_thermalzone_update_cooling_devices(struct flb_ne *ctx)
158+
{
159+
uint64_t tstamp;
160+
int ret;
161+
uint64_t cur_state = 0;
162+
uint64_t max_state = 0;
163+
struct mk_list *head;
164+
struct mk_list list;
165+
struct flb_slist_entry *entry;
166+
flb_sds_t type;
167+
char *num;
168+
flb_sds_t full_path_sysfs;
169+
int path_sysfs_len;
170+
171+
tstamp = cfl_time_now();
172+
173+
ret = ne_utils_path_scan(ctx, ctx->path_sysfs, COOLING_DEVICE_PATTERN, NE_SCAN_DIR, &list);
174+
if (ret != 0) {
175+
return -1;
176+
}
177+
178+
if (mk_list_size(&list) == 0) {
179+
return 0;
180+
}
181+
182+
full_path_sysfs = flb_sds_create_size(strlen(COOLING_DEVICE_BASE) +
183+
strlen(ctx->path_sysfs) + 8);
184+
if (full_path_sysfs == NULL) {
185+
flb_slist_destroy(&list);
186+
return -1;
187+
}
188+
path_sysfs_len = strlen(ctx->path_sysfs);
189+
if (ctx->path_sysfs[strlen(ctx->path_sysfs)-1] == '/') {
190+
path_sysfs_len--;
191+
}
192+
if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
193+
flb_slist_destroy(&list);
194+
flb_sds_destroy(full_path_sysfs);
195+
return -1;
196+
}
197+
if (flb_sds_cat_safe(&full_path_sysfs, COOLING_DEVICE_BASE,
198+
strlen(COOLING_DEVICE_BASE)) < 0) {
199+
flb_slist_destroy(&list);
200+
flb_sds_destroy(full_path_sysfs);
201+
return -1;
202+
}
203+
204+
/* Process entries */
205+
mk_list_foreach(head, &list) {
206+
entry = mk_list_entry(head, struct flb_slist_entry, _head);
207+
208+
/* Core ID */
209+
ret = ne_utils_file_read_uint64(ctx->path_sysfs,
210+
entry->str,
211+
"cur_state", NULL,
212+
&cur_state);
213+
if (ret != 0) {
214+
continue;
215+
}
216+
217+
ret = ne_utils_file_read_uint64(ctx->path_sysfs,
218+
entry->str,
219+
"max_state", NULL,
220+
&max_state);
221+
if (ret != 0) {
222+
continue;
223+
}
224+
225+
ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
226+
if (ret != 0) {
227+
flb_plg_error(ctx->ins, "unable to get type for zone: %s", entry->str);
228+
continue;
229+
}
230+
231+
if (strncmp(entry->str, full_path_sysfs, strlen(full_path_sysfs)) == 0) {
232+
num = &entry->str[strlen(full_path_sysfs)];
233+
} else {
234+
num = entry->str;
235+
}
236+
237+
cmt_gauge_set(ctx->cooling_device_cur_state, tstamp, ((double)cur_state),
238+
2, (char *[]) {num, type});
239+
cmt_gauge_set(ctx->cooling_device_max_state, tstamp, ((double)max_state),
240+
2, (char *[]) {num, type});
241+
flb_sds_destroy(type);
242+
}
243+
244+
flb_slist_destroy(&list);
245+
flb_sds_destroy(full_path_sysfs);
246+
247+
return 0;
248+
}
249+
250+
/*
 * Collector update callback: refresh the thermal zone gauge first and then
 * the cooling device gauges. If the thermal zone pass fails its return code
 * is propagated and the cooling devices are not processed.
 */
static int ne_thermalzone_update(struct flb_input_instance *ins, struct flb_config *config, void *in_context)
{
    struct flb_ne *ne = (struct flb_ne *) in_context;
    int rc;

    rc = ne_thermalzone_update_thermal_zones(ne);
    if (rc == 0) {
        rc = ne_thermalzone_update_cooling_devices(ne);
    }

    return rc;
}
261+
262+
struct flb_ne_collector thermalzone_collector = {
263+
.name = "thermal_zone",
264+
.cb_init = ne_thermalzone_init,
265+
.cb_update = ne_thermalzone_update,
266+
.cb_exit = NULL
267+
};

0 commit comments

Comments
 (0)