From 0e12aee95ede66f9ae05125c380d11939fda49c5 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 23 Aug 2021 11:07:00 -0400 Subject: [PATCH 01/21] proc_metrics: add new proc metrics plugin for getting metrics on a single process. Signed-off-by: Phillip Whelan --- CMakeLists.txt | 2 + plugins/CMakeLists.txt | 1 + plugins/in_proc_metrics/CMakeLists.txt | 4 + plugins/in_proc_metrics/proc_metrics.c | 490 +++++++++++++++++++++++++ plugins/in_proc_metrics/proc_metrics.h | 89 +++++ 5 files changed, 586 insertions(+) create mode 100644 plugins/in_proc_metrics/CMakeLists.txt create mode 100644 plugins/in_proc_metrics/proc_metrics.c create mode 100644 plugins/in_proc_metrics/proc_metrics.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ef9f14a541a..c1a4bc69c60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,6 +143,7 @@ option(FLB_IN_STATSD "Enable StatsD input plugin" Yes) option(FLB_IN_STORAGE_BACKLOG "Enable storage backlog input plugin" Yes) option(FLB_IN_EMITTER "Enable emitter input plugin" Yes) option(FLB_IN_NODE_EXPORTER_METRICS "Enable node exporter metrics input plugin" Yes) +option(FLB_IN_PROC_METRICS "Enable process metrics input plugin" Yes) option(FLB_OUT_AZURE "Enable Azure output plugin" Yes) option(FLB_OUT_AZURE_BLOB "Enable Azure output plugin" Yes) option(FLB_OUT_BIGQUERY "Enable BigQuery output plugin" Yes) @@ -226,6 +227,7 @@ if(FLB_ALL) set(FLB_IN_DUMMY 1) set(FLB_IN_NETIF 1) set(FLB_IN_EXEC 1) + set(FLB_IN_PROC_METRICS 1) # Output plugins set(FLB_OUT_ES 1) diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index b3f1db591fa..ddeb76eae7b 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -182,6 +182,7 @@ REGISTER_IN_PLUGIN("in_http") REGISTER_IN_PLUGIN("in_collectd") REGISTER_IN_PLUGIN("in_statsd") REGISTER_IN_PLUGIN("in_storage_backlog") +REGISTER_IN_PLUGIN("in_proc_metrics") if (FLB_STREAM_PROCESSOR) REGISTER_IN_PLUGIN("in_stream_processor") diff --git a/plugins/in_proc_metrics/CMakeLists.txt b/plugins/in_proc_metrics/CMakeLists.txt new file mode 100644 index 00000000000..eedd92a50bd --- /dev/null +++ b/plugins/in_proc_metrics/CMakeLists.txt @@ -0,0 +1,4 @@ +set(src + proc_metrics.c) + +FLB_PLUGIN(in_proc_metrics "${src}" "") diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c new file mode 100644 index 00000000000..b873916b282 --- /dev/null +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -0,0 +1,490 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2021 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc_metrics.h" + +/* rchar: 260189 + * wchar: 413454 + * syscr: 2036 + * syscw: 2564 + * read_bytes: 0 + * write_bytes: 0 + * cancelled_write_bytes: 0 + */ +static int parse_proc_io(struct proc_metrics_ctx *ctx, const char *buf, + struct proc_metrics_io_status *status) +{ + struct mk_list *llines; + struct mk_list *head; + struct flb_split_entry *cur = NULL; + int line = 0; + + llines = flb_utils_split(buf, '\n', 7); + mk_list_foreach(head, llines) { + cur = mk_list_entry(head, struct flb_split_entry, _head); + switch(line) { + case 0: + sscanf(cur->value, "rchar: %lu", &status->rchar); + break; + case 1: + sscanf(cur->value, "wchar: %lu", &status->wchar); + break; + case 2: + sscanf(cur->value, "syscr: %lu", &status->syscr); + break; + case 3: + sscanf(cur->value, "syscw: %lu", &status->syscw); + break; + case 4: + sscanf(cur->value, "read_bytes: %lu", &status->read_bytes); + break; + case 5: + sscanf(cur->value, "write_bytes: %lu", &status->write_bytes); + break; + case 6: + sscanf(cur->value, "cancelled_write_bytes: %lu", + &status->cancelled_write_bytes); + break; + } + line++; + } + flb_utils_split_free(llines); + return 0; +} + +/* size res trs lrs drs dt (implied) + * 1793 516 482 4 0 180 0 + */ +static int parse_proc_mem(struct proc_metrics_ctx *ctx, const char *buf, struct proc_metrics_mem_status *status) +{ + struct mk_list *lfields; + struct mk_list *head; + struct flb_split_entry *cur = NULL; + int line = 0; + + lfields = flb_utils_split(buf, ' ', 7); + mk_list_foreach(head, lfields) { + cur = mk_list_entry(head, struct flb_split_entry, _head); + switch(line) { + case 0: + sscanf(cur->value, "%lu", &status->size); + break; + case 1: + sscanf(cur->value, "%lu", &status->resident); + break; + case 2: + sscanf(cur->value, "%lu", &status->shared); + break; + case 3: + sscanf(cur->value, "%lu", &status->trs); + break; + case 4: + sscanf(cur->value, "%lu", &status->lrs); + break; + case 5: + sscanf(cur->value, "%lu", &status->drs); + break; + case 6: + sscanf(cur->value, "%lu", &status->dt); + break; + } + line++; + } + flb_utils_split_free(lfields); + return 0; +} + +/* We specifically *CANNOT* use flb_utils_read_file because + * /proc special files tend to report their own size as 0. + */ +static int read_file_lines(const char *path, char *buf, size_t maxlen, int lines) +{ + int fd; + int rc; + + fd = open(path, O_RDONLY); + if (fd == -1) { + flb_errno(); + return -1; + } + rc = read(fd, buf, maxlen-1); + if (rc == -1) { + close(fd); + flb_errno(); + return -1; + } + buf[rc] = '\0'; + close(fd); + return 0; +} + +static int read_stat_file(struct proc_metrics_ctx *ctx, const char *file, + char *buf, size_t maxlen, int lines) +{ + char pathname[PATH_MAX]; + if (ctx->pid > 0) { + snprintf(pathname, sizeof(pathname)-1, "/proc/%d/%s", ctx->pid, file); + } else { + snprintf(pathname, sizeof(pathname)-1, "/proc/%d/%s", getpid(), file); + } + flb_plg_debug(ctx->ins, "open proc file: %s", pathname); + if (read_file_lines(pathname, buf, maxlen, lines) == -1) { + flb_errno(); + flb_plg_error(ctx->ins, "unable to open file: %s", pathname); + return -1; + } + return 0; +} + +/** + * Callback function to gather statistics from /proc/$PID. + * + * + * @param ins Pointer to flb_input_instance + * @param config Pointer to flb_config + * @param in_context void Pointer used to cast to + * flb_in_de_config + * + * @return int 0 for success -1 for failure. + */ +static int proc_metrics_collect(struct flb_input_instance *ins, + struct flb_config *config, void *in_context) +{ + char buf[1024]; + struct proc_metrics_ctx *ctx = (struct proc_metrics_ctx *)in_context; + uint64_t ts = cmt_time_now(); + struct proc_metrics_status status; + int ret; + + if (read_stat_file(ctx, "io", buf, sizeof(buf)-1, 7) == -1) { + return -1; + } + if (parse_proc_io(ctx, buf, &status.io) != 0) { + flb_free(buf); + return -1; + } + + if (read_stat_file(ctx, "statm", buf, sizeof(buf)-1, 1) == -1) { + return -1; + } + if (parse_proc_mem(ctx, buf, &status.mem) != 0) { + return -1; + } + + cmt_counter_set(ctx->rchar, ts, (double)status.io.rchar, 0, NULL); + cmt_counter_set(ctx->wchar, ts, (double)status.io.wchar, 0, NULL); + cmt_counter_set(ctx->syscr, ts, (double)status.io.syscr, 0, NULL); + cmt_counter_set(ctx->syscw, ts, (double)status.io.syscw, 0, NULL); + cmt_counter_set(ctx->read_bytes, ts, (double)status.io.read_bytes, 0, NULL); + cmt_counter_set(ctx->write_bytes, ts, (double)status.io.write_bytes, 0, NULL); + cmt_counter_set(ctx->cancelled_write_bytes, ts, + (double)status.io.cancelled_write_bytes, 0, NULL); + + cmt_gauge_set(ctx->size, ts, (double)status.mem.size, 0, NULL); + cmt_gauge_set(ctx->resident, ts, (double)status.mem.resident, 0, NULL); + cmt_gauge_set(ctx->shared, ts, (double)status.mem.shared, 0, NULL); + cmt_gauge_set(ctx->trs, ts, (double)status.mem.trs, 0, NULL); + cmt_gauge_set(ctx->lrs, ts, (double)status.mem.lrs, 0, NULL); + cmt_gauge_set(ctx->drs, ts, (double)status.mem.drs, 0, NULL); + cmt_gauge_set(ctx->dt, ts, (double)status.mem.dt, 0, NULL); + + ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); + if (ret != 0) { + flb_plg_error(ins, "could not append metrics"); + } + return ret; +} + +/** + * Function to initialize the proc stats plugin. + * + * @param ins Pointer to flb_input_instance + * @param config Pointer to flb_config + * @param data Unused + * + * @return int 0 on success, -1 on failure + */ +static int proc_metrics_init(struct flb_input_instance *ins, + struct flb_config *config, void *data) +{ + struct proc_metrics_ctx *ctx; + int ret; + + ctx = flb_calloc(1, sizeof(struct proc_metrics_ctx)); + if (!ctx) { + flb_errno(); + return -1; + } + ctx->ins = ins; + + ret = flb_input_config_map_set(ins, (void *)ctx); + if (ret == -1) { + flb_free(ctx); + return -1; + } + + ctx->cmt = cmt_create(); + if (!ctx->cmt) { + flb_plg_error(ins, "could not initialize CMetrics"); + goto cmt_error; + } + + ctx->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", + "The number of bytes which this task has " + "caused to be read from storage.", 0, NULL); + if (ctx->rchar == NULL) { + flb_plg_error(ins, "could not initialize rchar counter"); + goto cmt_counter_error; + } + + ctx->wchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "wchar", + "The number of bytes which this task has " + "caused, or shall cause to be written to " + "disk.", 0, NULL); + if (ctx->wchar == NULL) { + flb_plg_error(ins, "could not initialize wchar counter"); + goto cmt_counter_error; + } + + ctx->syscr = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscr", + "Attempt to count the number of read I/O " + "operations, i.e. syscalls like read() and " + "pread().", 0, NULL); + if (ctx->syscr == NULL) { + flb_plg_error(ins, "could not initialize syscr counter"); + goto cmt_counter_error; + } + + ctx->syscw = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscw", + "Attempt to count the number of write I/O " + "operations, i.e. syscalls like write() and " + "pwrite().", 0, NULL); + if (ctx->syscw == NULL) { + flb_plg_error(ins, "could not initialize syscw counter"); + goto cmt_counter_error; + } + + ctx->read_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "read_bytes", + "Attempt to count the number of bytes " + "which this process really did cause to" + " be fetched from the storage layer.", + 0, NULL); + if (ctx->read_bytes == NULL) { + flb_plg_error(ins, "could not initialize read_bytes counter"); + goto cmt_counter_error; + } + + ctx->write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "write_bytes", + "Attempt to count the number of bytes " + "which this process caused to be sent " + "to the storage layer.", 0, NULL); + if (ctx->write_bytes == NULL) { + flb_plg_error(ins, "could not initialize write_bytes counter"); + goto cmt_counter_error; + } + + ctx->cancelled_write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", + "cancelled_write_bytes", + "The number of bytes which " + "this process caused to not " + "happen, by truncating " + "pagecache.", 0, NULL); + if (ctx->cancelled_write_bytes == NULL) { + flb_plg_error(ins, "could not initialize cancelled_write_bytes counter"); + goto cmt_counter_error; + } + + ctx->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", + "total program size (pages).", 0, NULL); + if (ctx->size == NULL) { + flb_plg_error(ins, "could not initialize size gauge"); + goto cmt_gauge_error; + } + + ctx->resident = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "resident", + "size of memory portions (pages).", 0, NULL); + if (ctx->resident == NULL) { + flb_plg_error(ins, "could not initialize resident gauge"); + goto cmt_gauge_error; + } + + ctx->shared = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "shared", + "number of pages that are shared.", 0, NULL); + if (ctx->shared == NULL) { + flb_plg_error(ins, "could not initialize shared gauge"); + goto cmt_gauge_error; + } + + ctx->trs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "trs", + "number of pages that are ‘code’.", 0, NULL); + if (ctx->trs == NULL) { + flb_plg_error(ins, "could not initialize trs gauge"); + goto cmt_gauge_error; + } + + ctx->lrs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "lrs", + "number of pages of library.", 0, NULL); + if (ctx->lrs == NULL) { + flb_plg_error(ins, "could not initialize lrs gauge"); + goto cmt_gauge_error; + } + + ctx->drs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "drs", + "number of pages of data/stack.", 0, NULL); + if (ctx->drs == NULL) { + flb_plg_error(ins, "could not initialize drs gauge"); + goto cmt_gauge_error; + } + + ctx->dt = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "dt", + "number of dirty pages.", 0, NULL); + if (ctx->dt == NULL) { + flb_plg_error(ins, "could not initialize dt gauge"); + goto cmt_gauge_error; + } + + + flb_input_set_context(ins, ctx); + ctx->coll_id = flb_input_set_collector_time(ins, + proc_metrics_collect, + 1, 0, config); + return 0; + +cmt_gauge_error: + if (ctx->size != NULL) { + cmt_gauge_destroy(ctx->size); + } +cmt_counter_error: + if (ctx->rchar != NULL) { + cmt_counter_destroy(ctx->rchar); + } + if (ctx->wchar != NULL) { + cmt_counter_destroy(ctx->wchar); + } + if (ctx->syscr != NULL) { + cmt_counter_destroy(ctx->syscr); + } + if (ctx->syscw != NULL) { + cmt_counter_destroy(ctx->syscw); + } + if (ctx->read_bytes != NULL) { + cmt_counter_destroy(ctx->read_bytes); + } + if (ctx->write_bytes != NULL) { + cmt_counter_destroy(ctx->write_bytes); + } + if (ctx->cancelled_write_bytes != NULL) { + cmt_counter_destroy(ctx->cancelled_write_bytes); + } + cmt_destroy(ctx->cmt); +cmt_error: + flb_free(ctx); + return -1; +} + +/** + * Function to destroy proc_metrics_status plugin. + * + * @param ctx Pointer to proc_metrics_ctx + * + * @return int 0 + */ +static int proc_metrics_ctx_destroy(struct proc_metrics_ctx *ctx) +{ + if (ctx->cmt) { + if (ctx->rchar != NULL) { + cmt_counter_destroy(ctx->rchar); + } + if (ctx->wchar != NULL) { + cmt_counter_destroy(ctx->wchar); + } + if (ctx->syscr != NULL) { + cmt_counter_destroy(ctx->syscr); + } + if (ctx->syscw != NULL) { + cmt_counter_destroy(ctx->syscw); + } + if (ctx->read_bytes != NULL) { + cmt_counter_destroy(ctx->read_bytes); + } + if (ctx->write_bytes != NULL) { + cmt_counter_destroy(ctx->write_bytes); + } + if (ctx->cancelled_write_bytes != NULL) { + cmt_counter_destroy(ctx->cancelled_write_bytes); + } + cmt_destroy(ctx->cmt); + } + flb_free(ctx); + return 0; +} + +/** + * Callback exit function to cleanup plugin + * + * @param data Pointer cast to flb_in_de_config + * @param config Unused + * + * @return int Always returns 0 + */ +static int proc_metrics_exit(void *data, struct flb_config *config) +{ + struct proc_metrics_ctx *ctx = (struct proc_metrics_ctx *)data; + if (!ctx) { + return 0; + } + proc_metrics_ctx_destroy(ctx); + + return 0; +} + +/* Configuration properties map */ +static struct flb_config_map config_map[] = { + { + FLB_CONFIG_MAP_STR, "process", 0, + 0, FLB_TRUE, offsetof(struct proc_metrics_ctx, process), + "The Process Name or ID to collect statistics for." + }, + /* EOF */ + {0} +}; + +/* Plugin reference */ +struct flb_input_plugin in_proc_metrics_plugin = { + .name = "proc_metrics", + .description = "Process ID stats metrics", + .cb_init = proc_metrics_init, + .cb_pre_run = NULL, + .cb_collect = proc_metrics_collect, + .cb_flush_buf = NULL, + .config_map = config_map, + .cb_exit = proc_metrics_exit, + .event_type = FLB_INPUT_METRICS +}; diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h new file mode 100644 index 00000000000..ab965da848f --- /dev/null +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -0,0 +1,89 @@ +////* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2021 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLB_IN_METRICS_H +#define FLB_IN_METRICS_H + +#include +#include +#include +#include + +struct proc_metrics_ctx +{ + int coll_id; /* collector id */ + pid_t pid; /* process id to monitor */ + char *process; /* process name or id to monitor */ + struct flb_parser *parser; + struct flb_input_instance *ins; /* Input plugin instace */ + struct cmt *cmt; + /* rchar: 260189 + * wchar: 413454 + * syscr: 2036 + * syscw: 2564 + * read_bytes: 0 + * write_bytes: 0 + * cancelled_write_bytes: 0 + */ + struct cmt_counter *rchar; + struct cmt_counter *wchar; + struct cmt_counter *syscr; + struct cmt_counter *syscw; + struct cmt_counter *read_bytes; + struct cmt_counter *write_bytes; + struct cmt_counter *cancelled_write_bytes; + + struct cmt_gauge *size; + struct cmt_gauge *resident; + struct cmt_gauge *shared; + struct cmt_gauge *trs; + struct cmt_gauge *lrs; + struct cmt_gauge *drs; + struct cmt_gauge *dt; +}; + +struct proc_metrics_io_status +{ + uint64_t rchar; + uint64_t wchar; + uint64_t syscr; + uint64_t syscw; + uint64_t read_bytes; + uint64_t write_bytes; + uint64_t cancelled_write_bytes; +}; + +struct proc_metrics_mem_status +{ + uint64_t size; + uint64_t resident; + uint64_t shared; + uint64_t trs; + uint64_t lrs; + uint64_t drs; + uint64_t dt; +}; + +struct proc_metrics_status { + struct proc_metrics_io_status io; + struct proc_metrics_mem_status mem; +}; + +#endif From 9da5b0790c293b04294d2aa59b62126b9d3c62f0 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Fri, 10 Sep 2021 16:32:36 -0300 Subject: [PATCH 02/21] in_proc_metrics: search for process by name. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 162 ++++++++++++++++++++----- plugins/in_proc_metrics/proc_metrics.h | 1 + 2 files changed, 135 insertions(+), 28 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index b873916b282..254c07625dd 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -26,10 +26,14 @@ #include #include #include +#include +#include #include #include "proc_metrics.h" +#define FLB_CMD_LEN 256 + /* rchar: 260189 * wchar: 413454 * syscr: 2036 @@ -163,6 +167,64 @@ static int read_stat_file(struct proc_metrics_ctx *ctx, const char *file, return 0; } +static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, + const char* proc) +{ + pid_t ret = -1; + glob_t glb; + int i; + int fd = -1; + long ret_scan = -1; + int ret_glb = -1; + ssize_t count; + + char cmdname[FLB_CMD_LEN]; + char* bname = NULL; + + ret_glb = glob("/proc/*/cmdline", 0 ,NULL, &glb); + if (ret_glb != 0) { + switch(ret_glb){ + case GLOB_NOSPACE: + flb_plg_warn(ctx->ins, "glob: no space"); + break; + case GLOB_NOMATCH: + flb_plg_warn(ctx->ins, "glob: no match"); + break; + case GLOB_ABORTED: + flb_plg_warn(ctx->ins, "glob: aborted"); + break; + default: + flb_plg_warn(ctx->ins, "glob: other error"); + } + return ret; + } + + for (i = 0; i < glb.gl_pathc; i++) { + fd = open(glb.gl_pathv[i], O_RDONLY); + if (fd < 0) { + continue; + } + count = read(fd, &cmdname, FLB_CMD_LEN); + if (count <= 0){ + close(fd); + continue; + } + cmdname[FLB_CMD_LEN-1] = '\0'; + bname = basename(cmdname); + + if (strncmp(proc, bname, FLB_CMD_LEN) == 0) { + sscanf(glb.gl_pathv[i],"/proc/%ld/cmdline",&ret_scan); + ret = (pid_t)ret_scan; + close(fd); + break; + } + close(fd); + } + globfree(&glb); + return ret; +} + + /** * Callback function to gather statistics from /proc/$PID. * @@ -181,8 +243,17 @@ static int proc_metrics_collect(struct flb_input_instance *ins, struct proc_metrics_ctx *ctx = (struct proc_metrics_ctx *)in_context; uint64_t ts = cmt_time_now(); struct proc_metrics_status status; + char pid[64]; int ret; + if (ctx->proc_name != NULL) { + ret = get_pid_from_procname_linux(ctx, ctx->proc_name); + if (ret == -1) { + return -1; + } + ctx->pid = ret; + } + if (read_stat_file(ctx, "io", buf, sizeof(buf)-1, 7) == -1) { return -1; } @@ -198,22 +269,31 @@ static int proc_metrics_collect(struct flb_input_instance *ins, return -1; } - cmt_counter_set(ctx->rchar, ts, (double)status.io.rchar, 0, NULL); - cmt_counter_set(ctx->wchar, ts, (double)status.io.wchar, 0, NULL); - cmt_counter_set(ctx->syscr, ts, (double)status.io.syscr, 0, NULL); - cmt_counter_set(ctx->syscw, ts, (double)status.io.syscw, 0, NULL); - cmt_counter_set(ctx->read_bytes, ts, (double)status.io.read_bytes, 0, NULL); - cmt_counter_set(ctx->write_bytes, ts, (double)status.io.write_bytes, 0, NULL); + if (ctx->pid == 0) { + snprintf(pid, sizeof(pid)-1, "%d", getpid()); + } else { + snprintf(pid, sizeof(pid)-1, "%d", ctx->pid); + } + + cmt_counter_set(ctx->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(ctx->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(ctx->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); + cmt_counter_set(ctx->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); + cmt_counter_set(ctx->read_bytes, ts, (double)status.io.read_bytes, + 1, (char *[]) {(char *) pid}); + cmt_counter_set(ctx->write_bytes, ts, (double)status.io.write_bytes, + 1, (char *[]) {(char *) pid}); cmt_counter_set(ctx->cancelled_write_bytes, ts, - (double)status.io.cancelled_write_bytes, 0, NULL); + (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->size, ts, (double)status.mem.size, 0, NULL); - cmt_gauge_set(ctx->resident, ts, (double)status.mem.resident, 0, NULL); - cmt_gauge_set(ctx->shared, ts, (double)status.mem.shared, 0, NULL); - cmt_gauge_set(ctx->trs, ts, (double)status.mem.trs, 0, NULL); - cmt_gauge_set(ctx->lrs, ts, (double)status.mem.lrs, 0, NULL); - cmt_gauge_set(ctx->drs, ts, (double)status.mem.drs, 0, NULL); - cmt_gauge_set(ctx->dt, ts, (double)status.mem.dt, 0, NULL); + cmt_gauge_set(ctx->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->resident, ts, (double)status.mem.resident, + 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(ctx->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); if (ret != 0) { @@ -222,6 +302,21 @@ static int proc_metrics_collect(struct flb_input_instance *ins, return ret; } +int str_isnumeric(const char *str) +{ + int i; + + if (str == NULL) { + return FLB_FALSE; + } + for (i = 0; i < strlen(str); i++) { + if (isdigit(str[i]) == 0) { + return FLB_FALSE; + } + } + return FLB_TRUE; +} + /** * Function to initialize the proc stats plugin. * @@ -258,7 +353,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, ctx->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", "The number of bytes which this task has " - "caused to be read from storage.", 0, NULL); + "caused to be read from storage.", 1, (char *[]) {"pid"}); if (ctx->rchar == NULL) { flb_plg_error(ins, "could not initialize rchar counter"); goto cmt_counter_error; @@ -267,7 +362,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, ctx->wchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "wchar", "The number of bytes which this task has " "caused, or shall cause to be written to " - "disk.", 0, NULL); + "disk.", 1, (char *[]) {"pid"}); if (ctx->wchar == NULL) { flb_plg_error(ins, "could not initialize wchar counter"); goto cmt_counter_error; @@ -276,7 +371,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, ctx->syscr = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscr", "Attempt to count the number of read I/O " "operations, i.e. syscalls like read() and " - "pread().", 0, NULL); + "pread().", 1, (char *[]) {"pid"}); if (ctx->syscr == NULL) { flb_plg_error(ins, "could not initialize syscr counter"); goto cmt_counter_error; @@ -285,7 +380,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, ctx->syscw = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscw", "Attempt to count the number of write I/O " "operations, i.e. syscalls like write() and " - "pwrite().", 0, NULL); + "pwrite().", 1, (char *[]) {"pid"}); if (ctx->syscw == NULL) { flb_plg_error(ins, "could not initialize syscw counter"); goto cmt_counter_error; @@ -295,7 +390,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, "Attempt to count the number of bytes " "which this process really did cause to" " be fetched from the storage layer.", - 0, NULL); + 1, (char *[]) {"pid"}); if (ctx->read_bytes == NULL) { flb_plg_error(ins, "could not initialize read_bytes counter"); goto cmt_counter_error; @@ -304,7 +399,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, ctx->write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "write_bytes", "Attempt to count the number of bytes " "which this process caused to be sent " - "to the storage layer.", 0, NULL); + "to the storage layer.", 1, (char *[]) {"pid"}); if (ctx->write_bytes == NULL) { flb_plg_error(ins, "could not initialize write_bytes counter"); goto cmt_counter_error; @@ -315,61 +410,72 @@ static int proc_metrics_init(struct flb_input_instance *ins, "The number of bytes which " "this process caused to not " "happen, by truncating " - "pagecache.", 0, NULL); + "pagecache.", 1, (char *[]) {"pid"}); if (ctx->cancelled_write_bytes == NULL) { flb_plg_error(ins, "could not initialize cancelled_write_bytes counter"); goto cmt_counter_error; } ctx->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", - "total program size (pages).", 0, NULL); + "total program size (pages).", 1, (char *[]) {"pid"}); if (ctx->size == NULL) { flb_plg_error(ins, "could not initialize size gauge"); goto cmt_gauge_error; } ctx->resident = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "resident", - "size of memory portions (pages).", 0, NULL); + "size of memory portions (pages).", 1, (char *[]) {"pid"}); if (ctx->resident == NULL) { flb_plg_error(ins, "could not initialize resident gauge"); goto cmt_gauge_error; } ctx->shared = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "shared", - "number of pages that are shared.", 0, NULL); + "number of pages that are shared.", 1, (char *[]) {"pid"}); if (ctx->shared == NULL) { flb_plg_error(ins, "could not initialize shared gauge"); goto cmt_gauge_error; } ctx->trs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "trs", - "number of pages that are ‘code’.", 0, NULL); + "number of pages that are ‘code’.", 1, (char *[]) {"pid"}); if (ctx->trs == NULL) { flb_plg_error(ins, "could not initialize trs gauge"); goto cmt_gauge_error; } ctx->lrs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "lrs", - "number of pages of library.", 0, NULL); + "number of pages of library.", 1, (char *[]) {"pid"}); if (ctx->lrs == NULL) { flb_plg_error(ins, "could not initialize lrs gauge"); goto cmt_gauge_error; } ctx->drs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "drs", - "number of pages of data/stack.", 0, NULL); + "number of pages of data/stack.", 1, (char *[]) {"pid"}); if (ctx->drs == NULL) { flb_plg_error(ins, "could not initialize drs gauge"); goto cmt_gauge_error; } ctx->dt = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "dt", - "number of dirty pages.", 0, NULL); + "number of dirty pages.", 1, (char *[]) {"pid"}); if (ctx->dt == NULL) { flb_plg_error(ins, "could not initialize dt gauge"); goto cmt_gauge_error; } + /* save the PID just once if the process is numeric */ + if (str_isnumeric(ctx->process) == FLB_TRUE) { + ret = strtol(ctx->process, (char **)NULL, 10); + if (ret == -1) { + goto cmt_gauge_error; + } + ctx->pid = ret; + } else { + ctx->proc_name = ctx->process; + } + flb_input_set_context(ins, ctx); ctx->coll_id = flb_input_set_collector_time(ins, diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index ab965da848f..066ba475b8a 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -30,6 +30,7 @@ struct proc_metrics_ctx { int coll_id; /* collector id */ pid_t pid; /* process id to monitor */ + char *proc_name; /* process name used for querying each tick */ char *process; /* process name or id to monitor */ struct flb_parser *parser; struct flb_input_instance *ins; /* Input plugin instace */ From 5f7760557fbd68f97fb7106dc616bfccd3bf18c9 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Sat, 11 Sep 2021 14:10:37 -0300 Subject: [PATCH 03/21] in_proc_metrics: use a list to track process metrics for each process. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 354 +++++++++++++------------ plugins/in_proc_metrics/proc_metrics.h | 7 + 2 files changed, 195 insertions(+), 166 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 254c07625dd..6cbda24d9ae 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -42,8 +42,7 @@ * write_bytes: 0 * cancelled_write_bytes: 0 */ -static int parse_proc_io(struct proc_metrics_ctx *ctx, const char *buf, - struct proc_metrics_io_status *status) +static int parse_proc_io(const char *buf, struct proc_metrics_io_status *status) { struct mk_list *llines; struct mk_list *head; @@ -86,7 +85,7 @@ static int parse_proc_io(struct proc_metrics_ctx *ctx, const char *buf, /* size res trs lrs drs dt (implied) * 1793 516 482 4 0 180 0 */ -static int parse_proc_mem(struct proc_metrics_ctx *ctx, const char *buf, struct proc_metrics_mem_status *status) +static int parse_proc_mem(const char *buf, struct proc_metrics_mem_status *status) { struct mk_list *lfields; struct mk_list *head; @@ -149,19 +148,17 @@ static int read_file_lines(const char *path, char *buf, size_t maxlen, int lines return 0; } -static int read_stat_file(struct proc_metrics_ctx *ctx, const char *file, +static int read_stat_file(pid_t pid, const char *file, char *buf, size_t maxlen, int lines) { char pathname[PATH_MAX]; - if (ctx->pid > 0) { - snprintf(pathname, sizeof(pathname)-1, "/proc/%d/%s", ctx->pid, file); + if (pid > 0) { + snprintf(pathname, sizeof(pathname)-1, "/proc/%d/%s", pid, file); } else { snprintf(pathname, sizeof(pathname)-1, "/proc/%d/%s", getpid(), file); } - flb_plg_debug(ctx->ins, "open proc file: %s", pathname); if (read_file_lines(pathname, buf, maxlen, lines) == -1) { flb_errno(); - flb_plg_error(ctx->ins, "unable to open file: %s", pathname); return -1; } return 0; @@ -224,126 +221,16 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, return ret; } - -/** - * Callback function to gather statistics from /proc/$PID. - * - * - * @param ins Pointer to flb_input_instance - * @param config Pointer to flb_config - * @param in_context void Pointer used to cast to - * flb_in_de_config - * - * @return int 0 for success -1 for failure. - */ -static int proc_metrics_collect(struct flb_input_instance *ins, - struct flb_config *config, void *in_context) -{ - char buf[1024]; - struct proc_metrics_ctx *ctx = (struct proc_metrics_ctx *)in_context; - uint64_t ts = cmt_time_now(); - struct proc_metrics_status status; - char pid[64]; - int ret; - - if (ctx->proc_name != NULL) { - ret = get_pid_from_procname_linux(ctx, ctx->proc_name); - if (ret == -1) { - return -1; - } - ctx->pid = ret; - } - - if (read_stat_file(ctx, "io", buf, sizeof(buf)-1, 7) == -1) { - return -1; - } - if (parse_proc_io(ctx, buf, &status.io) != 0) { - flb_free(buf); - return -1; - } - - if (read_stat_file(ctx, "statm", buf, sizeof(buf)-1, 1) == -1) { - return -1; - } - if (parse_proc_mem(ctx, buf, &status.mem) != 0) { - return -1; - } - - if (ctx->pid == 0) { - snprintf(pid, sizeof(pid)-1, "%d", getpid()); - } else { - snprintf(pid, sizeof(pid)-1, "%d", ctx->pid); - } - - cmt_counter_set(ctx->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->read_bytes, ts, (double)status.io.read_bytes, - 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->write_bytes, ts, (double)status.io.write_bytes, - 1, (char *[]) {(char *) pid}); - cmt_counter_set(ctx->cancelled_write_bytes, ts, - (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); - - cmt_gauge_set(ctx->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->resident, ts, (double)status.mem.resident, - 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(ctx->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); - - ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); - if (ret != 0) { - flb_plg_error(ins, "could not append metrics"); - } - return ret; -} - -int str_isnumeric(const char *str) -{ - int i; - - if (str == NULL) { - return FLB_FALSE; - } - for (i = 0; i < strlen(str); i++) { - if (isdigit(str[i]) == 0) { - return FLB_FALSE; - } - } - return FLB_TRUE; -} - -/** - * Function to initialize the proc stats plugin. - * - * @param ins Pointer to flb_input_instance - * @param config Pointer to flb_config - * @param data Unused - * - * @return int 0 on success, -1 on failure - */ -static int proc_metrics_init(struct flb_input_instance *ins, - struct flb_config *config, void *data) +static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *ins, pid_t pid) { - struct proc_metrics_ctx *ctx; - int ret; + struct proc_metrics_pid_cmt *ctx; - ctx = flb_calloc(1, sizeof(struct proc_metrics_ctx)); - if (!ctx) { - flb_errno(); - return -1; + ctx = flb_calloc(1, sizeof(struct proc_metrics_pid_cmt)); + if (ctx == NULL) { + return NULL; } - ctx->ins = ins; - ret = flb_input_config_map_set(ins, (void *)ctx); - if (ret == -1) { - flb_free(ctx); - return -1; - } + ctx->pid = pid; ctx->cmt = cmt_create(); if (!ctx->cmt) { @@ -465,24 +352,7 @@ static int proc_metrics_init(struct flb_input_instance *ins, goto cmt_gauge_error; } - /* save the PID just once if the process is numeric */ - if (str_isnumeric(ctx->process) == FLB_TRUE) { - ret = strtol(ctx->process, (char **)NULL, 10); - if (ret == -1) { - goto cmt_gauge_error; - } - ctx->pid = ret; - } else { - ctx->proc_name = ctx->process; - } - - - flb_input_set_context(ins, ctx); - ctx->coll_id = flb_input_set_collector_time(ins, - proc_metrics_collect, - 1, 0, config); - return 0; - + return ctx; cmt_gauge_error: if (ctx->size != NULL) { cmt_gauge_destroy(ctx->size); @@ -510,6 +380,182 @@ static int proc_metrics_init(struct flb_input_instance *ins, cmt_counter_destroy(ctx->cancelled_write_bytes); } cmt_destroy(ctx->cmt); +cmt_error: + flb_free(ctx); + return NULL; +} + +struct proc_metrics_pid_cmt *get_proc_metrics(struct proc_metrics_ctx *ctx, pid_t pid) +{ + struct mk_list *tmp; + struct mk_list *head; + struct proc_metrics_pid_cmt *proc; + + if (mk_list_is_empty(&ctx->procs)) { + proc = create_pid_cmt(ctx->ins, pid); + mk_list_add(&proc->_head, &ctx->procs); + return proc; + } + + mk_list_foreach_safe(head, tmp, &ctx->procs) { + proc = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); + if (proc->pid == pid) { + return proc; + } + } + + proc = create_pid_cmt(ctx->ins, pid); + mk_list_add(&proc->_head, &ctx->procs); + return proc; +} + +/** + * Callback function to gather statistics from /proc/$PID. + * + * + * @param ins Pointer to flb_input_instance + * @param config Pointer to flb_config + * @param in_context void Pointer used to cast to + * flb_in_de_config + * + * @return int 0 for success -1 for failure. + */ +static int proc_metrics_collect(struct flb_input_instance *ins, + struct flb_config *config, void *in_context) +{ + char buf[1024]; + struct proc_metrics_ctx *ctx = (struct proc_metrics_ctx *)in_context; + uint64_t ts = cmt_time_now(); + struct proc_metrics_status status; + char pid[64]; + int ret; + struct proc_metrics_pid_cmt *metrics; + + if (ctx->proc_name != NULL) { + ret = get_pid_from_procname_linux(ctx, ctx->proc_name); + if (ret == -1) { + return -1; + } + ctx->pid = ret; + } + + if (ctx->pid > 0) { + metrics = get_proc_metrics(ctx, ctx->pid); + } else { + metrics = get_proc_metrics(ctx, getpid()); + } + + if (read_stat_file(metrics->pid, "io", buf, sizeof(buf)-1, 7) == -1) { + return -1; + } + + if (parse_proc_io(buf, &status.io) != 0) { + flb_free(buf); + return -1; + } + + if (read_stat_file(metrics->pid, "statm", buf, sizeof(buf)-1, 1) == -1) { + return -1; + } + + if (parse_proc_mem(buf, &status.mem) != 0) { + return -1; + } + + if (metrics->pid == 0) { + snprintf(pid, sizeof(pid)-1, "%d", getpid()); + } else { + snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); + } + + cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->read_bytes, ts, (double)status.io.read_bytes, + 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->write_bytes, ts, (double)status.io.write_bytes, + 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->cancelled_write_bytes, ts, + (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); + + cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, + 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); + + ret = flb_input_metrics_append(ins, NULL, 0, metrics->cmt); + if (ret != 0) { + flb_plg_error(ins, "could not append metrics"); + } + return ret; +} + +int str_isnumeric(const char *str) +{ + int i; + + if (str == NULL) { + return FLB_FALSE; + } + for (i = 0; i < strlen(str); i++) { + if (isdigit(str[i]) == 0) { + return FLB_FALSE; + } + } + return FLB_TRUE; +} + +/** + * Function to initialize the proc stats plugin. + * + * @param ins Pointer to flb_input_instance + * @param config Pointer to flb_config + * @param data Unused + * + * @return int 0 on success, -1 on failure + */ +static int proc_metrics_init(struct flb_input_instance *ins, + struct flb_config *config, void *data) +{ + struct proc_metrics_ctx *ctx; + int ret; + + ctx = flb_calloc(1, sizeof(struct proc_metrics_ctx)); + if (!ctx) { + flb_errno(); + return -1; + } + ctx->ins = ins; + + ret = flb_input_config_map_set(ins, (void *)ctx); + if (ret == -1) { + flb_free(ctx); + return -1; + } + + /* save the PID just once if the process is numeric */ + if (str_isnumeric(ctx->process) == FLB_TRUE) { + ret = strtol(ctx->process, (char **)NULL, 10); + if (ret == -1) { + goto cmt_error; + } + ctx->pid = ret; + } else { + ctx->proc_name = ctx->process; + } + + mk_list_init(&ctx->procs); + + flb_input_set_context(ins, ctx); + ctx->coll_id = flb_input_set_collector_time(ins, + proc_metrics_collect, + 1, 0, config); + return 0; cmt_error: flb_free(ctx); return -1; @@ -524,30 +570,6 @@ static int proc_metrics_init(struct flb_input_instance *ins, */ static int proc_metrics_ctx_destroy(struct proc_metrics_ctx *ctx) { - if (ctx->cmt) { - if (ctx->rchar != NULL) { - cmt_counter_destroy(ctx->rchar); - } - if (ctx->wchar != NULL) { - cmt_counter_destroy(ctx->wchar); - } - if (ctx->syscr != NULL) { - cmt_counter_destroy(ctx->syscr); - } - if (ctx->syscw != NULL) { - cmt_counter_destroy(ctx->syscw); - } - if (ctx->read_bytes != NULL) { - cmt_counter_destroy(ctx->read_bytes); - } - if (ctx->write_bytes != NULL) { - cmt_counter_destroy(ctx->write_bytes); - } - if (ctx->cancelled_write_bytes != NULL) { - cmt_counter_destroy(ctx->cancelled_write_bytes); - } - cmt_destroy(ctx->cmt); - } flb_free(ctx); return 0; } diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index 066ba475b8a..6d334842cc6 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -34,6 +34,11 @@ struct proc_metrics_ctx char *process; /* process name or id to monitor */ struct flb_parser *parser; struct flb_input_instance *ins; /* Input plugin instace */ + struct mk_list procs; +}; + +struct proc_metrics_pid_cmt { + pid_t pid; struct cmt *cmt; /* rchar: 260189 * wchar: 413454 @@ -58,6 +63,8 @@ struct proc_metrics_ctx struct cmt_gauge *lrs; struct cmt_gauge *drs; struct cmt_gauge *dt; + + struct mk_list _head; }; struct proc_metrics_io_status From 1c226b2cb44ca83edf499285ebb63a058fd0b4b6 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Sat, 11 Sep 2021 14:43:37 -0300 Subject: [PATCH 04/21] in_proc_metrics: rename ctx to proc in create_pid_cmt to reduce confusion. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 106 ++++++++++++------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 6cbda24d9ae..ffe07d72f38 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -223,165 +223,165 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *ins, pid_t pid) { - struct proc_metrics_pid_cmt *ctx; + struct proc_metrics_pid_cmt *proc; - ctx = flb_calloc(1, sizeof(struct proc_metrics_pid_cmt)); - if (ctx == NULL) { + proc = flb_calloc(1, sizeof(struct proc_metrics_pid_cmt)); + if (proc == NULL) { return NULL; } - ctx->pid = pid; + proc->pid = pid; - ctx->cmt = cmt_create(); - if (!ctx->cmt) { + proc->cmt = cmt_create(); + if (!proc->cmt) { flb_plg_error(ins, "could not initialize CMetrics"); goto cmt_error; } - ctx->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", + proc->rchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "rchar", "The number of bytes which this task has " "caused to be read from storage.", 1, (char *[]) {"pid"}); - if (ctx->rchar == NULL) { + if (proc->rchar == NULL) { flb_plg_error(ins, "could not initialize rchar counter"); goto cmt_counter_error; } - ctx->wchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "wchar", + proc->wchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "wchar", "The number of bytes which this task has " "caused, or shall cause to be written to " "disk.", 1, (char *[]) {"pid"}); - if (ctx->wchar == NULL) { + if (proc->wchar == NULL) { flb_plg_error(ins, "could not initialize wchar counter"); goto cmt_counter_error; } - ctx->syscr = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscr", + proc->syscr = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscr", "Attempt to count the number of read I/O " "operations, i.e. syscalls like read() and " "pread().", 1, (char *[]) {"pid"}); - if (ctx->syscr == NULL) { + if (proc->syscr == NULL) { flb_plg_error(ins, "could not initialize syscr counter"); goto cmt_counter_error; } - ctx->syscw = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscw", + proc->syscw = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscw", "Attempt to count the number of write I/O " "operations, i.e. syscalls like write() and " "pwrite().", 1, (char *[]) {"pid"}); - if (ctx->syscw == NULL) { + if (proc->syscw == NULL) { flb_plg_error(ins, "could not initialize syscw counter"); goto cmt_counter_error; } - ctx->read_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "read_bytes", + proc->read_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "read_bytes", "Attempt to count the number of bytes " "which this process really did cause to" " be fetched from the storage layer.", 1, (char *[]) {"pid"}); - if (ctx->read_bytes == NULL) { + if (proc->read_bytes == NULL) { flb_plg_error(ins, "could not initialize read_bytes counter"); goto cmt_counter_error; } - ctx->write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "write_bytes", + proc->write_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "write_bytes", "Attempt to count the number of bytes " "which this process caused to be sent " "to the storage layer.", 1, (char *[]) {"pid"}); - if (ctx->write_bytes == NULL) { + if (proc->write_bytes == NULL) { flb_plg_error(ins, "could not initialize write_bytes counter"); goto cmt_counter_error; } - ctx->cancelled_write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", + proc->cancelled_write_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "cancelled_write_bytes", "The number of bytes which " "this process caused to not " "happen, by truncating " "pagecache.", 1, (char *[]) {"pid"}); - if (ctx->cancelled_write_bytes == NULL) { + if (proc->cancelled_write_bytes == NULL) { flb_plg_error(ins, "could not initialize cancelled_write_bytes counter"); goto cmt_counter_error; } - ctx->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", + proc->size = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "size", "total program size (pages).", 1, (char *[]) {"pid"}); - if (ctx->size == NULL) { + if (proc->size == NULL) { flb_plg_error(ins, "could not initialize size gauge"); goto cmt_gauge_error; } - ctx->resident = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "resident", + proc->resident = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "resident", "size of memory portions (pages).", 1, (char *[]) {"pid"}); - if (ctx->resident == NULL) { + if (proc->resident == NULL) { flb_plg_error(ins, "could not initialize resident gauge"); goto cmt_gauge_error; } - ctx->shared = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "shared", + proc->shared = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "shared", "number of pages that are shared.", 1, (char *[]) {"pid"}); - if (ctx->shared == NULL) { + if (proc->shared == NULL) { flb_plg_error(ins, "could not initialize shared gauge"); goto cmt_gauge_error; } - ctx->trs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "trs", + proc->trs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "trs", "number of pages that are ‘code’.", 1, (char *[]) {"pid"}); - if (ctx->trs == NULL) { + if (proc->trs == NULL) { flb_plg_error(ins, "could not initialize trs gauge"); goto cmt_gauge_error; } - ctx->lrs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "lrs", + proc->lrs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "lrs", "number of pages of library.", 1, (char *[]) {"pid"}); - if (ctx->lrs == NULL) { + if (proc->lrs == NULL) { flb_plg_error(ins, "could not initialize lrs gauge"); goto cmt_gauge_error; } - ctx->drs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "drs", + proc->drs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "drs", "number of pages of data/stack.", 1, (char *[]) {"pid"}); - if (ctx->drs == NULL) { + if (proc->drs == NULL) { flb_plg_error(ins, "could not initialize drs gauge"); goto cmt_gauge_error; } - ctx->dt = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "dt", + proc->dt = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "dt", "number of dirty pages.", 1, (char *[]) {"pid"}); - if (ctx->dt == NULL) { + if (proc->dt == NULL) { flb_plg_error(ins, "could not initialize dt gauge"); goto cmt_gauge_error; } - return ctx; + return proc; cmt_gauge_error: - if (ctx->size != NULL) { - cmt_gauge_destroy(ctx->size); + if (proc->size != NULL) { + cmt_gauge_destroy(proc->size); } cmt_counter_error: - if (ctx->rchar != NULL) { - cmt_counter_destroy(ctx->rchar); + if (proc->rchar != NULL) { + cmt_counter_destroy(proc->rchar); } - if (ctx->wchar != NULL) { - cmt_counter_destroy(ctx->wchar); + if (proc->wchar != NULL) { + cmt_counter_destroy(proc->wchar); } - if (ctx->syscr != NULL) { - cmt_counter_destroy(ctx->syscr); + if (proc->syscr != NULL) { + cmt_counter_destroy(proc->syscr); } - if (ctx->syscw != NULL) { - cmt_counter_destroy(ctx->syscw); + if (proc->syscw != NULL) { + cmt_counter_destroy(proc->syscw); } - if (ctx->read_bytes != NULL) { - cmt_counter_destroy(ctx->read_bytes); + if (proc->read_bytes != NULL) { + cmt_counter_destroy(proc->read_bytes); } - if (ctx->write_bytes != NULL) { - cmt_counter_destroy(ctx->write_bytes); + if (proc->write_bytes != NULL) { + cmt_counter_destroy(proc->write_bytes); } - if (ctx->cancelled_write_bytes != NULL) { - cmt_counter_destroy(ctx->cancelled_write_bytes); + if (proc->cancelled_write_bytes != NULL) { + cmt_counter_destroy(proc->cancelled_write_bytes); } - cmt_destroy(ctx->cmt); + cmt_destroy(proc->cmt); cmt_error: - flb_free(ctx); + flb_free(proc); return NULL; } From b0c76e0585bd92e9c5846ab9b309205e5304a293 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Sat, 11 Sep 2021 14:45:37 -0300 Subject: [PATCH 05/21] in_proc_metrics: simplify code that selects pid to collect. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index ffe07d72f38..4f450b10c17 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -436,10 +436,8 @@ static int proc_metrics_collect(struct flb_input_instance *ins, if (ret == -1) { return -1; } - ctx->pid = ret; - } - - if (ctx->pid > 0) { + metrics = get_proc_metrics(ctx, ret); + } else if (ctx->pid > 0) { metrics = get_proc_metrics(ctx, ctx->pid); } else { metrics = get_proc_metrics(ctx, getpid()); From 184c78e0d651f8fd6b0da256a44fd3a8641d2688 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 13 Sep 2021 18:42:40 -0300 Subject: [PATCH 06/21] in_proc_metrics: work on lists of processes, WIP. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 186 +++++++++++++++++++------ plugins/in_proc_metrics/proc_metrics.h | 5 + 2 files changed, 149 insertions(+), 42 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 4f450b10c17..543a5db6006 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -221,6 +221,92 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, return ret; } +static struct mk_list *get_proc_entries_from_procname_linux(struct proc_metrics_ctx *ctx, + const char* proc) +{ + struct mk_list *pids; + struct proc_entry *entry; + glob_t glb; + int i; + int fd = -1; + long ret_scan = -1; + int ret_glb = -1; + ssize_t count; + + char cmdname[FLB_CMD_LEN]; + char* bname = NULL; + + pids = flb_calloc(1, sizeof(struct mk_list)); + if (pids == NULL) { + return NULL; + } + mk_list_init(pids); + + ret_glb = glob("/proc/*/cmdline", 0 ,NULL, &glb); + if (ret_glb != 0) { + switch(ret_glb){ + case GLOB_NOSPACE: + flb_plg_warn(ctx->ins, "glob: no space"); + break; + case GLOB_NOMATCH: + flb_plg_warn(ctx->ins, "glob: no match"); + break; + case GLOB_ABORTED: + flb_plg_warn(ctx->ins, "glob: aborted"); + break; + default: + flb_plg_warn(ctx->ins, "glob: other error"); + } + goto glob_error; + } + + for (i = 0; i < glb.gl_pathc; i++) { + fd = open(glb.gl_pathv[i], O_RDONLY); + if (fd < 0) { + continue; + } + count = read(fd, &cmdname, FLB_CMD_LEN); + if (count <= 0){ + close(fd); + continue; + } + cmdname[FLB_CMD_LEN-1] = '\0'; + bname = basename(cmdname); + + if (strncmp(proc, bname, FLB_CMD_LEN) == 0) { + sscanf(glb.gl_pathv[i],"/proc/%ld/cmdline",&ret_scan); + entry = flb_calloc(1, sizeof(struct proc_entry)); + if (entry == NULL) { + goto proc_entry_error; + } + entry->pid = (pid_t)ret_scan; + mk_list_add(&entry->_head, pids); + } + close(fd); + } + globfree(&glb); + return pids; +proc_entry_error: + globfree(&glb); +glob_error: + flb_free(pids); + return NULL; +} + +static void proc_entries_free(struct mk_list *procs) +{ + struct mk_list *head; + struct mk_list *tmp; + struct proc_entry *entry; + + mk_list_foreach_safe(head, tmp, procs) { + entry = mk_list_entry(head, struct proc_entry, _head); + flb_free(entry); + } + + flb_free(procs); +} + static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *ins, pid_t pid) { struct proc_metrics_pid_cmt *proc; @@ -430,6 +516,8 @@ static int proc_metrics_collect(struct flb_input_instance *ins, char pid[64]; int ret; struct proc_metrics_pid_cmt *metrics; + struct mk_list *head; + struct mk_list *tmp; if (ctx->proc_name != NULL) { ret = get_pid_from_procname_linux(ctx, ctx->proc_name); @@ -443,52 +531,66 @@ static int proc_metrics_collect(struct flb_input_instance *ins, metrics = get_proc_metrics(ctx, getpid()); } - if (read_stat_file(metrics->pid, "io", buf, sizeof(buf)-1, 7) == -1) { - return -1; - } + mk_list_foreach_safe(head, tmp, &ctx->procs) { + metrics = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); + if (read_stat_file(metrics->pid, "io", buf, sizeof(buf)-1, 7) == -1) { + if (errno == ENOENT) { + mk_list_del(&metrics->_head); + } else { + flb_errno(); + } + flb_free(metrics); + continue; + } - if (parse_proc_io(buf, &status.io) != 0) { - flb_free(buf); - return -1; - } + if (parse_proc_io(buf, &status.io) != 0) { + continue; + } - if (read_stat_file(metrics->pid, "statm", buf, sizeof(buf)-1, 1) == -1) { - return -1; - } + if (read_stat_file(metrics->pid, "statm", buf, sizeof(buf)-1, 1) == -1) { + if (errno == ENOENT) { + mk_list_del(&metrics->_head); + } else { + flb_errno(); + } + flb_free(metrics); + continue; + } - if (parse_proc_mem(buf, &status.mem) != 0) { - return -1; - } + if (parse_proc_mem(buf, &status.mem) != 0) { + continue; + } - if (metrics->pid == 0) { - snprintf(pid, sizeof(pid)-1, "%d", getpid()); - } else { - snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); - } - - cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->read_bytes, ts, (double)status.io.read_bytes, - 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->write_bytes, ts, (double)status.io.write_bytes, - 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->cancelled_write_bytes, ts, - (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); - - cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, - 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); - - ret = flb_input_metrics_append(ins, NULL, 0, metrics->cmt); - if (ret != 0) { - flb_plg_error(ins, "could not append metrics"); + if (metrics->pid == 0) { + snprintf(pid, sizeof(pid)-1, "%d", getpid()); + } else { + snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); + } + + cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->read_bytes, ts, (double)status.io.read_bytes, + 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->write_bytes, ts, (double)status.io.write_bytes, + 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->cancelled_write_bytes, ts, + (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); + + cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, + 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); + + ret = flb_input_metrics_append(ins, NULL, 0, metrics->cmt); + if (ret != 0) { + flb_plg_error(ins, "could not append metrics"); + } } return ret; } diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index 6d334842cc6..57de981afab 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -37,6 +37,11 @@ struct proc_metrics_ctx struct mk_list procs; }; +struct proc_entry { + pid_t pid; + struct mk_list _head; +}; + struct proc_metrics_pid_cmt { pid_t pid; struct cmt *cmt; From e08adcab5b700d6d3630c4109ed41c73822a18b2 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:12:49 -0300 Subject: [PATCH 07/21] in_proc_metrics: label metrics with the process cmdline. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 59 +++++++++++++------------- plugins/in_proc_metrics/proc_metrics.h | 3 ++ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 543a5db6006..0fd77883b2f 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -32,8 +32,6 @@ #include "proc_metrics.h" -#define FLB_CMD_LEN 256 - /* rchar: 260189 * wchar: 413454 * syscr: 2036 @@ -317,6 +315,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in } proc->pid = pid; + read_stat_file(pid, "cmdline", proc->cmdline, FLB_CMD_LEN-1, 1); proc->cmt = cmt_create(); if (!proc->cmt) { @@ -326,7 +325,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->rchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "rchar", "The number of bytes which this task has " - "caused to be read from storage.", 1, (char *[]) {"pid"}); + "caused to be read from storage.", 2, (char *[]) {"pid", "cmdline"}); if (proc->rchar == NULL) { flb_plg_error(ins, "could not initialize rchar counter"); goto cmt_counter_error; @@ -335,7 +334,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->wchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "wchar", "The number of bytes which this task has " "caused, or shall cause to be written to " - "disk.", 1, (char *[]) {"pid"}); + "disk.", 2, (char *[]) {"pid", "cmdline"}); if (proc->wchar == NULL) { flb_plg_error(ins, "could not initialize wchar counter"); goto cmt_counter_error; @@ -344,7 +343,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->syscr = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscr", "Attempt to count the number of read I/O " "operations, i.e. syscalls like read() and " - "pread().", 1, (char *[]) {"pid"}); + "pread().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscr == NULL) { flb_plg_error(ins, "could not initialize syscr counter"); goto cmt_counter_error; @@ -353,7 +352,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->syscw = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscw", "Attempt to count the number of write I/O " "operations, i.e. syscalls like write() and " - "pwrite().", 1, (char *[]) {"pid"}); + "pwrite().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscw == NULL) { flb_plg_error(ins, "could not initialize syscw counter"); goto cmt_counter_error; @@ -363,7 +362,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in "Attempt to count the number of bytes " "which this process really did cause to" " be fetched from the storage layer.", - 1, (char *[]) {"pid"}); + 2, (char *[]) {"pid", "cmdline"}); if (proc->read_bytes == NULL) { flb_plg_error(ins, "could not initialize read_bytes counter"); goto cmt_counter_error; @@ -372,7 +371,7 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->write_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "write_bytes", "Attempt to count the number of bytes " "which this process caused to be sent " - "to the storage layer.", 1, (char *[]) {"pid"}); + "to the storage layer.", 2, (char *[]) {"pid", "cmdline"}); if (proc->write_bytes == NULL) { flb_plg_error(ins, "could not initialize write_bytes counter"); goto cmt_counter_error; @@ -383,56 +382,56 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in "The number of bytes which " "this process caused to not " "happen, by truncating " - "pagecache.", 1, (char *[]) {"pid"}); + "pagecache.", 2, (char *[]) {"pid", "cmdline"}); if (proc->cancelled_write_bytes == NULL) { flb_plg_error(ins, "could not initialize cancelled_write_bytes counter"); goto cmt_counter_error; } proc->size = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "size", - "total program size (pages).", 1, (char *[]) {"pid"}); + "total program size (pages).", 2, (char *[]) {"pid", "cmdline"}); if (proc->size == NULL) { flb_plg_error(ins, "could not initialize size gauge"); goto cmt_gauge_error; } proc->resident = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "resident", - "size of memory portions (pages).", 1, (char *[]) {"pid"}); + "size of memory portions (pages).", 2, (char *[]) {"pid", "cmdline"}); if (proc->resident == NULL) { flb_plg_error(ins, "could not initialize resident gauge"); goto cmt_gauge_error; } proc->shared = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "shared", - "number of pages that are shared.", 1, (char *[]) {"pid"}); + "number of pages that are shared.", 2, (char *[]) {"pid", "cmdline"}); if (proc->shared == NULL) { flb_plg_error(ins, "could not initialize shared gauge"); goto cmt_gauge_error; } proc->trs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "trs", - "number of pages that are ‘code’.", 1, (char *[]) {"pid"}); + "number of pages that are ‘code’.", 2, (char *[]) {"pid", "cmdline"}); if (proc->trs == NULL) { flb_plg_error(ins, "could not initialize trs gauge"); goto cmt_gauge_error; } proc->lrs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "lrs", - "number of pages of library.", 1, (char *[]) {"pid"}); + "number of pages of library.", 2, (char *[]) {"pid", "cmdline"}); if (proc->lrs == NULL) { flb_plg_error(ins, "could not initialize lrs gauge"); goto cmt_gauge_error; } proc->drs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "drs", - "number of pages of data/stack.", 1, (char *[]) {"pid"}); + "number of pages of data/stack.", 2, (char *[]) {"pid", "cmdline"}); if (proc->drs == NULL) { flb_plg_error(ins, "could not initialize drs gauge"); goto cmt_gauge_error; } proc->dt = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "dt", - "number of dirty pages.", 1, (char *[]) {"pid"}); + "number of dirty pages.", 2, (char *[]) {"pid", "cmdline"}); if (proc->dt == NULL) { flb_plg_error(ins, "could not initialize dt gauge"); goto cmt_gauge_error; @@ -567,25 +566,25 @@ static int proc_metrics_collect(struct flb_input_instance *ins, snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); } - cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 1, (char *[]) {(char *) pid}); - cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 1, (char *[]) {(char *) pid}); + cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 2, (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 2, (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 2, (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->read_bytes, ts, (double)status.io.read_bytes, - 1, (char *[]) {(char *) pid}); + 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->write_bytes, ts, (double)status.io.write_bytes, - 1, (char *[]) {(char *) pid}); + 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->cancelled_write_bytes, ts, - (double)status.io.cancelled_write_bytes, 1, (char *[]) {(char *) pid}); + (double)status.io.cancelled_write_bytes, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 1, (char *[]) {(char *) pid}); + cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 2, (char *[]) {pid, metrics->cmdline}); cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, - 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 1, (char *[]) {(char *) pid}); - cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 1, (char *[]) {(char *) pid}); + 2, (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 2, (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 2, (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 2, (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 2, (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 2, (char *[]) {pid, metrics->cmdline}); ret = flb_input_metrics_append(ins, NULL, 0, metrics->cmt); if (ret != 0) { diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index 57de981afab..74734b67c37 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -37,6 +37,8 @@ struct proc_metrics_ctx struct mk_list procs; }; +#define FLB_CMD_LEN 256 + struct proc_entry { pid_t pid; struct mk_list _head; @@ -44,6 +46,7 @@ struct proc_entry { struct proc_metrics_pid_cmt { pid_t pid; + char cmdline[FLB_CMD_LEN]; struct cmt *cmt; /* rchar: 260189 * wchar: 413454 From 65ddac84f7f8b76c497f4440380c864ebe64e4cc Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:13:39 -0300 Subject: [PATCH 08/21] in_proc_metrics: free process metrics on shutdown, cmt when process disappears. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 0fd77883b2f..8392e8d70f1 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -538,6 +538,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, } else { flb_errno(); } + cmt_destroy(metrics->cmt); flb_free(metrics); continue; } @@ -552,6 +553,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, } else { flb_errno(); } + cmt_destroy(metrics->cmt); flb_free(metrics); continue; } @@ -669,6 +671,15 @@ static int proc_metrics_init(struct flb_input_instance *ins, */ static int proc_metrics_ctx_destroy(struct proc_metrics_ctx *ctx) { + struct proc_metrics_pid_cmt *metrics; + struct mk_list *head; + struct mk_list *tmp; + + mk_list_foreach_safe(head, tmp, &ctx->procs) { + metrics = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); + cmt_destroy(metrics->cmt); + flb_free(metrics); + } flb_free(ctx); return 0; } From 2471ccd8ba76e963ee0effae86f8e9a196f026d7 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:20:35 -0300 Subject: [PATCH 09/21] in_proc_metrics: find all processes when using process name. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 8392e8d70f1..840bb40e651 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -517,13 +517,19 @@ static int proc_metrics_collect(struct flb_input_instance *ins, struct proc_metrics_pid_cmt *metrics; struct mk_list *head; struct mk_list *tmp; + struct mk_list *procs; + struct proc_entry *proc; if (ctx->proc_name != NULL) { - ret = get_pid_from_procname_linux(ctx, ctx->proc_name); - if (ret == -1) { - return -1; + procs = get_proc_entries_from_procname_linux(ctx, ctx->proc_name); + if (procs == NULL) { + return 0; + } + mk_list_foreach_safe(head, tmp, procs) { + proc = mk_list_entry(head, struct proc_entry, _head); + metrics = get_proc_metrics(ctx, proc->pid); } - metrics = get_proc_metrics(ctx, ret); + proc_entries_free(procs); } else if (ctx->pid > 0) { metrics = get_proc_metrics(ctx, ctx->pid); } else { From a18803dec10735634acec666b410feee31cfe577 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:40:59 -0300 Subject: [PATCH 10/21] in_proc_metrics: refactor code to use a single cmetrics context. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 118 ++++++++++++++----------- plugins/in_proc_metrics/proc_metrics.h | 2 +- 2 files changed, 69 insertions(+), 51 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 840bb40e651..4959d8d7179 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -305,7 +305,28 @@ static void proc_entries_free(struct mk_list *procs) flb_free(procs); } -static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *ins, pid_t pid) +static void proc_metrics_free(struct proc_metrics_pid_cmt *metrics) +{ + cmt_counter_destroy(metrics->rchar); + cmt_counter_destroy(metrics->wchar); + cmt_counter_destroy(metrics->syscr); + cmt_counter_destroy(metrics->syscw); + cmt_counter_destroy(metrics->read_bytes); + cmt_counter_destroy(metrics->write_bytes); + cmt_counter_destroy(metrics->cancelled_write_bytes); + + cmt_gauge_destroy(metrics->size); + cmt_gauge_destroy(metrics->resident); + cmt_gauge_destroy(metrics->shared); + cmt_gauge_destroy(metrics->trs); + cmt_gauge_destroy(metrics->lrs); + cmt_gauge_destroy(metrics->drs); + cmt_gauge_destroy(metrics->dt); + + flb_free(metrics); +} + +static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, pid_t pid) { struct proc_metrics_pid_cmt *proc; @@ -317,123 +338,117 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in proc->pid = pid; read_stat_file(pid, "cmdline", proc->cmdline, FLB_CMD_LEN-1, 1); - proc->cmt = cmt_create(); - if (!proc->cmt) { - flb_plg_error(ins, "could not initialize CMetrics"); - goto cmt_error; - } - - proc->rchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "rchar", + proc->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", "The number of bytes which this task has " "caused to be read from storage.", 2, (char *[]) {"pid", "cmdline"}); if (proc->rchar == NULL) { - flb_plg_error(ins, "could not initialize rchar counter"); + flb_plg_error(ctx->ins, "could not initialize rchar counter"); goto cmt_counter_error; } - proc->wchar = cmt_counter_create(proc->cmt, "proc_metrics", "io", "wchar", + proc->wchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "wchar", "The number of bytes which this task has " "caused, or shall cause to be written to " "disk.", 2, (char *[]) {"pid", "cmdline"}); if (proc->wchar == NULL) { - flb_plg_error(ins, "could not initialize wchar counter"); + flb_plg_error(ctx->ins, "could not initialize wchar counter"); goto cmt_counter_error; } - proc->syscr = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscr", + proc->syscr = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscr", "Attempt to count the number of read I/O " "operations, i.e. syscalls like read() and " "pread().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscr == NULL) { - flb_plg_error(ins, "could not initialize syscr counter"); + flb_plg_error(ctx->ins, "could not initialize syscr counter"); goto cmt_counter_error; } - proc->syscw = cmt_counter_create(proc->cmt, "proc_metrics", "io", "syscw", + proc->syscw = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscw", "Attempt to count the number of write I/O " "operations, i.e. syscalls like write() and " "pwrite().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscw == NULL) { - flb_plg_error(ins, "could not initialize syscw counter"); + flb_plg_error(ctx->ins, "could not initialize syscw counter"); goto cmt_counter_error; } - proc->read_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "read_bytes", + proc->read_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "read_bytes", "Attempt to count the number of bytes " "which this process really did cause to" " be fetched from the storage layer.", 2, (char *[]) {"pid", "cmdline"}); if (proc->read_bytes == NULL) { - flb_plg_error(ins, "could not initialize read_bytes counter"); + flb_plg_error(ctx->ins, "could not initialize read_bytes counter"); goto cmt_counter_error; } - proc->write_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", "write_bytes", + proc->write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "write_bytes", "Attempt to count the number of bytes " "which this process caused to be sent " "to the storage layer.", 2, (char *[]) {"pid", "cmdline"}); if (proc->write_bytes == NULL) { - flb_plg_error(ins, "could not initialize write_bytes counter"); + flb_plg_error(ctx->ins, "could not initialize write_bytes counter"); goto cmt_counter_error; } - proc->cancelled_write_bytes = cmt_counter_create(proc->cmt, "proc_metrics", "io", + proc->cancelled_write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "cancelled_write_bytes", "The number of bytes which " "this process caused to not " "happen, by truncating " "pagecache.", 2, (char *[]) {"pid", "cmdline"}); if (proc->cancelled_write_bytes == NULL) { - flb_plg_error(ins, "could not initialize cancelled_write_bytes counter"); + flb_plg_error(ctx->ins, "could not initialize cancelled_write_bytes counter"); goto cmt_counter_error; } - proc->size = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "size", + proc->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", "total program size (pages).", 2, (char *[]) {"pid", "cmdline"}); if (proc->size == NULL) { - flb_plg_error(ins, "could not initialize size gauge"); + flb_plg_error(ctx->ins, "could not initialize size gauge"); goto cmt_gauge_error; } - proc->resident = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "resident", + proc->resident = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "resident", "size of memory portions (pages).", 2, (char *[]) {"pid", "cmdline"}); if (proc->resident == NULL) { - flb_plg_error(ins, "could not initialize resident gauge"); + flb_plg_error(ctx->ins, "could not initialize resident gauge"); goto cmt_gauge_error; } - proc->shared = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "shared", + proc->shared = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "shared", "number of pages that are shared.", 2, (char *[]) {"pid", "cmdline"}); if (proc->shared == NULL) { - flb_plg_error(ins, "could not initialize shared gauge"); + flb_plg_error(ctx->ins, "could not initialize shared gauge"); goto cmt_gauge_error; } - proc->trs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "trs", + proc->trs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "trs", "number of pages that are ‘code’.", 2, (char *[]) {"pid", "cmdline"}); if (proc->trs == NULL) { - flb_plg_error(ins, "could not initialize trs gauge"); + flb_plg_error(ctx->ins, "could not initialize trs gauge"); goto cmt_gauge_error; } - proc->lrs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "lrs", + proc->lrs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "lrs", "number of pages of library.", 2, (char *[]) {"pid", "cmdline"}); if (proc->lrs == NULL) { - flb_plg_error(ins, "could not initialize lrs gauge"); + flb_plg_error(ctx->ins, "could not initialize lrs gauge"); goto cmt_gauge_error; } - proc->drs = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "drs", + proc->drs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "drs", "number of pages of data/stack.", 2, (char *[]) {"pid", "cmdline"}); if (proc->drs == NULL) { - flb_plg_error(ins, "could not initialize drs gauge"); + flb_plg_error(ctx->ins, "could not initialize drs gauge"); goto cmt_gauge_error; } - proc->dt = cmt_gauge_create(proc->cmt, "proc_metrics", "mem", "dt", + proc->dt = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "dt", "number of dirty pages.", 2, (char *[]) {"pid", "cmdline"}); if (proc->dt == NULL) { - flb_plg_error(ins, "could not initialize dt gauge"); + flb_plg_error(ctx->ins, "could not initialize dt gauge"); goto cmt_gauge_error; } @@ -464,13 +479,11 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct flb_input_instance *in if (proc->cancelled_write_bytes != NULL) { cmt_counter_destroy(proc->cancelled_write_bytes); } - cmt_destroy(proc->cmt); -cmt_error: flb_free(proc); return NULL; } -struct proc_metrics_pid_cmt *get_proc_metrics(struct proc_metrics_ctx *ctx, pid_t pid) +static struct proc_metrics_pid_cmt *get_proc_metrics(struct proc_metrics_ctx *ctx, pid_t pid) { struct mk_list *tmp; struct mk_list *head; @@ -489,7 +502,7 @@ struct proc_metrics_pid_cmt *get_proc_metrics(struct proc_metrics_ctx *ctx, pid_ } } - proc = create_pid_cmt(ctx->ins, pid); + proc = create_pid_cmt(ctx, pid); mk_list_add(&proc->_head, &ctx->procs); return proc; } @@ -544,8 +557,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, } else { flb_errno(); } - cmt_destroy(metrics->cmt); - flb_free(metrics); + proc_metrics_free(metrics); continue; } @@ -559,8 +571,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, } else { flb_errno(); } - cmt_destroy(metrics->cmt); - flb_free(metrics); + proc_metrics_free(metrics); continue; } @@ -594,10 +605,11 @@ static int proc_metrics_collect(struct flb_input_instance *ins, cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 2, (char *[]) {pid, metrics->cmdline}); cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 2, (char *[]) {pid, metrics->cmdline}); - ret = flb_input_metrics_append(ins, NULL, 0, metrics->cmt); - if (ret != 0) { - flb_plg_error(ins, "could not append metrics"); - } + flb_plg_info(ctx->ins, "submit metrics for pid=%d", metrics->pid); + } + ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); + if (ret != 0) { + flb_plg_error(ins, "could not append metrics"); } return ret; } @@ -639,10 +651,15 @@ static int proc_metrics_init(struct flb_input_instance *ins, } ctx->ins = ins; + ctx->cmt = cmt_create(); + if (!ctx->cmt) { + flb_plg_error(ins, "could not initialize CMetrics"); + goto cmt_error; + } + ret = flb_input_config_map_set(ins, (void *)ctx); if (ret == -1) { - flb_free(ctx); - return -1; + goto cmt_error; } /* save the PID just once if the process is numeric */ @@ -683,9 +700,10 @@ static int proc_metrics_ctx_destroy(struct proc_metrics_ctx *ctx) mk_list_foreach_safe(head, tmp, &ctx->procs) { metrics = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); - cmt_destroy(metrics->cmt); + flb_plg_debug(ctx->ins, "free metrics=%p:%d", metrics, metrics->pid); flb_free(metrics); } + cmt_destroy(ctx->cmt); flb_free(ctx); return 0; } diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index 74734b67c37..20197a03e92 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -35,6 +35,7 @@ struct proc_metrics_ctx struct flb_parser *parser; struct flb_input_instance *ins; /* Input plugin instace */ struct mk_list procs; + struct cmt *cmt; }; #define FLB_CMD_LEN 256 @@ -47,7 +48,6 @@ struct proc_entry { struct proc_metrics_pid_cmt { pid_t pid; char cmdline[FLB_CMD_LEN]; - struct cmt *cmt; /* rchar: 260189 * wchar: 413454 * syscr: 2036 From 1b1bc2cad198e641989b20228cf3c1efcc70b429 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:41:53 -0300 Subject: [PATCH 11/21] in_proc_metrics: free gauges in process metrics initialization. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 4959d8d7179..54fa781d1a3 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -457,6 +457,24 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, if (proc->size != NULL) { cmt_gauge_destroy(proc->size); } + if (proc->resident != NULL) { + cmt_gauge_destroy(proc->resident); + } + if (proc->shared != NULL) { + cmt_gauge_destroy(proc->shared); + } + if (proc->trs != NULL) { + cmt_gauge_destroy(proc->trs); + } + if (proc->lrs != NULL) { + cmt_gauge_destroy(proc->lrs); + } + if (proc->drs != NULL) { + cmt_gauge_destroy(proc->drs); + } + if (proc->dt != NULL) { + cmt_gauge_destroy(proc->dt); + } cmt_counter_error: if (proc->rchar != NULL) { cmt_counter_destroy(proc->rchar); From 3c4232cb492eca6ac4a2322334d4182b9a97cdfd Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:42:46 -0300 Subject: [PATCH 12/21] in_proc_metrics: fix bug where duplicate process entries are added. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 54fa781d1a3..3f266e297bd 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -507,12 +507,6 @@ static struct proc_metrics_pid_cmt *get_proc_metrics(struct proc_metrics_ctx *ct struct mk_list *head; struct proc_metrics_pid_cmt *proc; - if (mk_list_is_empty(&ctx->procs)) { - proc = create_pid_cmt(ctx->ins, pid); - mk_list_add(&proc->_head, &ctx->procs); - return proc; - } - mk_list_foreach_safe(head, tmp, &ctx->procs) { proc = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); if (proc->pid == pid) { From d1eea558c8488dcb5c83056cb95d75c2736c9b02 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:50:06 -0300 Subject: [PATCH 13/21] in_proc_metrics: just use the metrics pid without pid==0 magic. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 3f266e297bd..35adcf00937 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -591,11 +591,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, continue; } - if (metrics->pid == 0) { - snprintf(pid, sizeof(pid)-1, "%d", getpid()); - } else { - snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); - } + snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 2, (char *[]) {pid, metrics->cmdline}); From 92980ed124789f8d3186913145d950fcd0c131e7 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:53:23 -0300 Subject: [PATCH 14/21] in_proc_metrics: only register on linux. Signed-off-by: Phillip Whelan --- plugins/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index ddeb76eae7b..14118a8e59a 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -170,6 +170,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") REGISTER_IN_PLUGIN("in_docker") REGISTER_IN_PLUGIN("in_docker_events") REGISTER_IN_PLUGIN("in_node_exporter_metrics") + REGISTER_IN_PLUGIN("in_proc_metrics") endif() REGISTER_IN_PLUGIN("in_fluentbit_metrics") @@ -182,7 +183,6 @@ REGISTER_IN_PLUGIN("in_http") REGISTER_IN_PLUGIN("in_collectd") REGISTER_IN_PLUGIN("in_statsd") REGISTER_IN_PLUGIN("in_storage_backlog") -REGISTER_IN_PLUGIN("in_proc_metrics") if (FLB_STREAM_PROCESSOR) REGISTER_IN_PLUGIN("in_stream_processor") From 9727c34d400d6dc2542d883b04e4420805211551 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Mon, 20 Sep 2021 18:53:46 -0300 Subject: [PATCH 15/21] in_proc_metrics: change submit log message to debug priority. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 35adcf00937..3175368c194 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -613,7 +613,7 @@ static int proc_metrics_collect(struct flb_input_instance *ins, cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 2, (char *[]) {pid, metrics->cmdline}); cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 2, (char *[]) {pid, metrics->cmdline}); - flb_plg_info(ctx->ins, "submit metrics for pid=%d", metrics->pid); + flb_plg_debug(ctx->ins, "submit metrics for pid=%d", metrics->pid); } ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); if (ret != 0) { From 29866a885ce4426bd599eaa8bbe062bcc8911f76 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Tue, 21 Sep 2021 10:59:57 -0300 Subject: [PATCH 16/21] in_proc_metrics: fix use-after-free when triggering ENOPERM on /proc/$PID/*. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 3175368c194..67b64d7a2b9 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -336,7 +336,10 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, } proc->pid = pid; - read_stat_file(pid, "cmdline", proc->cmdline, FLB_CMD_LEN-1, 1); + if (read_stat_file(pid, "cmdline", proc->cmdline, FLB_CMD_LEN-1, 1) == -1) { + flb_free(proc); + return NULL; + } proc->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", "The number of bytes which this task has " @@ -566,10 +569,10 @@ static int proc_metrics_collect(struct flb_input_instance *ins, if (read_stat_file(metrics->pid, "io", buf, sizeof(buf)-1, 7) == -1) { if (errno == ENOENT) { mk_list_del(&metrics->_head); + proc_metrics_free(metrics); } else { flb_errno(); } - proc_metrics_free(metrics); continue; } @@ -580,10 +583,10 @@ static int proc_metrics_collect(struct flb_input_instance *ins, if (read_stat_file(metrics->pid, "statm", buf, sizeof(buf)-1, 1) == -1) { if (errno == ENOENT) { mk_list_del(&metrics->_head); + proc_metrics_free(metrics); } else { flb_errno(); } - proc_metrics_free(metrics); continue; } From e78af94fb8793222038c932baea02d03c9a123d8 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Tue, 21 Sep 2021 11:00:59 -0300 Subject: [PATCH 17/21] in_proc_metrics: indentation and alignment for multiple line statements. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 109 +++++++++++++++---------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 67b64d7a2b9..460d520d350 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -342,114 +342,124 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, } proc->rchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "rchar", - "The number of bytes which this task has " - "caused to be read from storage.", 2, (char *[]) {"pid", "cmdline"}); + "The number of bytes which this task has " + "caused to be read from storage.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->rchar == NULL) { flb_plg_error(ctx->ins, "could not initialize rchar counter"); goto cmt_counter_error; } proc->wchar = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "wchar", - "The number of bytes which this task has " - "caused, or shall cause to be written to " - "disk.", 2, (char *[]) {"pid", "cmdline"}); + "The number of bytes which this task has " + "caused, or shall cause to be written to " + "disk.", 2, (char *[]) {"pid", "cmdline"}); if (proc->wchar == NULL) { flb_plg_error(ctx->ins, "could not initialize wchar counter"); goto cmt_counter_error; } proc->syscr = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscr", - "Attempt to count the number of read I/O " - "operations, i.e. syscalls like read() and " - "pread().", 2, (char *[]) {"pid", "cmdline"}); + "Attempt to count the number of read I/O " + "operations, i.e. syscalls like read() and " + "pread().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscr == NULL) { flb_plg_error(ctx->ins, "could not initialize syscr counter"); goto cmt_counter_error; } proc->syscw = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "syscw", - "Attempt to count the number of write I/O " - "operations, i.e. syscalls like write() and " - "pwrite().", 2, (char *[]) {"pid", "cmdline"}); + "Attempt to count the number of write I/O " + "operations, i.e. syscalls like write() and " + "pwrite().", 2, (char *[]) {"pid", "cmdline"}); if (proc->syscw == NULL) { flb_plg_error(ctx->ins, "could not initialize syscw counter"); goto cmt_counter_error; } proc->read_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "read_bytes", - "Attempt to count the number of bytes " - "which this process really did cause to" - " be fetched from the storage layer.", - 2, (char *[]) {"pid", "cmdline"}); + "Attempt to count the number of bytes " + "which this process really did cause to" + " be fetched from the storage layer.", + 2, (char *[]) {"pid", "cmdline"}); if (proc->read_bytes == NULL) { flb_plg_error(ctx->ins, "could not initialize read_bytes counter"); goto cmt_counter_error; } proc->write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", "write_bytes", - "Attempt to count the number of bytes " - "which this process caused to be sent " - "to the storage layer.", 2, (char *[]) {"pid", "cmdline"}); + "Attempt to count the number of bytes " + "which this process caused to be sent " + "to the storage layer.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->write_bytes == NULL) { flb_plg_error(ctx->ins, "could not initialize write_bytes counter"); goto cmt_counter_error; } proc->cancelled_write_bytes = cmt_counter_create(ctx->cmt, "proc_metrics", "io", - "cancelled_write_bytes", - "The number of bytes which " - "this process caused to not " - "happen, by truncating " - "pagecache.", 2, (char *[]) {"pid", "cmdline"}); + "cancelled_write_bytes", + "The number of bytes which " + "this process caused to not " + "happen, by truncating " + "pagecache.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->cancelled_write_bytes == NULL) { flb_plg_error(ctx->ins, "could not initialize cancelled_write_bytes counter"); goto cmt_counter_error; } proc->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", - "total program size (pages).", 2, (char *[]) {"pid", "cmdline"}); + "total program size (pages).", 2, + (char *[]) {"pid", "cmdline"}); if (proc->size == NULL) { flb_plg_error(ctx->ins, "could not initialize size gauge"); goto cmt_gauge_error; } proc->resident = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "resident", - "size of memory portions (pages).", 2, (char *[]) {"pid", "cmdline"}); + "size of memory portions (pages).", 2, + (char *[]) {"pid", "cmdline"}); if (proc->resident == NULL) { flb_plg_error(ctx->ins, "could not initialize resident gauge"); goto cmt_gauge_error; } proc->shared = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "shared", - "number of pages that are shared.", 2, (char *[]) {"pid", "cmdline"}); + "number of pages that are shared.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->shared == NULL) { flb_plg_error(ctx->ins, "could not initialize shared gauge"); goto cmt_gauge_error; } proc->trs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "trs", - "number of pages that are ‘code’.", 2, (char *[]) {"pid", "cmdline"}); + "number of pages that are ‘code’.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->trs == NULL) { flb_plg_error(ctx->ins, "could not initialize trs gauge"); goto cmt_gauge_error; } proc->lrs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "lrs", - "number of pages of library.", 2, (char *[]) {"pid", "cmdline"}); + "number of pages of library.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->lrs == NULL) { flb_plg_error(ctx->ins, "could not initialize lrs gauge"); goto cmt_gauge_error; } proc->drs = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "drs", - "number of pages of data/stack.", 2, (char *[]) {"pid", "cmdline"}); + "number of pages of data/stack.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->drs == NULL) { flb_plg_error(ctx->ins, "could not initialize drs gauge"); goto cmt_gauge_error; } proc->dt = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "dt", - "number of dirty pages.", 2, (char *[]) {"pid", "cmdline"}); + "number of dirty pages.", 2, + (char *[]) {"pid", "cmdline"}); if (proc->dt == NULL) { flb_plg_error(ctx->ins, "could not initialize dt gauge"); goto cmt_gauge_error; @@ -596,25 +606,36 @@ static int proc_metrics_collect(struct flb_input_instance *ins, snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); - cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 2, (char *[]) {pid, metrics->cmdline}); - cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 2, (char *[]) {pid, metrics->cmdline}); - cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 2, (char *[]) {pid, metrics->cmdline}); - cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 2, (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->wchar, ts, (double)status.io.wchar, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->syscr, ts, (double)status.io.syscr, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->syscw, ts, (double)status.io.syscw, 2, + (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->read_bytes, ts, (double)status.io.read_bytes, 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->write_bytes, ts, (double)status.io.write_bytes, 2, (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->cancelled_write_bytes, ts, - (double)status.io.cancelled_write_bytes, 2, (char *[]) {pid, metrics->cmdline}); - - cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, - 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 2, (char *[]) {pid, metrics->cmdline}); - cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 2, (char *[]) {pid, metrics->cmdline}); + (double)status.io.cancelled_write_bytes, 2, + (char *[]) {pid, metrics->cmdline}); + + cmt_gauge_set(metrics->size, ts, (double)status.mem.size, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->resident, ts, (double)status.mem.resident, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->shared, ts, (double)status.mem.shared, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->trs, ts, (double)status.mem.trs, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->lrs, ts, (double)status.mem.lrs, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->drs, ts, (double)status.mem.drs, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_gauge_set(metrics->dt, ts, (double)status.mem.dt, 2, + (char *[]) {pid, metrics->cmdline}); flb_plg_debug(ctx->ins, "submit metrics for pid=%d", metrics->pid); } From 03e799d6172d7186dc62346b18d4fd05b622e2ab Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Tue, 21 Sep 2021 16:17:24 -0300 Subject: [PATCH 18/21] in_proc_metrics: use process setting 'all' or '*' to track all processes. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 77 +++++++++++++------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index 460d520d350..f281b3dabcf 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -162,10 +162,11 @@ static int read_stat_file(pid_t pid, const char *file, return 0; } -static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, +static struct mk_list *get_proc_entries_from_procname_linux(struct proc_metrics_ctx *ctx, const char* proc) { - pid_t ret = -1; + struct mk_list *pids; + struct proc_entry *entry; glob_t glb; int i; int fd = -1; @@ -176,6 +177,12 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, char cmdname[FLB_CMD_LEN]; char* bname = NULL; + pids = flb_calloc(1, sizeof(struct mk_list)); + if (pids == NULL) { + return NULL; + } + mk_list_init(pids); + ret_glb = glob("/proc/*/cmdline", 0 ,NULL, &glb); if (ret_glb != 0) { switch(ret_glb){ @@ -191,7 +198,7 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, default: flb_plg_warn(ctx->ins, "glob: other error"); } - return ret; + goto glob_error; } for (i = 0; i < glb.gl_pathc; i++) { @@ -209,30 +216,32 @@ static pid_t get_pid_from_procname_linux(struct proc_metrics_ctx *ctx, if (strncmp(proc, bname, FLB_CMD_LEN) == 0) { sscanf(glb.gl_pathv[i],"/proc/%ld/cmdline",&ret_scan); - ret = (pid_t)ret_scan; - close(fd); - break; + entry = flb_calloc(1, sizeof(struct proc_entry)); + if (entry == NULL) { + goto proc_entry_error; + } + entry->pid = (pid_t)ret_scan; + mk_list_add(&entry->_head, pids); } close(fd); } globfree(&glb); - return ret; + return pids; +proc_entry_error: + globfree(&glb); +glob_error: + flb_free(pids); + return NULL; } -static struct mk_list *get_proc_entries_from_procname_linux(struct proc_metrics_ctx *ctx, - const char* proc) +static struct mk_list *get_all_proc_entries(struct proc_metrics_ctx *ctx) { struct mk_list *pids; struct proc_entry *entry; glob_t glb; int i; - int fd = -1; long ret_scan = -1; int ret_glb = -1; - ssize_t count; - - char cmdname[FLB_CMD_LEN]; - char* bname = NULL; pids = flb_calloc(1, sizeof(struct mk_list)); if (pids == NULL) { @@ -259,28 +268,13 @@ static struct mk_list *get_proc_entries_from_procname_linux(struct proc_metrics_ } for (i = 0; i < glb.gl_pathc; i++) { - fd = open(glb.gl_pathv[i], O_RDONLY); - if (fd < 0) { - continue; + sscanf(glb.gl_pathv[i],"/proc/%ld/cmdline",&ret_scan); + entry = flb_calloc(1, sizeof(struct proc_entry)); + if (entry == NULL) { + goto proc_entry_error; } - count = read(fd, &cmdname, FLB_CMD_LEN); - if (count <= 0){ - close(fd); - continue; - } - cmdname[FLB_CMD_LEN-1] = '\0'; - bname = basename(cmdname); - - if (strncmp(proc, bname, FLB_CMD_LEN) == 0) { - sscanf(glb.gl_pathv[i],"/proc/%ld/cmdline",&ret_scan); - entry = flb_calloc(1, sizeof(struct proc_entry)); - if (entry == NULL) { - goto proc_entry_error; - } - entry->pid = (pid_t)ret_scan; - mk_list_add(&entry->_head, pids); - } - close(fd); + entry->pid = (pid_t)ret_scan; + mk_list_add(&entry->_head, pids); } globfree(&glb); return pids; @@ -559,9 +553,16 @@ static int proc_metrics_collect(struct flb_input_instance *ins, struct proc_entry *proc; if (ctx->proc_name != NULL) { - procs = get_proc_entries_from_procname_linux(ctx, ctx->proc_name); - if (procs == NULL) { - return 0; + if (strcmp(ctx->proc_name, "*") == 0 || strcmp(ctx->proc_name, "all") == 0) { + procs = get_all_proc_entries(ctx); + if (procs == NULL) { + return 0; + } + } else { + procs = get_proc_entries_from_procname_linux(ctx, ctx->proc_name); + if (procs == NULL) { + return 0; + } } mk_list_foreach_safe(head, tmp, procs) { proc = mk_list_entry(head, struct proc_entry, _head); From d02715075f3c591d87ae7fdac554615169d6de47 Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Tue, 21 Sep 2021 21:24:32 -0300 Subject: [PATCH 19/21] in_proc_metrics: add support for cpu metrics. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 141 ++++++++++++++++++++++++- plugins/in_proc_metrics/proc_metrics.h | 19 ++++ 2 files changed, 159 insertions(+), 1 deletion(-) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index f281b3dabcf..a0d867def8e 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -122,6 +122,31 @@ static int parse_proc_mem(const char *buf, struct proc_metrics_mem_status *statu return 0; } + +int parse_proc_stat(const char *buf, struct proc_metrics_cpu_status *status) +{ + pid_t pid; + pid_t parent; + pid_t pgroup; + int session; + int tty_nr; + int tty_pgrp; + unsigned int flags; + long unsigned int min_flt; + long unsigned int cmin_flt; + long unsigned int maj_flt; + long unsigned int cmaj_flt; + char exec[32]; + char state; + int ret; + + ret = sscanf(buf, "%d (%[^)]) %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu", &pid, exec, &state, + &parent, &pgroup, &session, &tty_nr, &tty_pgrp, &flags, &min_flt, &cmin_flt, + &maj_flt, &cmaj_flt, &status->cpu_user_time, &status->cpu_system_time); + + return 0; +} + /* We specifically *CANNOT* use flb_utils_read_file because * /proc special files tend to report their own size as 0. */ @@ -403,6 +428,23 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, goto cmt_counter_error; } + proc->cpu_user_time = cmt_counter_create(ctx->cmt, "cpu", "time", "user", + "total cpu user time in jiffies", + 2, (char *[]) {"pid", "cmdline"}); + if (proc->cpu_user_time == NULL) { + flb_plg_error(ctx->ins, "could not initialize cpu_user_time counter"); + goto cmt_counter_error; + } + + proc->cpu_system_time = cmt_counter_create(ctx->cmt, "cpu", "time", "system", + "total cpu system time in jiffies", + 2, (char *[]) {"pid", "cmdline"}); + if (proc->cpu_system_time == NULL) { + flb_plg_error(ctx->ins, "could not initialize cpu_system_time counter"); + goto cmt_counter_error; + } + + proc->size = cmt_gauge_create(ctx->cmt, "proc_metrics", "mem", "size", "total program size (pages).", 2, (char *[]) {"pid", "cmdline"}); @@ -459,6 +501,30 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, goto cmt_gauge_error; } + proc->cpu_user_percent = cmt_gauge_create(ctx->cmt, "cpu", "percent", "user", + "total cpu user time percent", + 2, (char *[]) {"pid", "cmdline"}); + if (proc->cpu_user_percent == NULL) { + flb_plg_error(ctx->ins, "could not initialize cpu_user_percent counter"); + goto cmt_counter_error; + } + + proc->cpu_system_percent = cmt_gauge_create(ctx->cmt, "cpu", "percent", "system", + "total cpu system time percent", + 2, (char *[]) {"pid", "cmdline"}); + if (proc->cpu_system_percent == NULL) { + flb_plg_error(ctx->ins, "could not initialize cpu_system_percent counter"); + goto cmt_counter_error; + } + + proc->cpu_percent = cmt_gauge_create(ctx->cmt, "cpu", "percent", "total", + "total cpu total time percent", + 2, (char *[]) {"pid", "cmdline"}); + if (proc->cpu_percent == NULL) { + flb_plg_error(ctx->ins, "could not initialize cpu_percent counter"); + goto cmt_counter_error; + } + return proc; cmt_gauge_error: if (proc->size != NULL) { @@ -482,6 +548,15 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, if (proc->dt != NULL) { cmt_gauge_destroy(proc->dt); } + if (proc->cpu_user_percent != NULL) { + cmt_gauge_destroy(proc->cpu_user_percent); + } + if (proc->cpu_system_percent != NULL) { + cmt_gauge_destroy(proc->cpu_system_percent); + } + if (proc->cpu_percent != NULL) { + cmt_gauge_destroy(proc->cpu_percent); + } cmt_counter_error: if (proc->rchar != NULL) { cmt_counter_destroy(proc->rchar); @@ -504,6 +579,12 @@ static struct proc_metrics_pid_cmt *create_pid_cmt(struct proc_metrics_ctx *ctx, if (proc->cancelled_write_bytes != NULL) { cmt_counter_destroy(proc->cancelled_write_bytes); } + if (proc->cpu_user_time != NULL) { + cmt_counter_destroy(proc->cpu_user_time); + } + if (proc->cpu_system_time != NULL) { + cmt_counter_destroy(proc->cpu_system_time); + } flb_free(proc); return NULL; } @@ -551,6 +632,17 @@ static int proc_metrics_collect(struct flb_input_instance *ins, struct mk_list *tmp; struct mk_list *procs; struct proc_entry *proc; + uint64_t cpu_user_time; + uint64_t cpu_nice_time; + uint64_t cpu_system_time; + uint64_t cpu_idle_time; + double old_cpu_user_time; + double old_cpu_system_time; + double pctime; + + read_file_lines("/proc/stat", buf, sizeof(buf)-1, 1); + sscanf(buf, "%s %ld %ld %ld %ld", pid, &cpu_user_time, &cpu_nice_time, &cpu_system_time, + &cpu_idle_time); if (ctx->proc_name != NULL) { if (strcmp(ctx->proc_name, "*") == 0 || strcmp(ctx->proc_name, "all") == 0) { @@ -577,6 +669,9 @@ static int proc_metrics_collect(struct flb_input_instance *ins, mk_list_foreach_safe(head, tmp, &ctx->procs) { metrics = mk_list_entry(head, struct proc_metrics_pid_cmt, _head); + + snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); + if (read_stat_file(metrics->pid, "io", buf, sizeof(buf)-1, 7) == -1) { if (errno == ENOENT) { mk_list_del(&metrics->_head); @@ -605,7 +700,45 @@ static int proc_metrics_collect(struct flb_input_instance *ins, continue; } - snprintf(pid, sizeof(pid)-1, "%d", metrics->pid); + if (read_stat_file(metrics->pid, "stat", buf, sizeof(buf)-1, 1) == -1) { + if (errno == ENOENT) { + mk_list_del(&metrics->_head); + proc_metrics_free(metrics); + } else { + flb_errno(); + } + continue; + } + + if (parse_proc_stat(buf, &status.cpu) != 0) { + continue; + } + + if (ctx->cpu_user_time + ctx->cpu_nice_time + ctx->cpu_system_time + + ctx->cpu_idle_time > 0) { + cmt_counter_get_val(metrics->cpu_user_time, 2, (char *[]) {pid, metrics->cmdline}, + &old_cpu_user_time); + cmt_counter_get_val(metrics->cpu_system_time, 2, (char *[]) {pid, metrics->cmdline}, + &old_cpu_system_time); + pctime = + (double)( + (status.cpu.cpu_system_time-old_cpu_system_time) + + (status.cpu.cpu_user_time-old_cpu_user_time) + ) / + (double) ( + (cpu_system_time-ctx->cpu_system_time) + + (cpu_user_time-ctx->cpu_user_time) + + (cpu_nice_time-ctx->cpu_nice_time) + ); + + cmt_gauge_set(metrics->cpu_percent, ts, (double)pctime*100, + 2, (char *[]) {pid, metrics->cmdline}); + } + + cmt_counter_set(metrics->cpu_user_time, ts, (double)status.cpu.cpu_user_time, 2, + (char *[]) {pid, metrics->cmdline}); + cmt_counter_set(metrics->cpu_system_time, ts, (double)status.cpu.cpu_system_time, 2, + (char *[]) {pid, metrics->cmdline}); cmt_counter_set(metrics->rchar, ts, (double)status.io.rchar, 2, (char *[]) {pid, metrics->cmdline}); @@ -640,6 +773,12 @@ static int proc_metrics_collect(struct flb_input_instance *ins, flb_plg_debug(ctx->ins, "submit metrics for pid=%d", metrics->pid); } + + ctx->cpu_user_time = cpu_user_time; + ctx->cpu_nice_time = cpu_nice_time; + ctx->cpu_system_time = cpu_system_time; + ctx->cpu_idle_time = cpu_idle_time; + ret = flb_input_metrics_append(ins, NULL, 0, ctx->cmt); if (ret != 0) { flb_plg_error(ins, "could not append metrics"); diff --git a/plugins/in_proc_metrics/proc_metrics.h b/plugins/in_proc_metrics/proc_metrics.h index 20197a03e92..8f4d7f19fc0 100644 --- a/plugins/in_proc_metrics/proc_metrics.h +++ b/plugins/in_proc_metrics/proc_metrics.h @@ -36,6 +36,11 @@ struct proc_metrics_ctx struct flb_input_instance *ins; /* Input plugin instace */ struct mk_list procs; struct cmt *cmt; + + uint64_t cpu_user_time; + uint64_t cpu_nice_time; + uint64_t cpu_system_time; + uint64_t cpu_idle_time; }; #define FLB_CMD_LEN 256 @@ -72,6 +77,13 @@ struct proc_metrics_pid_cmt { struct cmt_gauge *drs; struct cmt_gauge *dt; + struct cmt_counter *cpu_user_time; + struct cmt_counter *cpu_system_time; + + struct cmt_gauge *cpu_user_percent; + struct cmt_gauge *cpu_system_percent; + struct cmt_gauge *cpu_percent; + struct mk_list _head; }; @@ -97,9 +109,16 @@ struct proc_metrics_mem_status uint64_t dt; }; +struct proc_metrics_cpu_status +{ + uint64_t cpu_user_time; + uint64_t cpu_system_time; +}; + struct proc_metrics_status { struct proc_metrics_io_status io; struct proc_metrics_mem_status mem; + struct proc_metrics_cpu_status cpu; }; #endif From 881b336a1b5fb1305f1fa59f35b69a7fd5217b9a Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Fri, 15 Oct 2021 11:08:34 -0300 Subject: [PATCH 20/21] in_proc_metrics: clean out cpu stats when processes finish. Signed-off-by: Phillip Whelan --- plugins/in_proc_metrics/proc_metrics.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plugins/in_proc_metrics/proc_metrics.c b/plugins/in_proc_metrics/proc_metrics.c index a0d867def8e..4046d46079f 100644 --- a/plugins/in_proc_metrics/proc_metrics.c +++ b/plugins/in_proc_metrics/proc_metrics.c @@ -333,6 +333,8 @@ static void proc_metrics_free(struct proc_metrics_pid_cmt *metrics) cmt_counter_destroy(metrics->read_bytes); cmt_counter_destroy(metrics->write_bytes); cmt_counter_destroy(metrics->cancelled_write_bytes); + cmt_counter_destroy(metrics->cpu_user_time); + cmt_counter_destroy(metrics->cpu_system_time); cmt_gauge_destroy(metrics->size); cmt_gauge_destroy(metrics->resident); @@ -342,6 +344,10 @@ static void proc_metrics_free(struct proc_metrics_pid_cmt *metrics) cmt_gauge_destroy(metrics->drs); cmt_gauge_destroy(metrics->dt); + cmt_gauge_destroy(metrics->cpu_user_percent); + cmt_gauge_destroy(metrics->cpu_system_percent); + cmt_gauge_destroy(metrics->cpu_percent); + flb_free(metrics); } From 3f055e72bec17c985f6609ffbc4db2236f10f53b Mon Sep 17 00:00:00 2001 From: Phillip Whelan Date: Fri, 11 Mar 2022 12:42:05 -0300 Subject: [PATCH 21/21] in_proc: fix possible free before allocation. Signed-off-by: Phillip Whelan --- plugins/in_proc/in_proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/in_proc/in_proc.c b/plugins/in_proc/in_proc.c index d98fe2931bc..fbd4c413125 100644 --- a/plugins/in_proc/in_proc.c +++ b/plugins/in_proc/in_proc.c @@ -471,7 +471,6 @@ static int in_proc_exit(void *data, struct flb_config *config) } /* Destroy context */ - flb_free(ctx->proc_name); flb_free(ctx); return 0;