Skip to content

Commit 3cdbe9e

Browse files
committed
in_podman_metrics: Added remove_stale_counters opt
For environments where containers are created and removed often, it can be useful to enable an option that removes counters belonging to removed containers. This option defaults to false, since it increases the plugin's resource consumption. Signed-off-by: Paweł Cendrzak <[email protected]>
1 parent 30eb89e commit 3cdbe9e

File tree

4 files changed

+111
-11
lines changed

4 files changed

+111
-11
lines changed

plugins/in_podman_metrics/podman_metrics.c

Lines changed: 94 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
* that are children to root array, and in them, search for ID and name (which is also
3737
* an array.
3838
*/
39-
static int collect_container_data(struct flb_in_metrics *ctx)
39+
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only)
4040
{
4141
/* Buffers for reading data from JSON */
4242
char *buffer;
@@ -57,6 +57,8 @@ static int collect_container_data(struct flb_in_metrics *ctx)
5757
jsmn_parser p;
5858
jsmntok_t t[JSON_TOKENS];
5959

60+
struct container_id *cid;
61+
6062
flb_utils_read_file(ctx->config, &buffer, &read_bytes);
6163
if (!read_bytes) {
6264
flb_plg_warn(ctx->ins, "Failed to open %s", ctx->config);
@@ -119,11 +121,26 @@ static int collect_container_data(struct flb_in_metrics *ctx)
119121
image_name[metadata_token_size] = '\0';
120122

121123
flb_plg_trace(ctx->ins, "Found image name %s", image_name);
122-
add_container_to_list(ctx, id, name, image_name);
124+
if (!gather_only) {
125+
add_container_to_list(ctx, id, name, image_name);
126+
}
123127
}
124128
else {
125129
flb_plg_warn(ctx->ins, "Image name was not found for %s", id);
126-
add_container_to_list(ctx, id, name, "unknown");
130+
if (!gather_only) {
131+
add_container_to_list(ctx, id, name, "unknown");
132+
}
133+
}
134+
135+
if (gather_only) {
136+
cid = flb_malloc(sizeof(struct container_id));
137+
if (!cid) {
138+
flb_errno();
139+
return -1;
140+
}
141+
cid->id = flb_sds_create(id);
142+
mk_list_add(&cid->_head, &ctx->ids);
143+
flb_plg_trace(ctx->ins, "Found id for gather only %s", cid->id);
127144
}
128145
collected_containers++;
129146
}
@@ -173,18 +190,55 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
173190
struct container *cnt;
174191
struct net_iface *iface;
175192
struct sysfs_path *pth;
193+
struct container_id *id;
176194
struct mk_list *head;
177195
struct mk_list *tmp;
178196
struct mk_list *inner_head;
179197
struct mk_list *inner_tmp;
198+
int can_remove_stale_counters = FLB_FALSE;
199+
int id_found;
200+
int collected;
201+
202+
if (ctx->remove_stale_counters) {
203+
collected = collect_container_data(ctx, FLB_TRUE);
204+
if (collected == -1) {
205+
flb_plg_error(ctx->ins, "Could not collect container ids");
206+
}
207+
else {
208+
can_remove_stale_counters = FLB_TRUE;
209+
flb_plg_debug(ctx->ins, "Collected %d for deletion", collected);
210+
}
211+
}
180212

181213
mk_list_foreach_safe(head, tmp, &ctx->items) {
214+
id_found = FLB_FALSE;
182215
cnt = mk_list_entry(head, struct container, _head);
183216
flb_plg_debug(ctx->ins, "Destroying container data (id: %s, name: %s", cnt->id, cnt->name);
184217

218+
/* If recreation was already triggered, there is no point in determining it again */
219+
if (can_remove_stale_counters && !ctx->recreate_cmt) {
220+
mk_list_foreach_safe(inner_head, inner_tmp, &ctx->ids) {
221+
id = mk_list_entry(inner_head, struct container_id, _head);
222+
if (strcmp(cnt->id, id->id) == 0) {
223+
id_found = FLB_TRUE;
224+
break;
225+
}
226+
}
227+
228+
if (!id_found) {
229+
flb_plg_info(ctx->ins, "Counter will be removed because %s is gone", cnt->name);
230+
ctx->recreate_cmt = FLB_TRUE;
231+
}
232+
else {
233+
flb_plg_debug(ctx->ins, "No need to remove stale counters");
234+
}
235+
}
236+
237+
185238
flb_sds_destroy(cnt->id);
186239
flb_sds_destroy(cnt->name);
187240
flb_sds_destroy(cnt->image_name);
241+
188242
mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data) {
189243
iface = mk_list_entry(inner_head, struct net_iface, _head);
190244
flb_sds_destroy(iface->name);
@@ -194,6 +248,7 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
194248
mk_list_del(&cnt->_head);
195249
flb_free(cnt);
196250
}
251+
197252

198253
mk_list_foreach_safe(head, tmp, &ctx->sysfs_items) {
199254
pth = mk_list_entry(head, struct sysfs_path, _head);
@@ -202,10 +257,19 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
202257
mk_list_del(&pth->_head);
203258
flb_free(pth);
204259
}
260+
261+
if (ctx->remove_stale_counters) {
262+
mk_list_foreach_safe(head, tmp, &ctx->ids) {
263+
id = mk_list_entry(head, struct container_id, _head);
264+
flb_plg_trace(ctx->ins, "Destroying container id: %s", id->id);
265+
flb_sds_destroy(id->id);
266+
mk_list_del(&id->_head);
267+
flb_free(id);
268+
}
269+
}
205270
return 0;
206271
}
207272

208-
209273
/*
210274
* Create counter for given metric name, using name, image name and value as counter labels. Counters
211275
* are created per counter name, so they are "shared" between multiple containers - counter
@@ -218,8 +282,8 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
218282
{
219283
flb_sds_t *labels;
220284
uint64_t fvalue = value;
221-
222285
int label_count;
286+
223287
if (value == UINT64_MAX) {
224288
flb_plg_debug(ctx->ins, "Ignoring invalid counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
225289
return -1;
@@ -246,6 +310,12 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
246310
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
247311
}
248312

313+
if (ctx->recreate_cmt) {
314+
flb_plg_debug(ctx->ins, "Recreating counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
315+
cmt_counter_destroy(*counter);
316+
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
317+
}
318+
249319
/* Allow setting a value that is not greater than the current one (if, for example, memory usage stays exactly the same) */
250320
cmt_counter_allow_reset(*counter);
251321
flb_plg_debug(ctx->ins, "Set counter for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, fvalue);
@@ -268,20 +338,26 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
268338
{
269339
flb_sds_t *labels;
270340
int label_count;
341+
labels = (char *[]){id, name, image_name};
342+
label_count = 3;
343+
271344
if (value == UINT64_MAX) {
272345
flb_plg_debug(ctx->ins, "Ignoring invalid gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
273346
return -1;
274347
}
275348

276-
labels = (char *[]){id, name, image_name};
277-
label_count = 3;
278-
279349
/* if gauge was not yet created, it means that this function is called for the first time per counter type */
280350
if (*gauge == NULL) {
281351
flb_plg_debug(ctx->ins, "Creating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
282352
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
283353
}
284354

355+
if (ctx->recreate_cmt) {
356+
flb_plg_debug(ctx->ins, "Recreating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
357+
cmt_gauge_destroy(*gauge);
358+
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
359+
}
360+
285361
flb_plg_debug(ctx->ins, "Set gauge for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, value);
286362
if (cmt_gauge_set(*gauge, cfl_time_now(), value, label_count, labels) == -1) {
287363
flb_plg_warn(ctx->ins, "Failed to set gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
@@ -340,7 +416,12 @@ static int create_counters(struct flb_in_metrics *ctx)
340416
DESCRIPTION_TX_BYTES, iface->name, iface->tx_bytes);
341417
create_counter(ctx, &ctx->tx_errors, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_TX_ERRORS,
342418
DESCRIPTION_TX_ERRORS, iface->name, iface->tx_errors);
419+
/* Stop recreating after first iteration, at this point we cleared all counters/gauges */
420+
ctx->recreate_cmt = FLB_FALSE;
343421
}
422+
423+
// Reset the flag again in case the previous loop did not iterate at all
424+
ctx->recreate_cmt = FLB_FALSE;
344425
}
345426
return 0;
346427
}
@@ -357,7 +438,7 @@ static int scrape_metrics(struct flb_config *config, struct flb_in_metrics *ctx)
357438
return -1;
358439
}
359440

360-
if (collect_container_data(ctx) == -1) {
441+
if (collect_container_data(ctx, FLB_FALSE) == -1) {
361442
flb_plg_error(ctx->ins, "Could not collect container ids");
362443
return -1;
363444
}
@@ -429,6 +510,8 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
429510
ctx->tx_bytes = NULL;
430511
ctx->tx_errors = NULL;
431512

513+
ctx->recreate_cmt = FLB_FALSE;
514+
432515
if (flb_input_config_map_set(in, (void *) ctx) == -1) {
433516
flb_free(ctx);
434517
return -1;
@@ -462,6 +545,7 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
462545

463546
mk_list_init(&ctx->items);
464547
mk_list_init(&ctx->sysfs_items);
548+
mk_list_init(&ctx->ids);
465549

466550
if (ctx->scrape_interval >= 2 && ctx->scrape_on_start) {
467551
flb_plg_info(ctx->ins, "Generating podman metrics (initial scrape)");
@@ -490,8 +574,8 @@ static int in_metrics_exit(void *data, struct flb_config *config)
490574
return 0;
491575
}
492576

493-
flb_sds_destroy(ctx->config);
494577
destroy_container_list(ctx);
578+
flb_sds_destroy(ctx->config);
495579
flb_free(ctx);
496580
return 0;
497581
}

plugins/in_podman_metrics/podman_metrics.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
#include "podman_metrics_config.h"
3232

33-
static int collect_container_data(struct flb_in_metrics *ctx);
33+
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only);
3434
static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t name, flb_sds_t image_name);
3535
static int destroy_container_list(struct flb_in_metrics *ctx);
3636

@@ -78,6 +78,11 @@ static struct flb_config_map config_map[] = {
7878
0, FLB_TRUE, offsetof(struct flb_in_metrics, procfs_path),
7979
"Path to proc subsystem directory"
8080
},
81+
{
82+
FLB_CONFIG_MAP_BOOL, "remove_stale_counters", "false",
83+
0, FLB_TRUE, offsetof(struct flb_in_metrics, remove_stale_counters),
84+
"Remove counters for removed containers"
85+
},
8186

8287
/* EOF */
8388
{0}

plugins/in_podman_metrics/podman_metrics_config.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,18 +169,28 @@ struct sysfs_path {
169169
struct mk_list _head;
170170
};
171171

172+
struct container_id {
173+
flb_sds_t id;
174+
struct mk_list _head;
175+
};
176+
172177
struct flb_in_metrics {
173178
/* config map options */
174179
int scrape_on_start;
175180
int scrape_interval;
176181
flb_sds_t podman_config_path;
182+
int remove_stale_counters;
183+
int recreate_cmt;
177184

178185
/* container list */
179186
struct mk_list items;
180187

181188
/* sysfs path list */
182189
struct mk_list sysfs_items;
183190

191+
/* container id list */
192+
struct mk_list ids;
193+
184194
/* counters */
185195
struct cmt_counter *c_memory_usage;
186196
struct cmt_counter *c_memory_max_usage;

tests/runtime/in_podman_metrics.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ void flb_test_ipm_regular() {
9696
"scrape_on_start", "true",
9797
"path.sysfs", DPATH_PODMAN_REGULAR,
9898
"path.procfs", DPATH_PODMAN_REGULAR,
99+
"remove_stale_counters", "true",
99100
NULL);
100101
TEST_CHECK(flb_start(ctx) == 0);
101102
sleep(1);

0 commit comments

Comments
 (0)