3636 * that are children of the root array, and in them, search for ID and name (which is also
3737 * an array).
3838 */
39- static int collect_container_data (struct flb_in_metrics * ctx )
39+ static int collect_container_data (struct flb_in_metrics * ctx , int gather_only )
4040{
4141 /* Buffers for reading data from JSON */
4242 char * buffer ;
@@ -57,6 +57,8 @@ static int collect_container_data(struct flb_in_metrics *ctx)
5757 jsmn_parser p ;
5858 jsmntok_t t [JSON_TOKENS ];
5959
60+ struct container_id * cid ;
61+
6062 flb_utils_read_file (ctx -> config , & buffer , & read_bytes );
6163 if (!read_bytes ) {
6264 flb_plg_warn (ctx -> ins , "Failed to open %s" , ctx -> config );
@@ -119,11 +121,26 @@ static int collect_container_data(struct flb_in_metrics *ctx)
119121 image_name [metadata_token_size ] = '\0' ;
120122
121123 flb_plg_trace (ctx -> ins , "Found image name %s" , image_name );
122- add_container_to_list (ctx , id , name , image_name );
124+ if (!gather_only ) {
125+ add_container_to_list (ctx , id , name , image_name );
126+ }
123127 }
124128 else {
125129 flb_plg_warn (ctx -> ins , "Image name was not found for %s" , id );
126- add_container_to_list (ctx , id , name , "unknown" );
130+ if (!gather_only ) {
131+ add_container_to_list (ctx , id , name , "unknown" );
132+ }
133+ }
134+
135+ if (gather_only ) {
136+ cid = flb_malloc (sizeof (struct container_id ));
137+ if (!cid ) {
138+ flb_errno ();
139+ return -1 ;
140+ }
141+ cid -> id = flb_sds_create (id );
142+ mk_list_add (& cid -> _head , & ctx -> ids );
143+ flb_plg_trace (ctx -> ins , "Found id for gather only %s" , cid -> id );
127144 }
128145 collected_containers ++ ;
129146 }
@@ -173,18 +190,55 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
173190 struct container * cnt ;
174191 struct net_iface * iface ;
175192 struct sysfs_path * pth ;
193+ struct container_id * id ;
176194 struct mk_list * head ;
177195 struct mk_list * tmp ;
178196 struct mk_list * inner_head ;
179197 struct mk_list * inner_tmp ;
198+ int can_remove_stale_counters = FLB_FALSE ;
199+ int id_found ;
200+ int collected ;
201+
202+ if (ctx -> remove_stale_counters ) {
203+ collected = collect_container_data (ctx , FLB_TRUE );
204+ if (collected == -1 ) {
205+ flb_plg_error (ctx -> ins , "Could not collect container ids" );
206+ }
207+ else {
208+ can_remove_stale_counters = FLB_TRUE ;
209+ flb_plg_debug (ctx -> ins , "Collected %d for deletion" , collected );
210+ }
211+ }
180212
181213 mk_list_foreach_safe (head , tmp , & ctx -> items ) {
214+ id_found = FLB_FALSE ;
182215 cnt = mk_list_entry (head , struct container , _head );
183216 flb_plg_debug (ctx -> ins , "Destroying container data (id: %s, name: %s" , cnt -> id , cnt -> name );
184217
218+ /* If recreation was already triggered, there is no point in determining it again */
219+ if (can_remove_stale_counters && !ctx -> recreate_cmt ) {
220+ mk_list_foreach_safe (inner_head , inner_tmp , & ctx -> ids ) {
221+ id = mk_list_entry (inner_head , struct container_id , _head );
222+ if (strcmp (cnt -> id , id -> id ) == 0 ) {
223+ id_found = FLB_TRUE ;
224+ break ;
225+ }
226+ }
227+
228+ if (!id_found ) {
229+ flb_plg_info (ctx -> ins , "Counter will be removed because %s is gone" , cnt -> name );
230+ ctx -> recreate_cmt = FLB_TRUE ;
231+ }
232+ else {
233+ flb_plg_debug (ctx -> ins , "No need to remove stale counters" );
234+ }
235+ }
236+
237+
185238 flb_sds_destroy (cnt -> id );
186239 flb_sds_destroy (cnt -> name );
187240 flb_sds_destroy (cnt -> image_name );
241+
188242 mk_list_foreach_safe (inner_head , inner_tmp , & cnt -> net_data ) {
189243 iface = mk_list_entry (inner_head , struct net_iface , _head );
190244 flb_sds_destroy (iface -> name );
@@ -194,6 +248,7 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
194248 mk_list_del (& cnt -> _head );
195249 flb_free (cnt );
196250 }
251+
197252
198253 mk_list_foreach_safe (head , tmp , & ctx -> sysfs_items ) {
199254 pth = mk_list_entry (head , struct sysfs_path , _head );
@@ -202,10 +257,19 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
202257 mk_list_del (& pth -> _head );
203258 flb_free (pth );
204259 }
260+
261+ if (ctx -> remove_stale_counters ) {
262+ mk_list_foreach_safe (head , tmp , & ctx -> ids ) {
263+ id = mk_list_entry (head , struct container_id , _head );
264+ flb_plg_trace (ctx -> ins , "Destroying container id: %s" , id -> id );
265+ flb_sds_destroy (id -> id );
266+ mk_list_del (& id -> _head );
267+ flb_free (id );
268+ }
269+ }
205270 return 0 ;
206271}
207272
208-
209273/*
210274 * Create counter for given metric name, using name, image name and value as counter labels. Counters
211275 * are created per counter name, so they are "shared" between multiple containers - counter
@@ -218,8 +282,8 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
218282{
219283 flb_sds_t * labels ;
220284 uint64_t fvalue = value ;
221-
222285 int label_count ;
286+
223287 if (value == UINT64_MAX ) {
224288 flb_plg_debug (ctx -> ins , "Ignoring invalid counter for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
225289 return -1 ;
@@ -246,6 +310,12 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
246310 * counter = cmt_counter_create (ctx -> ins -> cmt , COUNTER_PREFIX , metric_prefix , metric_name , description , label_count , fields );
247311 }
248312
313+ if (ctx -> recreate_cmt ) {
314+ flb_plg_debug (ctx -> ins , "Recreating counter for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
315+ cmt_counter_destroy (* counter );
316+ * counter = cmt_counter_create (ctx -> ins -> cmt , COUNTER_PREFIX , metric_prefix , metric_name , description , label_count , fields );
317+ }
318+
249319 /* Allow setting a value that is not greater than the current one (if, for example, memory usage stays exactly the same) */
250320 cmt_counter_allow_reset (* counter );
251321 flb_plg_debug (ctx -> ins , "Set counter for %s, %s_%s_%s: %lu" , name , COUNTER_PREFIX , metric_prefix , metric_name , fvalue );
@@ -268,20 +338,26 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
268338{
269339 flb_sds_t * labels ;
270340 int label_count ;
341+ labels = (char * []){id , name , image_name };
342+ label_count = 3 ;
343+
271344 if (value == UINT64_MAX ) {
272345 flb_plg_debug (ctx -> ins , "Ignoring invalid gauge for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
273346 return -1 ;
274347 }
275348
276- labels = (char * []){id , name , image_name };
277- label_count = 3 ;
278-
279349 /* if gauge was not yet created, it means that this function is called for the first time per counter type */
280350 if (* gauge == NULL ) {
281351 flb_plg_debug (ctx -> ins , "Creating gauge for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
282352 * gauge = cmt_gauge_create (ctx -> ins -> cmt , COUNTER_PREFIX , metric_prefix , metric_name , description , label_count , fields );
283353 }
284354
355+ if (ctx -> recreate_cmt ) {
356+ flb_plg_debug (ctx -> ins , "Recreating gauge for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
357+ cmt_gauge_destroy (* gauge );
358+ * gauge = cmt_gauge_create (ctx -> ins -> cmt , COUNTER_PREFIX , metric_prefix , metric_name , description , label_count , fields );
359+ }
360+
285361 flb_plg_debug (ctx -> ins , "Set gauge for %s, %s_%s_%s: %lu" , name , COUNTER_PREFIX , metric_prefix , metric_name , value );
286362 if (cmt_gauge_set (* gauge , cfl_time_now (), value , label_count , labels ) == -1 ) {
287363 flb_plg_warn (ctx -> ins , "Failed to set gauge for %s, %s_%s_%s" , name , COUNTER_PREFIX , metric_prefix , metric_name );
@@ -340,7 +416,12 @@ static int create_counters(struct flb_in_metrics *ctx)
340416 DESCRIPTION_TX_BYTES , iface -> name , iface -> tx_bytes );
341417 create_counter (ctx , & ctx -> tx_errors , cnt -> id , cnt -> name , cnt -> image_name , COUNTER_NETWORK_PREFIX , FIELDS_METRIC_WITH_IFACE , COUNTER_TX_ERRORS ,
342418 DESCRIPTION_TX_ERRORS , iface -> name , iface -> tx_errors );
419+ /* Stop recreating after first iteration, at this point we cleared all counters/gauges */
420+ ctx -> recreate_cmt = FLB_FALSE ;
343421 }
422+
423+ /* Reset the flag again in case the previous loop did not iterate at all */
424+ ctx -> recreate_cmt = FLB_FALSE ;
344425 }
345426 return 0 ;
346427}
@@ -357,7 +438,7 @@ static int scrape_metrics(struct flb_config *config, struct flb_in_metrics *ctx)
357438 return -1 ;
358439 }
359440
360- if (collect_container_data (ctx ) == -1 ) {
441+ if (collect_container_data (ctx , FLB_FALSE ) == -1 ) {
361442 flb_plg_error (ctx -> ins , "Could not collect container ids" );
362443 return -1 ;
363444 }
@@ -429,6 +510,8 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
429510 ctx -> tx_bytes = NULL ;
430511 ctx -> tx_errors = NULL ;
431512
513+ ctx -> recreate_cmt = FLB_FALSE ;
514+
432515 if (flb_input_config_map_set (in , (void * ) ctx ) == -1 ) {
433516 flb_free (ctx );
434517 return -1 ;
@@ -462,6 +545,7 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
462545
463546 mk_list_init (& ctx -> items );
464547 mk_list_init (& ctx -> sysfs_items );
548+ mk_list_init (& ctx -> ids );
465549
466550 if (ctx -> scrape_interval >= 2 && ctx -> scrape_on_start ) {
467551 flb_plg_info (ctx -> ins , "Generating podman metrics (initial scrape)" );
@@ -490,8 +574,8 @@ static int in_metrics_exit(void *data, struct flb_config *config)
490574 return 0 ;
491575 }
492576
493- flb_sds_destroy (ctx -> config );
494577 destroy_container_list (ctx );
578+ flb_sds_destroy (ctx -> config );
495579 flb_free (ctx );
496580 return 0 ;
497581}
0 commit comments