Skip to content

Commit 2b16b24

Browse files
stokscedsiper
authored and committed
reload: fix race between watchdog start and setting async cancellation
Signed-off-by: Bradley Laney <[email protected]>
1 parent 0def4e6 commit 2b16b24

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

src/flb_reload.c

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -383,21 +383,28 @@ static int flb_reload_reinstantiate_external_plugins(struct flb_config *src, str
383383
struct flb_reload_watchdog_ctx {
384384
pthread_t tid;
385385
int timeout_seconds;
386+
volatile int should_stop;
386387
};
387388

388389
static void *hot_reload_watchdog_thread(void *arg)
389390
{
390-
int loop_sleep;
391+
int elapsed_ms = 0;
392+
int timeout_ms;
391393
struct flb_reload_watchdog_ctx *ctx = (struct flb_reload_watchdog_ctx *)arg;
392-
393-
/* Set async cancellation type for (mostly) immediate response to pthread_cancel */
394-
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
395394

396-
/* loop for each sleep in a busy pattern to avoid delaying flb_reload() */
397-
for (loop_sleep = 0; loop_sleep < ctx->timeout_seconds*10; loop_sleep++) {
398-
flb_time_msleep(100);
395+
timeout_ms = ctx->timeout_seconds * 1000;
396+
397+
/* Check should_stop flag every 100ms while tracking elapsed time */
398+
while (elapsed_ms < timeout_ms) {
399+
if (ctx->should_stop) {
400+
/* Clean shutdown requested */
401+
return NULL;
402+
}
403+
flb_time_msleep(100);
404+
elapsed_ms += 100;
399405
}
400406

407+
/* Only abort if we timed out, not if cleanly signaled to stop */
401408
flb_error("[hot_reload_watchdog] Hot reload timeout exceeded (%d seconds), "
402409
"aborting to prevent indefinite hang", ctx->timeout_seconds);
403410
abort();
@@ -419,14 +426,15 @@ static struct flb_reload_watchdog_ctx *flb_reload_watchdog_start(struct flb_conf
419426
return NULL;
420427
}
421428
watchdog_ctx->timeout_seconds = config->hot_reload_watchdog_timeout_seconds;
429+
watchdog_ctx->should_stop = 0;
422430

423431
ret = pthread_create(&watchdog_ctx->tid, NULL, hot_reload_watchdog_thread, watchdog_ctx);
424432
if (ret != 0) {
425433
flb_error("[reload] Failed to create hot reload watchdog thread: %d", ret);
426434
flb_free(watchdog_ctx);
427435
return NULL;
428436
}
429-
437+
430438
flb_debug("[reload] Hot reload watchdog thread started");
431439
return watchdog_ctx;
432440
}
@@ -437,9 +445,12 @@ static void flb_reload_watchdog_cleanup(struct flb_reload_watchdog_ctx *watchdog
437445
return;
438446
}
439447

440-
pthread_cancel(watchdog_ctx->tid);
448+
/* Signal thread to stop cooperatively */
449+
watchdog_ctx->should_stop = 1;
450+
451+
/* Wait for graceful thread exit */
441452
pthread_join(watchdog_ctx->tid, NULL);
442-
flb_debug("[reload] Hot reload watchdog thread cancelled");
453+
flb_debug("[reload] Hot reload watchdog thread stopped");
443454

444455
flb_free(watchdog_ctx);
445456
}

0 commit comments

Comments
 (0)