Skip to content

Commit fc1413b

Browse files
authored
Merge pull request #6623 from garlick/issue#6621
log helpful info when shutdown is stuck
2 parents b7141c4 + 576a8ac commit fc1413b

File tree

4 files changed

+123
-12
lines changed

4 files changed

+123
-12
lines changed

src/broker/overlay.c

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,24 @@ int overlay_get_child_peer_count (struct overlay *ov)
372372
return count;
373373
}
374374

375+
struct idset *overlay_get_child_peer_idset (struct overlay *ov)
376+
{
377+
struct idset *ids;
378+
struct child *child;
379+
380+
if (!(ids = idset_create (ov->size, 0)))
381+
return NULL;
382+
foreach_overlay_child (ov, child) {
383+
if (subtree_is_online (child->status))
384+
if (idset_set (ids, child->rank) < 0)
385+
goto error;
386+
}
387+
return ids;
388+
error:
389+
idset_destroy (ids);
390+
return NULL;
391+
}
392+
375393
void overlay_set_ipv6 (struct overlay *ov, int enable)
376394
{
377395
ov->enable_ipv6 = enable;
@@ -945,6 +963,17 @@ static void child_cb (flux_reactor_t *r,
945963
goto done;
946964
}
947965
if (!(child = child_lookup_online (ov, uuid))) {
966+
bool is_hello = false;
967+
json_int_t rank = FLUX_NODEID_ANY;
968+
969+
if (type == FLUX_MSGTYPE_REQUEST
970+
&& flux_msg_get_topic (msg, &topic) == 0
971+
&& streq (topic, "overlay.hello")) {
972+
// extract the hello rank so we can include it in the trace
973+
(void)flux_request_unpack (msg, NULL, "{s:I}", "rank", &rank);
974+
is_hello = true;
975+
}
976+
trace_overlay_msg (ov->h, "rx", rank, ov->trace_requests, msg);
948977
/* This is a new peer trying to introduce itself by sending an
949978
* overlay.hello request.
950979
* N.B. the broker generates a new UUID on startup, and hello is only
@@ -953,15 +982,7 @@ static void child_cb (flux_reactor_t *r,
953982
* we don't bother checking if we've seen this UUID before, which can
954983
* be slow given current design. See flux-framework/flux-core#5864.
955984
*/
956-
if (type == FLUX_MSGTYPE_REQUEST
957-
&& flux_msg_get_topic (msg, &topic) == 0
958-
&& streq (topic, "overlay.hello")
959-
&& !ov->shutdown_in_progress) {
960-
trace_overlay_msg (ov->h,
961-
"rx",
962-
FLUX_NODEID_ANY,
963-
ov->trace_requests,
964-
msg);
985+
if (is_hello && !ov->shutdown_in_progress) {
965986
hello_request_handler (ov, msg);
966987
}
967988
/* Or one of the following cases occurred that requires (or at least

src/broker/overlay.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ uint32_t overlay_get_rank (struct overlay *ov);
9595
void overlay_set_rank (struct overlay *ov, uint32_t rank); // test only
9696
uint32_t overlay_get_size (struct overlay *ov);
9797
int overlay_get_child_peer_count (struct overlay *ov);
98+
struct idset *overlay_get_child_peer_idset (struct overlay *ov);
9899
const char *overlay_get_bind_uri (struct overlay *ov);
99100
const char *overlay_get_parent_uri (struct overlay *ov);
100101
int overlay_set_parent_uri (struct overlay *ov, const char *uri);
@@ -148,6 +149,7 @@ int overlay_connect (struct overlay *ov);
148149
* The following accessors may be useful in the callback:
149150
* - overlay_parent_error() - test whether TBON parent connection has failed
150151
* - overlay_get_child_peer_count() - number of online children
152+
* - overlay_get_child_peer_idset () - set of online children
151153
* - overlay_get_subtree_status (rank) - subtree status of child
152154
* - overlay_get_subtree_topo (rank) - topology of subtree rooted at child
153155
*/

src/broker/state_machine.c

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ struct cleanup {
5454
flux_watcher_t *timer;
5555
};
5656

57+
struct shutdown {
58+
double timeout;
59+
flux_watcher_t *timer;
60+
};
61+
5762
struct monitor {
5863
struct flux_msglist *requests;
5964

@@ -78,6 +83,7 @@ struct state_machine {
7883
struct monitor monitor;
7984
struct quorum quorum;
8085
struct cleanup cleanup;
86+
struct shutdown shutdown;
8187

8288
struct flux_msglist *wait_requests;
8389

@@ -159,6 +165,7 @@ static struct state_next nexttab[] = {
159165
};
160166

161167
static const double default_quorum_timeout = 60; // log slow joiners
168+
static const double default_shutdown_timeout = 60; // log slow shutdown
162169
static const double default_cleanup_timeout = -1;
163170
static const double goodbye_timeout = 60;
164171

@@ -424,17 +431,49 @@ static void action_finalize (struct state_machine *s)
424431
state_machine_post (s, "rc3-none");
425432
}
426433

434+
static void shutdown_timer_cb (flux_reactor_t *r,
435+
flux_watcher_t *w,
436+
int revents,
437+
void *arg)
438+
{
439+
struct state_machine *s = arg;
440+
struct idset *ranks = overlay_get_child_peer_idset (s->ctx->overlay);
441+
char *rankstr = idset_encode (ranks, IDSET_FLAG_RANGE);
442+
char *hoststr = flux_hostmap_lookup (s->ctx->h, rankstr, NULL);
443+
444+
flux_log (s->ctx->h,
445+
LOG_ERR,
446+
"shutdown delayed: waiting for %d peers: %s (rank %s)",
447+
overlay_get_child_peer_count (s->ctx->overlay),
448+
hoststr ? hoststr : "?",
449+
rankstr ? rankstr : "?");
450+
451+
free (hoststr);
452+
free (rankstr);
453+
idset_destroy (ranks);
454+
455+
flux_timer_watcher_reset (w, s->shutdown.timeout, 0.);
456+
flux_watcher_start (w);
457+
}
458+
459+
427460
static void action_shutdown (struct state_machine *s)
428461
{
429-
if (overlay_get_child_peer_count (s->ctx->overlay) == 0)
462+
if (overlay_get_child_peer_count (s->ctx->overlay) == 0) {
430463
state_machine_post (s, "children-none");
464+
return;
465+
}
431466
#if HAVE_LIBSYSTEMD
432467
if (s->ctx->sd_notify) {
433468
sd_notifyf (0,
434469
"STATUS=Waiting for %d peers to shutdown",
435470
overlay_get_child_peer_count (s->ctx->overlay));
436471
}
437472
#endif
473+
if (s->shutdown.timeout >= 0) {
474+
flux_timer_watcher_reset (s->shutdown.timer, s->shutdown.timeout, 0.);
475+
flux_watcher_start (s->shutdown.timer);
476+
}
438477
}
439478

440479
static void goodbye_continuation (flux_future_t *f, void *arg)
@@ -1144,6 +1183,7 @@ static void overlay_monitor_cb (struct overlay *overlay,
11441183
void *arg)
11451184
{
11461185
struct state_machine *s = arg;
1186+
int count;
11471187

11481188
switch (s->state) {
11491189
/* IN JOIN state, post parent-fail if something goes wrong with the
@@ -1166,8 +1206,21 @@ static void overlay_monitor_cb (struct overlay *overlay,
11661206
* node) the exit event is posted immediately in action_shutdown().
11671207
*/
11681208
case STATE_SHUTDOWN:
1169-
if (overlay_get_child_peer_count (overlay) == 0)
1209+
count = overlay_get_child_peer_count (overlay);
1210+
if (count == 0) {
11701211
state_machine_post (s, "children-complete");
1212+
flux_watcher_stop (s->shutdown.timer);
1213+
}
1214+
#if HAVE_LIBSYSTEMD
1215+
else {
1216+
if (s->ctx->sd_notify) {
1217+
sd_notifyf (0,
1218+
"STATUS=Waiting for %d peer%s to shutdown",
1219+
count,
1220+
count > 1 ? "s" : "");
1221+
}
1222+
}
1223+
#endif
11711224
break;
11721225
default:
11731226
break;
@@ -1255,6 +1308,7 @@ void state_machine_destroy (struct state_machine *s)
12551308
flux_watcher_destroy (s->quorum.timer);
12561309
flux_future_destroy (s->quorum.f);
12571310
flux_watcher_destroy (s->cleanup.timer);
1311+
flux_watcher_destroy (s->shutdown.timer);
12581312
free (s);
12591313
errno = saved_errno;
12601314
}
@@ -1289,7 +1343,12 @@ struct state_machine *state_machine_create (struct broker *ctx)
12891343
0.,
12901344
0.,
12911345
cleanup_timer_cb,
1292-
s)))
1346+
s))
1347+
|| !(s->shutdown.timer = flux_timer_watcher_create (r,
1348+
0.,
1349+
0.,
1350+
shutdown_timer_cb,
1351+
s)))
12931352
goto error;
12941353
flux_watcher_start (s->prep);
12951354
flux_watcher_start (s->check);
@@ -1320,6 +1379,13 @@ struct state_machine *state_machine_create (struct broker *ctx)
13201379
log_err ("error configuring cleanup timeout attribute");
13211380
goto error;
13221381
}
1382+
if (timeout_configure (s,
1383+
"broker.shutdown-timeout",
1384+
&s->shutdown.timeout,
1385+
default_shutdown_timeout) < 0) {
1386+
log_err ("error configuring shutdown timeout attribute");
1387+
goto error;
1388+
}
13231389
norestart_configure (s);
13241390
overlay_set_monitor_cb (ctx->overlay, overlay_monitor_cb, s);
13251391
if (s->ctx->rank == 0) {

t/t0025-broker-state-machine.t

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,4 +334,26 @@ test_expect_success 'cleanup gets SIGHUP after broker.cleanup-timeout expires' '
334334
./killbroker 15 60
335335
'
336336

337+
test_expect_success 'create hanging rc3 for rank > 0' '
338+
cat <<-EOT >rc3_hang &&
339+
#!/bin/sh
340+
rank=\$(flux getattr rank)
341+
test \$rank -eq 0 && exit 0
342+
sleep 5
343+
EOT
344+
chmod +x rc3_hang
345+
'
346+
347+
test_expect_success 'run instance with short shutdown timeout' '
348+
flux start -s3 \
349+
-Slog-filename=shutdown.log \
350+
-Sbroker.rc1_path= \
351+
-Sbroker.rc3_path="$(pwd)/rc3_hang" \
352+
-Sbroker.shutdown-timeout=1s \
353+
true
354+
'
355+
test_expect_success 'appropriate message was logged' '
356+
grep "shutdown delayed" shutdown.log
357+
'
358+
337359
test_done

0 commit comments

Comments
 (0)