Skip to content

Commit 44d7d48

Browse files
committed
broker: run shutdown script not cleanup commands
Problem: the cleanup commands pushed into broker memory in rc1 are not easily maintained or extended.

Create a new "shutdown" script that lives next to rc1 and rc3. Run this script instead of the "cleanup" commands when the broker CLEANUP state is entered. Upon completion of the shutdown script, the broker transitions to the SHUTDOWN state.
1 parent a0588da commit 44d7d48

File tree

5 files changed

+40
-19
lines changed

5 files changed

+40
-19
lines changed

etc/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ tmpfiles_DATA = flux.conf
1010

1111
dist_fluxconf_SCRIPTS = \
1212
rc1 \
13-
rc3
13+
rc3 \
14+
shutdown
1415

1516
dist_fluxrc1_SCRIPTS = \
1617
rc1.d/02-cron

etc/rc1

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,3 @@ if test $RANK -eq 0 -a "${FLUX_SCHED_MODULE}" != "none" \
109109
-a -z "$(lookup_sched_module)"; then
110110
flux module load ${FLUX_SCHED_MODULE:-sched-simple}
111111
fi
112-
113-
if test $RANK -eq 0; then
114-
if test -z "${FLUX_DISABLE_JOB_CLEANUP}"; then
115-
flux admin cleanup-push <<-EOT
116-
flux queue stop --quiet --all --nocheckpoint
117-
flux cancel --user=all --quiet --states RUN
118-
flux queue idle --quiet
119-
EOT
120-
fi
121-
fi

etc/shutdown

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/sh
2+
3+
flux queue stop --quiet --all --nocheckpoint
4+
flux cancel --user=all --quiet --states RUN
5+
flux queue idle --quiet

src/broker/broker.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,11 @@ static void init_attrs_rc_paths (attr_t *attrs)
594594
flux_conf_builtin_get ("rc1_path", FLUX_CONF_AUTO),
595595
0) < 0)
596596
log_err_exit ("attr_add rc1_path");
597-
597+
if (attr_add (attrs,
598+
"broker.shutdown_path",
599+
flux_conf_builtin_get ("shutdown_path", FLUX_CONF_AUTO),
600+
0) < 0)
601+
log_err_exit ("attr_add shutdown_path");
598602
if (attr_add (attrs,
599603
"broker.rc3_path",
600604
flux_conf_builtin_get ("rc3_path", FLUX_CONF_AUTO),
@@ -723,7 +727,7 @@ static int create_runat_rc2 (struct runat *r, const char *argz, size_t argz_len)
723727

724728
static int create_runat_phases (broker_ctx_t *ctx)
725729
{
726-
const char *rc1, *rc3, *local_uri;
730+
const char *rc1, *rc3, *shutdown, *local_uri;
727731
bool rc2_none = false;
728732

729733
if (attr_get (ctx->attrs, "local-uri", &local_uri, NULL) < 0) {
@@ -734,6 +738,10 @@ static int create_runat_phases (broker_ctx_t *ctx)
734738
log_err ("broker.rc1_path is not set");
735739
return -1;
736740
}
741+
if (attr_get (ctx->attrs, "broker.shutdown_path", &shutdown, NULL) < 0) {
742+
log_err ("broker.shutdown_path is not set");
743+
return -1;
744+
}
737745
if (attr_get (ctx->attrs, "broker.rc3_path", &rc3, NULL) < 0) {
738746
log_err ("broker.rc3_path is not set");
739747
return -1;
@@ -768,6 +776,18 @@ static int create_runat_phases (broker_ctx_t *ctx)
768776
}
769777
}
770778

779+
/* shutdown - clean up in preparation for instance shutdown
780+
*/
781+
if (ctx->rank == 0 && shutdown && strlen (shutdown) > 0) {
782+
if (runat_push_shell_command (ctx->runat,
783+
"shutdown",
784+
shutdown,
785+
RUNAT_FLAG_LOG_STDIO) < 0) {
786+
log_err ("runat_push_shell_command shutdown");
787+
return -1;
788+
}
789+
}
790+
771791
/* rc3 - finalization
772792
*/
773793
if (rc3 && strlen (rc3) > 0) {

src/broker/state_machine.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,16 @@ static void action_run (struct state_machine *s)
358358
#endif
359359
}
360360

361+
/* In the cleanup state, we run the shutdown script. When the shutdown
362+
* script is complete, we enter shutdown state.
363+
*/
361364
static void action_cleanup (struct state_machine *s)
362365
{
363-
if (runat_is_defined (s->ctx->runat, "cleanup")) {
364-
if (runat_start (s->ctx->runat, "cleanup", runat_completion_cb, s) < 0) {
365-
flux_log_error (s->ctx->h, "runat_start cleanup");
366+
if (runat_is_defined (s->ctx->runat, "shutdown")) {
367+
if (runat_start (s->ctx->runat,
368+
"shutdown",
369+
runat_completion_cb, s) < 0) {
370+
flux_log_error (s->ctx->h, "runat_start shutdown");
366371
state_machine_post (s, "cleanup-fail");
367372
}
368373
}
@@ -525,8 +530,8 @@ void state_machine_kill (struct state_machine *s, int signum)
525530
state_machine_post (s, "shutdown");
526531
break;
527532
case STATE_CLEANUP:
528-
if (runat_abort (s->ctx->runat, "cleanup") < 0)
529-
flux_log_error (h, "runat_abort cleanup (signal %d)", signum);
533+
if (runat_abort (s->ctx->runat, "shutdown") < 0)
534+
flux_log_error (h, "runat_abort shutdown (signal %d)", signum);
530535
break;
531536
case STATE_FINALIZE:
532537
(void)runat_abort (s->ctx->runat, "rc3");
@@ -603,7 +608,7 @@ static void runat_completion_cb (struct runat *r, const char *name, void *arg)
603608
s->ctx->exit_rc = rc;
604609
state_machine_post (s, rc == 0 ? "rc2-success" : "rc2-fail");
605610
}
606-
else if (streq (name, "cleanup")) {
611+
else if (streq (name, "shutdown")) {
607612
if (s->ctx->exit_rc == 0 && rc != 0)
608613
s->ctx->exit_rc = rc;
609614
state_machine_post (s, rc == 0 ? "cleanup-success" : "cleanup-fail");

0 commit comments

Comments
 (0)