|
45 | 45 | #include "src/common/libfluxutil/method.h" |
46 | 46 | #include "ccan/array_size/array_size.h" |
47 | 47 | #include "ccan/str/str.h" |
| 48 | +#include "ccan/ptrint/ptrint.h" |
48 | 49 |
|
49 | 50 | #include "module.h" |
50 | 51 | #include "brokercfg.h" |
@@ -1308,7 +1309,8 @@ static void broker_destroy_sigwatcher (void *data) |
1308 | 1309 |
|
1309 | 1310 | static int broker_handle_signals (broker_ctx_t *ctx) |
1310 | 1311 | { |
1311 | | - int i, sigs[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGALRM }; |
| 1312 | + int i, sigs[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM, |
| 1313 | + SIGALRM, SIGUSR1, SIGUSR2 }; |
1312 | 1314 | int blocked[] = { SIGPIPE }; |
1313 | 1315 | flux_watcher_t *w; |
1314 | 1316 |
|
@@ -1965,13 +1967,80 @@ static void module_status_cb (module_t *p, int prev_status, void *arg) |
1965 | 1967 | } |
1966 | 1968 | } |
1967 | 1969 |
|
/* Return true if signum is one of the signals the broker treats as
 * "deadly": SIGHUP, SIGINT, SIGQUIT, SIGTERM, or SIGALRM.
 * Any other signal number yields false.
 */
static bool signal_is_deadly (int signum)
{
    switch (signum) {
        case SIGHUP:
        case SIGINT:
        case SIGQUIT:
        case SIGTERM:
        case SIGALRM:
            return true;
        default:
            return false;
    }
}
| 1979 | + |
| 1980 | +static void killall_cb (flux_future_t *f, void *arg) |
| 1981 | +{ |
| 1982 | + broker_ctx_t *ctx = arg; |
| 1983 | + int count = 0; |
| 1984 | + if (flux_rpc_get_unpack (f, "{s:i}", "count", &count) < 0) { |
| 1985 | + flux_log_error (ctx->h, |
| 1986 | + "job-manager.killall: %s", |
| 1987 | + future_strerror (f, errno)); |
| 1988 | + } |
| 1989 | + flux_future_destroy (f); |
| 1990 | + if (count) { |
| 1991 | + flux_log (ctx->h, |
| 1992 | + LOG_INFO, |
| 1993 | + "forwarded signal %d to %d jobs", |
| 1994 | + (int) ptr2int (flux_future_aux_get (f, "signal")), |
| 1995 | + count); |
| 1996 | + } |
| 1997 | +} |
| 1998 | + |
| 1999 | +static int killall_jobs (broker_ctx_t *ctx, int signum) |
| 2000 | +{ |
| 2001 | + flux_future_t *f = NULL; |
| 2002 | + if (!(f = flux_rpc_pack (ctx->h, |
| 2003 | + "job-manager.killall", |
| 2004 | + FLUX_NODEID_ANY, |
| 2005 | + 0, |
| 2006 | + "{s:b s:i s:i}", |
| 2007 | + "dry_run", 0, |
| 2008 | + "userid", FLUX_USERID_UNKNOWN, |
| 2009 | + "signum", signum)) |
| 2010 | + || flux_future_then (f, -1., killall_cb, ctx) < 0) { |
| 2011 | + flux_future_destroy (f); |
| 2012 | + return -1; |
| 2013 | + } |
| 2014 | + if (flux_future_aux_set (f, "signum", int2ptr (signum), NULL) < 0) |
| 2015 | + flux_log_error (ctx->h, "killall: future_aux_set"); |
| 2016 | + return 0; |
| 2017 | +} |
| 2018 | + |
1968 | 2019 | static void signal_cb (flux_reactor_t *r, flux_watcher_t *w, |
1969 | | - int revents, void *arg) |
| 2020 | + int revents, void *arg) |
1970 | 2021 | { |
1971 | 2022 | broker_ctx_t *ctx = arg; |
1972 | 2023 | int signum = flux_signal_watcher_get_signum (w); |
1973 | 2024 |
|
1974 | 2025 | flux_log (ctx->h, LOG_INFO, "signal %d", signum); |
| 2026 | + |
| 2027 | + if (ctx->rank == 0 && !signal_is_deadly (signum)) { |
| 2028 | + /* Attempt to forward non-deadly signals to jobs. If that fails, |
| 2029 | + * then fall through to state_machine_kill() so the signal is |
| 2030 | + * delivered somewhere. |
| 2031 | + */ |
| 2032 | + if (killall_jobs (ctx, signum) == 0) |
| 2033 | + return; |
| 2034 | + /* |
| 2035 | + * Note: flux_rpc(3) in the rank 0 broker to the job manager module |
| 2036 | + * is expected to fail immediately if the job-manager module is not |
| 2037 | + * loaded due to the broker internal flux_t handle implementation. |
| 2038 | + */ |
| 2039 | + flux_log (ctx->h, |
| 2040 | + LOG_INFO, |
| 2041 | + "killall failed, delivering signal %d locally instead", |
| 2042 | + signum); |
| 2043 | + } |
1975 | 2044 | state_machine_kill (ctx->state_machine, signum); |
1976 | 2045 | } |
1977 | 2046 |
|
|
0 commit comments