Skip to content

Commit d0b97d7

Browse files
authored
Merge pull request #2528 from rhc54/cmr20x/signals
Ensure that we always send a SIGTERM prior to SIGKILL to give child processes a chance to cleanup
2 parents ac8c019 + b9420bb commit d0b97d7

File tree

2 files changed

+90
-78
lines changed

2 files changed

+90
-78
lines changed

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 68 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,21 +1361,22 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata)
13611361
}
13621362

13631363
typedef struct {
1364+
opal_list_item_t super;
13641365
orte_proc_t *child;
1365-
orte_odls_base_kill_local_fn_t kill_local;
1366-
} odls_kill_caddy_t;
1367-
1368-
static void kill_cbfunc(int fd, short args, void *cbdata)
1366+
} orte_odls_quick_caddy_t;
1367+
static void qcdcon(orte_odls_quick_caddy_t *p)
13691368
{
1370-
odls_kill_caddy_t *cd = (odls_kill_caddy_t*)cbdata;
1371-
1372-
if (!ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_ALIVE) || 0 == cd->child->pid) {
1373-
free(cd);
1374-
return;
1369+
p->child = NULL;
1370+
}
1371+
static void qcddes(orte_odls_quick_caddy_t *p)
1372+
{
1373+
if (NULL != p->child) {
1374+
OBJ_RELEASE(p->child);
13751375
}
1376-
cd->kill_local(cd->child->pid, SIGKILL);
1377-
free(cd);
13781376
}
1377+
OBJ_CLASS_INSTANCE(orte_odls_quick_caddy_t,
1378+
opal_list_item_t,
1379+
qcdcon, qcddes);
13791380

13801381
int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
13811382
orte_odls_base_kill_local_fn_t kill_local,
@@ -1387,6 +1388,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
13871388
int i, j;
13881389
opal_pointer_array_t procarray, *procptr;
13891390
bool do_cleanup;
1391+
orte_odls_quick_caddy_t *cd;
13901392

13911393
OBJ_CONSTRUCT(&procs_killed, opal_list_t);
13921394

@@ -1503,68 +1505,78 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
15031505
*/
15041506
orte_wait_cb_cancel(child);
15051507

1506-
if (!do_cleanup) {
1507-
odls_kill_caddy_t *cd;
1508-
1509-
/* if we are killing only selected procs, then do so in a gentle
1510-
fashion. First send a SIGCONT in case the process is in stopped state.
1511-
If it is in a stopped state and we do not first change it to
1512-
running, then SIGTERM will not get delivered. Ignore return
1513-
value. */
1514-
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1515-
"%s SENDING SIGCONT TO %s",
1516-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1517-
ORTE_NAME_PRINT(&child->name)));
1518-
kill_local(child->pid, SIGCONT);
1508+
/* First send a SIGCONT in case the process is in stopped state.
1509+
If it is in a stopped state and we do not first change it to
1510+
running, then SIGTERM will not get delivered. Ignore return
1511+
value. */
1512+
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1513+
"%s SENDING SIGCONT TO %s",
1514+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1515+
ORTE_NAME_PRINT(&child->name)));
1516+
cd = OBJ_NEW(orte_odls_quick_caddy_t);
1517+
OBJ_RETAIN(child);
1518+
cd->child = child;
1519+
opal_list_append(&procs_killed, &cd->super);
1520+
kill_local(child->pid, SIGCONT);
1521+
continue;
15191522

1520-
/* Send a sigterm to the process before sigkill to be nice */
1521-
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1522-
"%s SENDING SIGTERM TO %s",
1523-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1524-
ORTE_NAME_PRINT(&child->name)));
1525-
kill_local(child->pid, SIGTERM);
1526-
/* provide a polite delay so the proc has a chance to react */
1527-
cd = (odls_kill_caddy_t*)malloc(sizeof(odls_kill_caddy_t));
1528-
OBJ_RETAIN(child); // protect against race conditions
1529-
cd->child = child;
1530-
cd->kill_local = kill_local;
1531-
ORTE_TIMER_EVENT(1, 0, kill_cbfunc, ORTE_SYS_PRI);
1532-
continue;
1523+
CLEANUP:
1524+
/* ensure the child's session directory is cleaned up */
1525+
orte_session_dir_finalize(&child->name);
1526+
/* check for everything complete - this will remove
1527+
* the child object from our local list
1528+
*/
1529+
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
1530+
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
1531+
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
15331532
}
1533+
}
1534+
}
15341535

1535-
/* Force the SIGKILL just to make sure things are dead
1536-
* This fixes an issue that, if the application is masking
1537-
* SIGTERM, then the child_died()
1538-
* may return 'true' even though waipid returns with 0.
1539-
* It does this to avoid a race condition, per documentation
1540-
* in odls_default_module.c.
1541-
*/
1536+
/* if we are issuing signals, then we need to wait a little
1537+
* and send the next in sequence */
1538+
if (0 < opal_list_get_size(&procs_killed)) {
1539+
sleep(orte_odls_globals.timeout_before_sigkill);
1540+
/* issue a SIGTERM to all */
1541+
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
15421542
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1543-
"%s SENDING FORCE SIGKILL TO %s pid %lu",
1543+
"%s SENDING SIGTERM TO %s",
15441544
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1545-
ORTE_NAME_PRINT(&child->name), (unsigned long)child->pid));
1546-
kill_local(child->pid, SIGKILL);
1547-
1545+
ORTE_NAME_PRINT(&child->name)));
1546+
kill_local(cd->child->pid, SIGTERM);
1547+
}
1548+
/* wait a little again */
1549+
sleep(orte_odls_globals.timeout_before_sigkill);
1550+
/* issue a SIGKILL to all */
1551+
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
1552+
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1553+
"%s SENDING SIGKILL TO %s",
1554+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1555+
ORTE_NAME_PRINT(&child->name)));
1556+
kill_local(cd->child->pid, SIGKILL);
15481557
/* indicate the waitpid fired as this is effectively what
15491558
* has happened
15501559
*/
1551-
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_WAITPID);
1552-
child->pid = 0;
1560+
ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID);
1561+
cd->child->pid = 0;
1562+
1563+
/* mark the child as "killed" */
1564+
cd->child->state = ORTE_PROC_STATE_KILLED_BY_CMD; /* we ordered it to die */
15531565

1554-
CLEANUP:
15551566
/* ensure the child's session directory is cleaned up */
1556-
orte_session_dir_finalize(&child->name);
1567+
orte_session_dir_finalize(&cd->child->name);
15571568
/* check for everything complete - this will remove
15581569
* the child object from our local list
15591570
*/
1560-
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
1561-
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
1562-
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
1571+
if (ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
1572+
ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_WAITPID)) {
1573+
ORTE_ACTIVATE_PROC_STATE(&cd->child->name, cd->child->state);
15631574
}
15641575
}
15651576
}
1577+
OPAL_LIST_DESTRUCT(&procs_killed);
15661578

1567-
/* cleanup, if required */
1579+
/* cleanup arrays, if required */
15681580
if (do_cleanup) {
15691581
OBJ_DESTRUCT(&procarray);
15701582
OBJ_DESTRUCT(&proctmp);

orte/mca/odls/default/odls_default_module.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2010 IBM Corporation. All rights reserved.
1616
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
1717
* reserved.
18-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
18+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
1919
*
2020
* $COPYRIGHT$
2121
*
@@ -194,18 +194,18 @@ static bool odls_default_child_died(orte_proc_t *child)
194194
* that occasionally causes us to incorrectly report a proc
195195
* as refusing to die. Unfortunately, errno may not be reset
196196
* by waitpid in this case, so we cannot check it.
197-
*
198-
* (note the previous fix to this, to return 'process dead'
199-
* here, fixes the race condition at the cost of reporting
200-
* all live processes have immediately died! Better to
201-
* occasionally report a dead process as still living -
202-
* which will occasionally trip the timeout for cases that
203-
* are right on the edge.)
197+
*
198+
* (note the previous fix to this, to return 'process dead'
199+
* here, fixes the race condition at the cost of reporting
200+
* all live processes have immediately died! Better to
201+
* occasionally report a dead process as still living -
202+
* which will occasionally trip the timeout for cases that
203+
* are right on the edge.)
204204
*/
205205
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
206206
"%s odls:default:WAITPID INDICATES PID %d MAY HAVE ALREADY EXITED",
207207
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
208-
/* Do nothing, process still alive */
208+
/* Do nothing, process still alive */
209209
} else if (-1 == ret && ECHILD == errno) {
210210
/* The pid no longer exists, so we'll call this "good
211211
enough for government work" */
@@ -392,12 +392,12 @@ static int do_child(orte_app_context_t* context,
392392
long fd, fdmax = sysconf(_SC_OPEN_MAX);
393393
char *param, *msg;
394394

395-
if (orte_forward_job_control) {
396-
/* Set a new process group for this child, so that a
397-
SIGSTOP can be sent to it without being sent to the
398-
orted. */
399-
setpgid(0, 0);
400-
}
395+
#if HAVE_SETPGID
396+
/* Set a new process group for this child, so that a
397+
SIGSTOP can be sent to it without being sent to the
398+
orted. */
399+
setpgid(0, 0);
400+
#endif
401401

402402
/* Setup the pipe to be close-on-exec */
403403
opal_fd_set_cloexec(write_fd);
@@ -710,7 +710,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
710710
}
711711

712712
if (pid == 0) {
713-
close(p[0]);
713+
close(p[0]);
714714
#if HAVE_SETPGID
715715
setpgid(0, 0);
716716
#endif
@@ -760,11 +760,12 @@ static int send_signal(pid_t pid, int signal)
760760
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
761761
signal, (long)pid));
762762

763-
if (orte_forward_job_control) {
764-
/* Send the signal to the process group rather than the
765-
process. The child is the leader of its process group. */
766-
pid = -pid;
767-
}
763+
#if HAVE_SETPGID
764+
/* Send the signal to the process group rather than the
765+
process. The child is the leader of its process group. */
766+
pid = -pid;
767+
#endif
768+
768769
if (kill(pid, signal) != 0) {
769770
switch(errno) {
770771
case EINVAL:
@@ -811,4 +812,3 @@ static int orte_odls_default_restart_proc(orte_proc_t *child)
811812
}
812813
return rc;
813814
}
814-

0 commit comments

Comments
 (0)