Skip to content

Commit 88313de

Browse files
author
Ralph Castain
committed
Per the discussion on the email thread, restore the placement of child procs in their own process groups so that any signal sent to one of our children is automatically propagated to any child processes it might have spawned.
Signed-off-by: Ralph Castain <[email protected]>
1 parent b2e36f0 commit 88313de

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1466,7 +1466,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
14661466
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(procptr, i))) {
14671467
continue;
14681468
}
1469-
for(j=0; j < orte_local_children->size; j++) {
1469+
for (j=0; j < orte_local_children->size; j++) {
14701470
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, j))) {
14711471
continue;
14721472
}

orte/mca/odls/default/odls_default_module.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,22 @@ orte_odls_base_module_t orte_odls_default_module = {
167167
/* deliver a signal to a specified pid. */
168168
static int odls_default_kill_local(pid_t pid, int signum)
169169
{
170+
pid_t pgrp;
171+
172+
#if HAVE_SETPGID
173+
pgrp = getpgid(pid);
174+
if (-1 != pgrp) {
175+
/* target the lead process of the process
176+
* group so we ensure that the signal is
177+
* seen by all members of that group. This
178+
* ensures that the signal is seen by any
179+
* child processes our child may have
180+
* started
181+
*/
182+
pid = pgrp;
183+
}
184+
#endif
185+
170186
if (0 != kill(pid, signum)) {
171187
if (ESRCH != errno) {
172188
OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
@@ -313,6 +329,10 @@ static int do_child(orte_app_context_t* context,
313329
long fd, fdmax = sysconf(_SC_OPEN_MAX);
314330
char *param, *msg;
315331

332+
/* Set a new process group for this child, so that any
333+
* signals we send to it will reach any children it spawns */
334+
setpgid(0, 0);
335+
316336
/* Setup the pipe to be close-on-exec */
317337
opal_fd_set_cloexec(write_fd);
318338

@@ -717,4 +737,3 @@ static int orte_odls_default_restart_proc(orte_proc_t *child)
717737
}
718738
return rc;
719739
}
720-

0 commit comments

Comments
 (0)