@@ -1767,7 +1767,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
17671767 orte_proc_t * child ;
17681768 opal_list_t procs_killed ;
17691769 orte_proc_t * proc , proctmp ;
1770- int i , j ;
1770+ int i , j , ret ;
17711771 opal_pointer_array_t procarray , * procptr ;
17721772 bool do_cleanup ;
17731773 orte_odls_quick_caddy_t * cd ;
@@ -1913,7 +1913,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
19131913 /* if we are issuing signals, then we need to wait a little
19141914 * and send the next in sequence */
19151915 if (0 < opal_list_get_size (& procs_killed )) {
1916- sleep (orte_odls_globals .timeout_before_sigkill );
1916+ /* Wait a little. Do so in a loop, since sleep() can be interrupted by a
1917+ * signal (most likely SIGCHLD here) and then returns the unslept seconds. */
1918+ ret = orte_odls_globals .timeout_before_sigkill ;
1919+ while ( ret > 0 ) {
1920+ OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
1921+ "%s Sleep %d sec (total = %d)" ,
1922+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1923+ ret , orte_odls_globals .timeout_before_sigkill ));
1924+ ret = sleep (ret );
1925+ }
1926+
19171927 /* issue a SIGTERM to all */
19181928 OPAL_LIST_FOREACH (cd , & procs_killed , orte_odls_quick_caddy_t ) {
19191929 OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
@@ -1922,8 +1932,18 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
19221932 ORTE_NAME_PRINT (& cd -> child -> name )));
19231933 kill_local (cd -> child -> pid , SIGTERM );
19241934 }
1925- /* wait a little again */
1926- sleep (orte_odls_globals .timeout_before_sigkill );
1935+
1936+ /* Wait a little. Do so in a loop, since sleep() can be interrupted by a
1937+ * signal (most likely SIGCHLD here) and then returns the unslept seconds. */
1938+ ret = orte_odls_globals .timeout_before_sigkill ;
1939+ while ( ret > 0 ) {
1940+ OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
1941+ "%s Sleep %d sec (total = %d)" ,
1942+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1943+ ret , orte_odls_globals .timeout_before_sigkill ));
1944+ ret = sleep (ret );
1945+ }
1946+
19271947 /* issue a SIGKILL to all */
19281948 OPAL_LIST_FOREACH (cd , & procs_killed , orte_odls_quick_caddy_t ) {
19291949 OPAL_OUTPUT_VERBOSE ((5 , orte_odls_base_framework .framework_output ,
0 commit comments