@@ -216,7 +216,6 @@ static void setup_sighandler(int signal, prte_event_t *ev, prte_event_cbfunc_t c
216216static void shutdown_callback (int fd , short flags , void * arg )
217217{
218218 prte_timer_t * tm = (prte_timer_t * ) arg ;
219- prte_job_t * jdata ;
220219 PRTE_HIDE_UNUSED_PARAMS (fd , flags );
221220
222221 if (NULL != tm ) {
@@ -234,8 +233,11 @@ static void shutdown_callback(int fd, short flags, void *arg)
234233 prte_odls .kill_local_procs (NULL );
235234 // mark that we are finalizing so the session directory will cleanup
236235 prte_finalizing = true;
237- jdata = prte_get_job_data_object (PRTE_PROC_MY_NAME -> nspace );
238- PMIX_RELEASE (jdata );
236+ #ifdef PRTE_PMIX_STOP_PRGTHRD
237+ PMIx_Progress_thread_stop (NULL , 0 );
238+ #endif
239+ prte_job_session_dir_finalize (NULL );
240+ PMIx_server_finalize ();
239241 exit (PRTE_ERROR_DEFAULT_EXIT_CODE );
240242}
241243
@@ -1224,12 +1226,13 @@ int prte(int argc, char *argv[])
12241226 * indicating clean termination! Instead, just forcibly cleanup
12251227 * the local session_dir tree and exit
12261228 */
1227- jdata = prte_get_job_data_object (PRTE_PROC_MY_NAME -> nspace );
1228- PMIX_RELEASE (jdata );
1229-
1230- /* return with non-zero status */
1231- ret = PRTE_ERROR_DEFAULT_EXIT_CODE ;
1232- goto DONE ;
1229+ prte_finalizing = true;
1230+ #ifdef PRTE_PMIX_STOP_PRGTHRD
1231+ PMIx_Progress_thread_stop (NULL , 0 );
1232+ #endif
1233+ prte_job_session_dir_finalize (NULL );
1234+ PMIx_server_finalize ();
1235+ exit (PRTE_ERROR_DEFAULT_EXIT_CODE );
12331236 }
12341237 }
12351238 }
@@ -1430,56 +1433,6 @@ int prte(int argc, char *argv[])
14301433 exit (prte_exit_status );
14311434}
14321435
1433- static void clean_abort (int fd , short flags , void * arg )
1434- {
1435- PRTE_HIDE_UNUSED_PARAMS (fd , flags );
1436-
1437- if (keepalive && NULL == arg ) {
1438- // ignore this
1439- return ;
1440- }
1441-
1442- /* if we have already ordered this once, don't keep
1443- * doing it to avoid race conditions
1444- */
1445- if (pmix_mutex_trylock (& prun_abort_inprogress_lock )) { /* returns 1 if already locked */
1446- if (forcibly_die ) {
1447- /* exit with a non-zero status */
1448- exit (1 );
1449- }
1450- fprintf (stderr ,
1451- "%s: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n" ,
1452- prte_tool_basename );
1453- forcibly_die = true;
1454- /* reset the event */
1455- prte_event_add (& term_handler , NULL );
1456- return ;
1457- }
1458-
1459- fflush (stderr );
1460- /* ensure we exit with a non-zero status */
1461- PRTE_UPDATE_EXIT_STATUS (PRTE_ERROR_DEFAULT_EXIT_CODE );
1462- /* ensure that the forwarding of stdin stops */
1463- prte_dvm_abort_ordered = true;
1464- /* tell us to be quiet - hey, the user killed us with a ctrl-c,
1465- * so need to tell them that!
1466- */
1467- prte_execute_quiet = true;
1468- prte_abnormal_term_ordered = true;
1469- /* We are in an event handler; the job completed procedure
1470- will delete the signal handler that is currently running
1471- (which is a Bad Thing), so we can't call it directly.
1472- Instead, we have to exit this handler and setup to call
1473- job_completed() after this. */
1474- prte_plm .terminate_orteds ();
1475- if (NULL != arg ) {
1476- PMIX_RELEASE (arg );
1477- }
1478- }
1479-
1480- static bool first = true;
1481- static bool second = true;
1482-
14831436static void surekill (void )
14841437{
14851438 prte_proc_t * child ;
@@ -1514,6 +1467,47 @@ static void surekill(void)
15141467 }
15151468}
15161469
1470+ static void clean_abort (int fd , short flags , void * arg ) // abort path: kill remaining procs, clean up the session dir, finalize the PMIx server, and exit non-zero
1471+ {
1472+ PRTE_HIDE_UNUSED_PARAMS (fd , flags ); // fd and flags are not otherwise referenced in this function
1473+
1474+ if (keepalive && NULL == arg ) {
1475+ // keepalive is set and no arg was passed: ignore this request
1476+ return ;
1477+ }
1478+
1479+ /* if an abort has already been ordered (the trylock below fails),
1480+ * don't start another one - this avoids race conditions
1481+ */
1482+ if (pmix_mutex_trylock (& prun_abort_inprogress_lock )) { /* returns 1 if already locked */
1483+ if (forcibly_die ) {
1484+ /* forcibly_die already set: exit now with a non-zero status */
1485+ exit (1 );
1486+ }
1487+ fprintf (stderr ,
1488+ "%s: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n" ,
1489+ prte_tool_basename );
1490+ forcibly_die = true;
1491+ /* re-add the term_handler event before returning */
1492+ prte_event_add (& term_handler , NULL );
1493+ return ;
1494+ }
1495+
1496+ fflush (stderr );
1497+ prte_finalizing = true; // mark that we are finalizing before the session directory cleanup below
1498+ /* tear everything down, then exit with a non-zero status (see exit() at the end) */
1499+ #ifdef PRTE_PMIX_STOP_PRGTHRD
1500+ PMIx_Progress_thread_stop (NULL , 0 );
1501+ #endif
1502+ surekill (); // ensure we attempt to kill everything
1503+ prte_job_session_dir_finalize (NULL );
1504+ PMIx_server_finalize ();
1505+ exit (PRTE_ERROR_DEFAULT_EXIT_CODE );
1506+ }
1507+
1508+ static bool first = true;
1509+ static bool second = true;
1510+
15171511/*
15181512 * Attempt to terminate the job and wait for callback indicating
15191513 * the job has been aborted.
@@ -1541,7 +1535,12 @@ static void abort_signal_callback(int fd)
15411535 second = false;
15421536 } else {
15431537 surekill (); // ensure we attempt to kill everything
1538+ prte_finalizing = true;
1539+ #ifdef PRTE_PMIX_STOP_PRGTHRD
1540+ PMIx_Progress_thread_stop (NULL , 0 );
1541+ #endif
15441542 prte_job_session_dir_finalize (NULL );
1543+ PMIx_server_finalize ();
15451544 exit (1 );
15461545 }
15471546}
0 commit comments