Skip to content

Commit 6e6ac23

Browse files
authored
Merge pull request #3729 from rhc54/cmr30/cleanup
Ensure we properly clean up on termination, including when terminating due to Ctrl-C
2 parents ac51814 + ce7a183 commit 6e6ac23

File tree

9 files changed: 46 additions and 47 deletions.

opal/mca/pmix/pmix2x/pmix2x.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,8 +1256,10 @@ static void pmix2x_query(opal_list_t *queries,
12561256
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
12571257
if (0 >= opal_pmix_base.initialized) {
12581258
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
1259-
rc = OPAL_ERR_NOT_INITIALIZED;
1260-
goto CLEANUP;
1259+
if (NULL != cbfunc) {
1260+
cbfunc(OPAL_ERR_NOT_INITIALIZED, NULL, cbdata, NULL, NULL);
1261+
}
1262+
return;
12611263
}
12621264
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
12631265

@@ -1323,8 +1325,10 @@ static void pmix2x_log(opal_list_t *info,
13231325
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
13241326
if (0 >= opal_pmix_base.initialized) {
13251327
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
1326-
rc = OPAL_ERR_NOT_INITIALIZED;
1327-
goto CLEANUP;
1328+
if (NULL != cbfunc) {
1329+
cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata);
1330+
}
1331+
return;
13281332
}
13291333
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
13301334

opal/mca/pmix/pmix2x/pmix2x_client.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ int pmix2x_fence(opal_list_t *procs, int collect_data)
312312
pmix_status_t rc;
313313
opal_namelist_t *ptr;
314314
char *nsptr;
315-
size_t cnt, n;
315+
size_t cnt = 0, n;
316316
pmix_proc_t *parray = NULL;
317317
pmix_info_t info, *iptr;
318318

@@ -729,7 +729,7 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info)
729729
pmix_pdata_t *pdata;
730730
pmix_info_t *pinfo = NULL;
731731
pmix_status_t rc;
732-
size_t cnt, n, sz;
732+
size_t cnt, n, sz = 0;
733733
opal_value_t *iptr;
734734
opal_pmix2x_jobid_trkr_t *jptr, *job;
735735
int ret;
@@ -1000,7 +1000,7 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
10001000
pmix_status_t rc;
10011001
pmix_info_t *info = NULL;
10021002
pmix_app_t *papps;
1003-
size_t ninfo, napps, n, m;
1003+
size_t ninfo = 0, napps, n, m;
10041004
opal_value_t *ival;
10051005
opal_pmix_app_t *app;
10061006
char nspace[PMIX_MAX_NSLEN+1];

orte/mca/ess/base/ess_base_std_app.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ int orte_ess_base_app_finalize(void)
342342
(void) mca_base_framework_close(&orte_state_base_framework);
343343

344344
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
345+
/* cleanup the process info */
346+
orte_proc_info_finalize();
345347

346348
return ORTE_SUCCESS;
347349
}

orte/orted/orted_main.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,10 @@ int orte_daemon(int argc, char *argv[])
933933
orte_finalize();
934934
opal_finalize_util();
935935

936+
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
937+
/* cleanup the process info */
938+
orte_proc_info_finalize();
939+
936940
if (orte_debug_flag) {
937941
fprintf(stderr, "exiting with status %d\n", orte_exit_status);
938942
}

orte/runtime/orte_finalize.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2017 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -39,6 +39,7 @@
3939
#include "orte/runtime/orte_locks.h"
4040
#include "orte/util/listener.h"
4141
#include "orte/util/name_fns.h"
42+
#include "orte/util/proc_info.h"
4243
#include "orte/util/show_help.h"
4344

4445
int orte_finalize(void)
@@ -84,16 +85,16 @@ int orte_finalize(void)
8485
orte_schizo.finalize();
8586
(void) mca_base_framework_close(&orte_schizo_base_framework);
8687

87-
/* cleanup the process info */
88-
orte_proc_info_finalize();
89-
9088
/* Close the general debug stream */
9189
opal_output_close(orte_debug_output);
9290

9391
if (NULL != orte_fork_agent) {
9492
opal_argv_free(orte_fork_agent);
9593
}
9694

95+
/* destruct our process info */
96+
OBJ_DESTRUCT(&orte_process_info.super);
97+
9798
/* finalize the opal utilities */
9899
rc = opal_finalize();
99100

orte/tools/orterun/orterun.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
#include "orte/mca/rml/rml.h"
8787
#include "orte/mca/state/state.h"
8888
#include "orte/util/proc_info.h"
89+
#include "orte/util/session_dir.h"
8990
#include "orte/util/show_help.h"
9091
#include "orte/util/threads.h"
9192

@@ -222,6 +223,9 @@ int orterun(int argc, char *argv[])
222223
/* cleanup and leave */
223224
orte_submit_finalize();
224225
orte_finalize();
226+
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
227+
/* cleanup the process info */
228+
orte_proc_info_finalize();
225229

226230
if (orte_debug_flag) {
227231
fprintf(stderr, "exiting with status %d\n", orte_exit_status);

orte/util/proc_info.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1717
* $COPYRIGHT$
1818
*
@@ -69,7 +69,6 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
6969
.aliases = NULL,
7070
.pid = 0,
7171
.proc_type = ORTE_PROC_TYPE_NONE,
72-
.sync_buf = NULL,
7372
.my_port = 0,
7473
.num_restarts = 0,
7574
.my_node_rank = ORTE_NODE_RANK_INVALID,
@@ -265,9 +264,6 @@ int orte_proc_info(void)
265264
&orte_ess_node_rank);
266265
orte_process_info.my_node_rank = (orte_node_rank_t) orte_ess_node_rank;
267266

268-
/* setup the sync buffer */
269-
orte_process_info.sync_buf = OBJ_NEW(opal_buffer_t);
270-
271267
return ORTE_SUCCESS;
272268
}
273269

@@ -330,11 +326,6 @@ int orte_proc_info_finalize(void)
330326

331327
orte_process_info.proc_type = ORTE_PROC_TYPE_NONE;
332328

333-
OBJ_RELEASE(orte_process_info.sync_buf);
334-
orte_process_info.sync_buf = NULL;
335-
336-
OBJ_DESTRUCT(&orte_process_info.super);
337-
338329
opal_argv_free(orte_process_info.aliases);
339330

340331
init = false;

orte/util/proc_info.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1313
* All rights reserved.
14-
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
14+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
1515
* Copyright (c) 2017 Cisco Systems, Inc. All rights reserved
1616
* $COPYRIGHT$
1717
*
@@ -99,7 +99,6 @@ struct orte_proc_info_t {
9999
char **aliases; /**< aliases for this node */
100100
pid_t pid; /**< Local process ID for this process */
101101
orte_proc_type_t proc_type; /**< Type of process */
102-
opal_buffer_t *sync_buf; /**< buffer to store sync response */
103102
uint16_t my_port; /**< TCP port for out-of-band comm */
104103
int num_restarts; /**< number of times this proc has restarted */
105104
orte_node_rank_t my_node_rank; /**< node rank */

orte/util/session_dir.c

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -370,45 +370,33 @@ int orte_session_dir(bool create, orte_process_name_t *proc)
370370
int
371371
orte_session_dir_cleanup(orte_jobid_t jobid)
372372
{
373-
int rc = ORTE_SUCCESS;
374-
375373
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
376374
/* we haven't created them or RM will clean them up for us*/
377375
return ORTE_SUCCESS;
378376
}
379377

380-
if (NULL == orte_process_info.job_session_dir ||
378+
if (NULL == orte_process_info.jobfam_session_dir ||
381379
NULL == orte_process_info.proc_session_dir) {
382380
/* this should never happen - it means we are calling
383381
* cleanup *before* properly setting up the session
384382
* dir system. This leaves open the possibility of
385383
* accidentally removing directories we shouldn't
386384
* touch
387385
*/
388-
rc = ORTE_ERR_NOT_INITIALIZED;
389-
goto CLEANUP;
386+
return ORTE_ERR_NOT_INITIALIZED;
390387
}
391388

392389
/* recursively blow the whole session away for our job family,
393390
* saving only output files
394391
*/
395-
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
392+
opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir,
396393
true, orte_dir_check_file);
397394

398-
/* now attempt to eliminate the top level directory itself - this
399-
* will fail if anything is present, but ensures we cleanup if
400-
* we are the last one out
401-
*/
402-
if( NULL != orte_process_info.top_session_dir ){
403-
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
404-
false, orte_dir_check_file);
405-
}
406-
407-
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
395+
if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
408396
if (orte_debug_flag) {
409-
opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting");
397+
opal_output(0, "sess_dir_cleanup: found jobfam session dir empty - deleting");
410398
}
411-
rmdir(orte_process_info.job_session_dir);
399+
rmdir(orte_process_info.jobfam_session_dir);
412400
} else {
413401
if (orte_debug_flag) {
414402
if (OPAL_ERR_NOT_FOUND ==
@@ -418,12 +406,10 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
418406
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
419407
}
420408
}
421-
goto CLEANUP;
422409
}
423410

424-
if ( NULL != orte_process_info.top_session_dir ){
425-
426-
if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) {
411+
if (NULL != orte_process_info.top_session_dir) {
412+
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
427413
if (orte_debug_flag) {
428414
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
429415
}
@@ -440,9 +426,17 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
440426
}
441427
}
442428

443-
CLEANUP:
429+
/* now attempt to eliminate the top level directory itself - this
430+
* will fail if anything is present, but ensures we cleanup if
431+
* we are the last one out
432+
*/
433+
if( NULL != orte_process_info.top_session_dir ){
434+
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
435+
false, orte_dir_check_file);
436+
}
437+
444438

445-
return rc;
439+
return ORTE_SUCCESS;
446440
}
447441

448442

0 commit comments

Comments
 (0)