Skip to content

Commit 5f4693e

Browse files
committed
Add a PMIx key that lets the RM indicate that it will clean up
the session directories provided through OPAL_PMIX_TMPDIR, OPAL_PMIX_NSDIR, and OPAL_PMIX_PROCDIR
1 parent c43cefd commit 5f4693e

File tree

4 files changed

+40
-23
lines changed

4 files changed

+40
-23
lines changed

opal/mca/pmix/pmix_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ BEGIN_C_DECLS
7575
#define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
7676
#define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace
7777
#define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc
78+
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
7879

7980
/* information about relative ranks as assigned by the RM */
8081
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler

orte/mca/ess/pmi/ess_pmi_module.c

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ static int rte_init(void)
9494
uint16_t u16, *u16ptr;
9595
char **peers=NULL, *mycpuset, **cpusets=NULL;
9696
opal_process_name_t wildcard_rank, pname;
97+
bool bool_val, tdir_mca_override = false;
9798
size_t i;
9899

99100
/* run the prolog */
@@ -246,37 +247,51 @@ static int rte_init(void)
246247
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
247248
if (OPAL_SUCCESS == ret && NULL != val) {
248249
/* TODO: who has precedence - the PMIx value or the MCA setting? */
249-
if( NULL == orte_process_info.top_session_dir ){
250+
if( NULL != orte_process_info.top_session_dir ){
250251
orte_process_info.top_session_dir = val;
251252
} else {
252253
/* keep the MCA setting */
254+
tdir_mca_override = true;
253255
free(val);
254256
}
255257
val = NULL;
256258
}
257259

258-
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
259-
if (OPAL_SUCCESS == ret && NULL != val) {
260-
/* TODO: who has precedence - pmix of MCA setting??? */
261-
if( NULL == orte_process_info.job_session_dir ){
262-
orte_process_info.job_session_dir = val;
263-
} else {
264-
/* keep the MCA setting */
265-
free(val);
260+
if( !tdir_mca_override ){
261+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
262+
if (OPAL_SUCCESS == ret && NULL != val) {
263+
/* TODO: who has precedence - the PMIx value or the MCA setting? */
264+
if( NULL == orte_process_info.job_session_dir ){
265+
orte_process_info.job_session_dir = val;
266+
} else {
267+
/* keep the MCA setting */
268+
free(val);
269+
tdir_mca_override = true;
270+
}
271+
val = NULL;
266272
}
267-
val = NULL;
268273
}
269274

270-
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
271-
if (OPAL_SUCCESS == ret && NULL != val) {
272-
/* TODO: who has precedence - pmix of MCA setting??? */
273-
if( NULL == orte_process_info.proc_session_dir ){
274-
orte_process_info.proc_session_dir = val;
275-
} else {
276-
/* keep the MCA setting */
277-
free(val);
275+
if( !tdir_mca_override ){
276+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
277+
if (OPAL_SUCCESS == ret && NULL != val) {
278+
/* TODO: who has precedence - the PMIx value or the MCA setting? */
279+
if( NULL == orte_process_info.proc_session_dir ){
280+
orte_process_info.proc_session_dir = val;
281+
} else {
282+
/* keep the MCA setting */
283+
tdir_mca_override = true;
284+
free(val);
285+
}
286+
val = NULL;
287+
}
288+
}
289+
290+
if( !tdir_mca_override ){
291+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TDIR_RMCLEAN, &wildcard_rank, &bool_val, OPAL_BOOL);
292+
if (OPAL_SUCCESS == ret ) {
293+
orte_process_info.rm_session_dirs = val;
278294
}
279-
val = NULL;
280295
}
281296

282297
/* retrieve our topology */

orte/util/proc_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ struct orte_proc_info_t {
122122
char *jobfam_session_dir; /**< Session directory for this family of jobs (i.e., share same mpirun) */
123123
char *job_session_dir; /**< Session directory for job */
124124
char *proc_session_dir; /**< Session directory for the process */
125+
bool rm_session_dirs; /**< Session directories will be cleaned up by RM */
125126

126127
char *sock_stdin; /**< Path name to temp file for stdin. */
127128
char *sock_stdout; /**< Path name to temp file for stdout. */

orte/util/session_dir.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,8 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
366366
{
367367
int rc = ORTE_SUCCESS;
368368

369-
if (!orte_create_session_dirs ) {
370-
/* we haven't created them */
369+
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
370+
/* we haven't created them, or the RM will clean them up for us */
371371
return ORTE_SUCCESS;
372372
}
373373

@@ -447,8 +447,8 @@ orte_session_dir_finalize(orte_process_name_t *proc)
447447
char *tmp;
448448
char *job_session_dir, *vpid, *proc_session_dir;
449449

450-
if (!orte_create_session_dirs ) {
451-
/* we haven't created them */
450+
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
451+
/* we haven't created them, or the RM will clean them up for us */
452452
return ORTE_SUCCESS;
453453
}
454454

0 commit comments

Comments
 (0)