Skip to content

Commit b7c0767

Browse files
committed
comm: couple of enhancements wrt pmix groups
Add a PMIx timeout option for group operations. This may be a no-op with certain PMIx server variants, but set it anyway. For protection, make sure that when creating intercomm communicators using PMIx group construct, all procs supply the same ordered list of PMIx procs to PMIx group construct. Update the description for ompi_mca_mpi_pmix_connect_timeout to note that it can be used to control the timeout for PMIx group calls as well. Signed-off-by: Howard Pritchard <[email protected]>
1 parent 9ba5034 commit b7c0767

File tree

2 files changed

+56
-13
lines changed

2 files changed

+56
-13
lines changed

ompi/communicator/comm_cid.c

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,16 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
315315
opal_process_name_t opal_proc_name;
316316
bool cid_base_set = false;
317317
char *tag = NULL;
318-
size_t proc_count = 0, rproc_count = 0, tproc_count = 0, cid_base = 0UL, ninfo;
318+
size_t tproc_count = 0, cid_base = 0UL, ninfo;
319319
int rc, leader_rank;
320320
pmix_proc_t *procs = NULL;
321321
void *grpinfo = NULL, *list = NULL;
322322
pmix_data_array_t darray;
323+
pmix_info_t tinfo;
324+
ompi_proc_t* order_procs[2];
325+
ompi_group_t *the_grps[2];
326+
size_t the_grp_sizes[2] = {0};
327+
bool reorder_grps = false;
323328

324329
switch (mode) {
325330
case OMPI_COMM_CID_GROUP_NEW:
@@ -349,6 +354,13 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
349354
goto fn_exit;
350355
}
351356

357+
rc = PMIx_Info_list_add(grpinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
358+
if (PMIX_SUCCESS != rc) {
359+
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Info_list_add failed %s %d", PMIx_Error_string(rc), __LINE__));
360+
rc = OMPI_ERR_OUT_OF_RESOURCE;
361+
goto fn_exit;
362+
}
363+
352364
list = PMIx_Info_list_start();
353365

354366
size_t c_index = (size_t)newcomm->c_index;
@@ -383,24 +395,51 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
383395
pinfo = (pmix_info_t*)darray.array;
384396
ninfo = darray.size;
385397

386-
proc_count = newcomm->c_local_group->grp_proc_count;
398+
/*
399+
* Make sure all processes participating in the PMIx group construct operation
400+
* use the same order of pmix procs. Added protection in case the underlying
401+
* PMIx implementation expects it. We use the first proc in the local
402+
* and remote groups to determine whether to add the remote or local procs
403+
* first to the pmix procs list.
404+
*/
405+
the_grp_sizes[0] = newcomm->c_local_group->grp_proc_count;
406+
the_grps[0] = newcomm->c_local_group;
387407
if ( OMPI_COMM_IS_INTER (newcomm) ){
388-
rproc_count = newcomm->c_remote_group->grp_proc_count;
408+
the_grp_sizes[1] = newcomm->c_remote_group->grp_proc_count;
409+
the_grps[1] = newcomm->c_remote_group;
410+
411+
order_procs[0] = ompi_group_get_proc_ptr (newcomm->c_local_group, 0, false);
412+
order_procs[1] = ompi_group_get_proc_ptr (newcomm->c_remote_group, 0, false);
413+
414+
if (order_procs[0]->super.proc_name.jobid > order_procs[1]->super.proc_name.jobid ||
415+
(order_procs[0]->super.proc_name.jobid == order_procs[1]->super.proc_name.jobid &&
416+
order_procs[0]->super.proc_name.vpid > order_procs[1]->super.proc_name.vpid)) {
417+
reorder_grps = true;
418+
}
419+
}
420+
421+
if (true == reorder_grps) {
422+
size_t itmp = the_grp_sizes[0];
423+
ompi_group_t *tmp_grp = the_grps[0];
424+
the_grp_sizes[0] = the_grp_sizes[1];
425+
the_grp_sizes[1] = itmp;
426+
the_grps[0] = the_grps[1];
427+
the_grps[1] = tmp_grp;
389428
}
390429

391-
PMIX_PROC_CREATE(procs, proc_count + rproc_count);
430+
tproc_count = the_grp_sizes[0] + the_grp_sizes[1];
392431

393-
for (size_t i = 0 ; i < proc_count; ++i) {
394-
opal_proc_name = ompi_group_get_proc_name(newcomm->c_local_group, i);
432+
PMIX_PROC_CREATE(procs, tproc_count);
433+
434+
for (size_t i = 0 ; i < the_grp_sizes[0]; ++i) {
435+
opal_proc_name = ompi_group_get_proc_name(the_grps[0], i);
395436
OPAL_PMIX_CONVERT_NAME(&procs[i],&opal_proc_name);
396437
}
397-
for (size_t i = 0; i < rproc_count; ++i) {
398-
opal_proc_name = ompi_group_get_proc_name(newcomm->c_remote_group, i);
399-
OPAL_PMIX_CONVERT_NAME(&procs[proc_count+i],&opal_proc_name);
438+
for (size_t i = 0; i < the_grp_sizes[1]; ++i) {
439+
opal_proc_name = ompi_group_get_proc_name(the_grps[1], i);
440+
OPAL_PMIX_CONVERT_NAME(&procs[the_grp_sizes[0]+i],&opal_proc_name);
400441
}
401442

402-
tproc_count = proc_count + rproc_count;
403-
404443
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "calling PMIx_Group_construct - tag %s size %ld ninfo %ld cid_base %ld\n",
405444
tag, tproc_count, ninfo, cid_base));
406445
rc = PMIx_Group_construct(tag, procs, tproc_count, pinfo, ninfo, &results, &nresults);
@@ -450,7 +489,11 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
450489
tag, tproc_count, ninfo, cid_base));
451490

452491
/* destruct the group */
453-
rc = PMIx_Group_destruct (tag, NULL, 0);
492+
493+
PMIX_INFO_CONSTRUCT(&tinfo);
494+
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
495+
rc = PMIx_Group_destruct (tag, &tinfo, 1);
496+
PMIX_INFO_DESTRUCT(&tinfo);
454497
if(PMIX_SUCCESS != rc) {
455498
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Group_destruct failed %s", PMIx_Error_string(rc)));
456499
rc = opal_pmix_convert_status(rc);

ompi/runtime/ompi_mpi_params.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ int ompi_mpi_register_params(void)
391391

392392
ompi_pmix_connect_timeout = 0; /* infinite timeout - see PMIx standard */
393393
(void) mca_base_var_register ("ompi", "mpi", NULL, "pmix_connect_timeout",
394-
"Timeout(secs) for calls to PMIx_Connect. Default is no timeout.",
394+
"Timeout(secs) for calls to PMIx_Connect and PMIx_Group_construct/destruct. Default is no timeout.",
395395
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
396396
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
397397
&ompi_pmix_connect_timeout);

0 commit comments

Comments
 (0)