Skip to content

Commit a7741ab

Browse files
author
Ralph Castain
authored
Merge pull request #3702 from rhc54/topic/rf
Fix rank-file mapper launch by correctly setting up the remote map from the provided data
2 parents 7d07659 + 8f09929 commit a7741ab

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

opal/mca/pmix/pmix2x/pmix2x.c

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -219,10 +219,12 @@ static void _event_hdlr(int sd, short args, void *cbdata)
219219
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
220220
"%s _EVENT_HDLR CALLING EVHDLR",
221221
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
222-
event->handler(cd->status, &cd->pname,
223-
cd->info, &cd->results,
224-
return_local_event_hdlr, (void*)cd);
225-
return;
222+
if (NULL != event->handler) {
223+
event->handler(cd->status, &cd->pname,
224+
cd->info, &cd->results,
225+
return_local_event_hdlr, (void*)cd);
226+
return;
227+
}
226228
}
227229
}
228230
/* if we didn't find a match, we still have to call their final callback */

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
279279
int rc;
280280
orte_std_cntr_t cnt;
281281
orte_job_t *jdata=NULL, *daemons;
282+
orte_node_t *node;
282283
int32_t n, k;
283284
opal_buffer_t *bptr;
284285
orte_proc_t *pptr, *dmn;
@@ -436,7 +437,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
436437
/* not ready for use yet */
437438
continue;
438439
}
439-
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
440+
if (!ORTE_PROC_IS_HNP &&
441+
orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
440442
/* the parser will have already made the connection, but the fully described
441443
* case won't have done it, so connect the proc to its node here */
442444
opal_output_verbose(5, orte_odls_base_framework.framework_output,
@@ -457,6 +459,17 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
457459
}
458460
OBJ_RETAIN(dmn->node);
459461
pptr->node = dmn->node;
462+
/* add the node to the job map, if needed */
463+
if (!ORTE_FLAG_TEST(pptr->node, ORTE_NODE_FLAG_MAPPED)) {
464+
OBJ_RETAIN(pptr->node);
465+
opal_pointer_array_add(jdata->map->nodes, pptr->node);
466+
jdata->map->num_nodes++;
467+
ORTE_FLAG_SET(pptr->node, ORTE_NODE_FLAG_MAPPED);
468+
}
469+
/* add this proc to that node */
470+
OBJ_RETAIN(pptr);
471+
opal_pointer_array_add(pptr->node->procs, pptr);
472+
pptr->node->num_procs++;
460473
}
461474
/* see if it belongs to us */
462475
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
@@ -485,6 +498,14 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
485498
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
486499
}
487500
}
501+
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
502+
/* reset the mapped flags */
503+
for (n=0; n < jdata->map->nodes->size; n++) {
504+
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
505+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
506+
}
507+
}
508+
}
488509

489510
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
490511
/* compute and save bindings of local children */

0 commit comments

Comments
 (0)