Skip to content

Commit a9005d6

Browse files
author
Ralph Castain
authored
Merge pull request #3679 from rhc54/topic/spawn
Fix the backend mapper algorithm for comm_spawn. The front and back e…
2 parents bdc7206 + 7b39f19 commit a9005d6

File tree

6 files changed

+61
-59
lines changed

6 files changed

+61
-59
lines changed

orte/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -378,26 +378,18 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
378378
*/
379379
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
380380
ORTE_ERROR_LOG(rc);
381-
OBJ_RELEASE(caddy);
382381
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
383-
return;
382+
goto cleanup;
384383
}
385384
}
386-
/* reset any node map flags we used so the next job will start clean */
387-
for (i=0; i < jdata->map->nodes->size; i++) {
388-
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
389-
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
390-
}
391-
}
392385

393386
if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) {
394387
/* the map was done but nothing could be mapped
395388
* for launch as all the resources were busy
396389
*/
397390
orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true);
398-
OBJ_RELEASE(caddy);
399391
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
400-
return;
392+
goto cleanup;
401393
}
402394

403395
/* if we get here without doing the map, or with zero procs in
@@ -407,9 +399,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
407399
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true,
408400
did_map ? "mapped" : "unmapped",
409401
jdata->num_procs, jdata->map->num_nodes);
410-
OBJ_RELEASE(caddy);
411402
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
412-
return;
403+
goto cleanup;
413404
}
414405

415406
/* if any node is oversubscribed, then check to see if a binding
@@ -423,28 +414,38 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
423414
}
424415

425416
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
417+
/* the mappers only flag the nodes they use rather than adding them to the
418+
* job map, since for a dynamic spawn (which may start somewhere other than
419+
* at the beginning of the allocation) that could order them differently
420+
* than on the backend - so add the flagged nodes here in node pool order */
421+
for (i=0; i < orte_node_pool->size; i++) {
422+
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
423+
continue;
424+
}
425+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
426+
OBJ_RETAIN(node);
427+
opal_pointer_array_add(jdata->map->nodes, node);
428+
}
429+
}
426430
/* compute and save location assignments */
427431
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
428432
ORTE_ERROR_LOG(rc);
429-
OBJ_RELEASE(caddy);
430433
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
431-
return;
434+
goto cleanup;
432435
}
433436
} else {
434437
/* compute and save local ranks */
435438
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
436439
ORTE_ERROR_LOG(rc);
437-
OBJ_RELEASE(caddy);
438440
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
439-
return;
441+
goto cleanup;
440442
}
441443

442444
/* compute and save bindings */
443445
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
444446
ORTE_ERROR_LOG(rc);
445-
OBJ_RELEASE(caddy);
446447
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
447-
return;
448+
goto cleanup;
448449
}
449450
}
450451

@@ -465,6 +466,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
465466
/* set the job state to the next position */
466467
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
467468

469+
cleanup:
470+
/* reset any node map flags we used so the next job will start clean */
471+
for (i=0; i < jdata->map->nodes->size; i++) {
472+
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
473+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
474+
}
475+
}
476+
468477
/* cleanup */
469478
OBJ_RELEASE(caddy);
470479
}

orte/mca/rmaps/ppr/rmaps_ppr.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,7 @@ static int ppr_mapper(orte_job_t *jdata)
275275
}
276276
/* add the node to the map, if needed */
277277
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
278-
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
279-
ORTE_ERROR_LOG(rc);
280-
goto error;
281-
}
282278
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
283-
OBJ_RETAIN(node); /* maintain accounting on object */
284279
jdata->map->num_nodes++;
285280
}
286281
/* if we are mapping solely at the node level, just put
@@ -407,7 +402,7 @@ static int ppr_mapper(orte_job_t *jdata)
407402
}
408403
return ORTE_SUCCESS;
409404

410-
error:
405+
error:
411406
while (NULL != (item = opal_list_remove_first(&node_list))) {
412407
OBJ_RELEASE(item);
413408
}

orte/mca/rmaps/round_robin/rmaps_rr_mappers.c

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
4343
orte_std_cntr_t num_slots,
4444
orte_vpid_t num_procs)
4545
{
46-
int rc, i, nprocs_mapped;
46+
int i, nprocs_mapped;
4747
orte_node_t *node;
4848
orte_proc_t *proc;
4949
int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
@@ -94,12 +94,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
9494
for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
9595
/* add this node to the map - do it only once */
9696
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
97-
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
98-
ORTE_ERROR_LOG(rc);
99-
return rc;
100-
}
10197
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
102-
OBJ_RETAIN(node); /* maintain accounting on object */
10398
++(jdata->map->num_nodes);
10499
}
105100
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
@@ -149,12 +144,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
149144

150145
/* add this node to the map - do it only once */
151146
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
152-
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
153-
ORTE_ERROR_LOG(rc);
154-
return rc;
155-
}
156147
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
157-
OBJ_RETAIN(node); /* maintain accounting on object */
158148
++(jdata->map->num_nodes);
159149
}
160150
if (add_one) {
@@ -221,7 +211,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
221211
int j, nprocs_mapped, nnodes;
222212
orte_node_t *node;
223213
orte_proc_t *proc;
224-
int num_procs_to_assign, navg, idx;
214+
int num_procs_to_assign, navg;
225215
int extra_procs_to_assign=0, nxtra_nodes=0;
226216
hwloc_obj_t obj=NULL;
227217
float balance;
@@ -293,12 +283,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
293283
}
294284
/* add this node to the map, but only do so once */
295285
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
296-
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
297-
ORTE_ERROR_LOG(idx);
298-
return idx;
299-
}
300286
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
301-
OBJ_RETAIN(node); /* maintain accounting on object */
302287
++(jdata->map->num_nodes);
303288
}
304289
if (oversubscribed) {
@@ -456,7 +441,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
456441
orte_node_t *node;
457442
orte_proc_t *proc;
458443
int nprocs, start;
459-
int idx;
460444
hwloc_obj_t obj=NULL;
461445
unsigned int nobjs;
462446
bool add_one;
@@ -547,12 +531,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
547531
}
548532
/* add this node to the map, if reqd */
549533
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
550-
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
551-
ORTE_ERROR_LOG(idx);
552-
return idx;
553-
}
554534
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
555-
OBJ_RETAIN(node); /* maintain accounting on object */
556535
++(jdata->map->num_nodes);
557536
}
558537
nmapped = 0;
@@ -638,7 +617,6 @@ static int byobj_span(orte_job_t *jdata,
638617
orte_node_t *node;
639618
orte_proc_t *proc;
640619
int nprocs, nxtra_objs;
641-
int idx;
642620
hwloc_obj_t obj=NULL;
643621
unsigned int nobjs;
644622

@@ -699,12 +677,7 @@ static int byobj_span(orte_job_t *jdata,
699677
OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
700678
/* add this node to the map, if reqd */
701679
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
702-
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
703-
ORTE_ERROR_LOG(idx);
704-
return idx;
705-
}
706680
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
707-
OBJ_RETAIN(node); /* maintain accounting on object */
708681
++(jdata->map->num_nodes);
709682
}
710683
/* get the number of objects of this type on this node */

orte/runtime/data_type_support/orte_dt_packing_fns.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
6464
int32_t num_vals, opal_data_type_t type)
6565
{
6666
int rc;
67-
int32_t i, j, count;
67+
int32_t i, j, count, bookmark;
6868
orte_job_t **jobs;
6969
orte_app_context_t *app;
7070
orte_proc_t *proc;
@@ -241,7 +241,16 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
241241
}
242242
}
243243

244-
/* do not pack the bookmark or oversubscribe_override flags */
244+
/* pack the bookmark */
245+
if (NULL == jobs[i]->bookmark) {
246+
bookmark = -1;
247+
} else {
248+
bookmark = jobs[i]->bookmark->index;
249+
}
250+
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &bookmark, 1, OPAL_INT32))) {
251+
ORTE_ERROR_LOG(rc);
252+
return rc;
253+
}
245254

246255
/* pack the job state */
247256
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,

orte/runtime/data_type_support/orte_dt_unpacking_fns.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
6161
int32_t *num_vals, opal_data_type_t type)
6262
{
6363
int rc;
64-
int32_t i, k, n, count;
64+
int32_t i, k, n, count, bookmark;
6565
orte_job_t **jobs;
6666
orte_app_idx_t j;
6767
orte_attribute_t *kv;
@@ -237,7 +237,17 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
237237
}
238238
}
239239

240-
/* no bookmark or oversubscribe_override flags to unpack */
240+
/* unpack the bookmark */
241+
n = 1;
242+
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
243+
&bookmark, &n, OPAL_INT32))) {
244+
ORTE_ERROR_LOG(rc);
245+
return rc;
246+
}
247+
if (0 <= bookmark) {
248+
/* retrieve it */
249+
jobs[i]->bookmark = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, bookmark);
250+
}
241251

242252
/* unpack the job state */
243253
n = 1;

orte/test/mpi/simple_spawn.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <stdio.h>
2+
#include <stdlib.h>
23
#include <sys/types.h>
34
#include <unistd.h>
45
#include <sys/param.h>
@@ -12,9 +13,15 @@ int main(int argc, char* argv[])
1213
int rank, size;
1314
char hostname[MAXHOSTNAMELEN];
1415
pid_t pid;
16+
char *env_rank,*env_nspace;
1517

18+
env_rank = getenv("PMIX_RANK");
19+
env_nspace = getenv("PMIX_NAMESPACE");
1620
pid = getpid();
17-
printf("[pid %ld] starting up!\n", (long)pid);
21+
gethostname(hostname, sizeof(hostname));
22+
23+
printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname);
24+
1825
MPI_Init(NULL, NULL);
1926
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
2027
printf("%d completed MPI_Init\n", rank);
@@ -42,7 +49,6 @@ int main(int argc, char* argv[])
4249
else {
4350
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
4451
MPI_Comm_size(MPI_COMM_WORLD, &size);
45-
gethostname(hostname, sizeof(hostname));
4652
pid = getpid();
4753
printf("Hello from the child %d of %d on host %s pid %ld\n", rank, 3, hostname, (long)pid);
4854
if (0 == rank) {

0 commit comments

Comments
 (0)