Skip to content

Commit 322f6c5

Browse files
author
Ralph Castain
committed
Fix a breakage in the ranking system
While it may be faster to reverse the order of the assignment loops, it also results in the wrong answer.

Signed-off-by: Ralph Castain <[email protected]>
1 parent c1c0c02 commit 322f6c5

File tree

6 files changed

+111
-42
lines changed

6 files changed

+111
-42
lines changed

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,17 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata)
190190

191191
ORTE_ACQUIRE_OBJECT(caddy);
192192

193-
/* move the state machine along */
194-
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
195-
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
193+
/* if we don't want to launch, then we at least want
194+
* to map so we can see where the procs would have
195+
* gone - so skip to the mapping state */
196+
if (orte_do_not_launch) {
197+
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
198+
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
199+
} else {
200+
/* move the state machine along */
201+
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
202+
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
203+
}
196204

197205
/* cleanup */
198206
OBJ_RELEASE(caddy);

orte/mca/ras/base/ras_base_node.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
1313
* reserved.
14-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
14+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1515
* Copyright (c) 2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
1717
* $COPYRIGHT$
@@ -50,6 +50,8 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
5050
bool hnp_alone = true, skiphnp = false;
5151
orte_attribute_t *kv;
5252
char **alias=NULL, **nalias;
53+
orte_proc_t *daemon;
54+
orte_job_t *djob;
5355

5456
/* get the number of nodes */
5557
num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
@@ -76,6 +78,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
7678
return rc;
7779
}
7880

81+
/* if we are not launching, get the daemon job */
82+
djob = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
83+
7984
/* get the hnp node's info */
8085
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
8186

@@ -189,6 +194,21 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
189194
ORTE_ERROR_LOG(rc);
190195
return rc;
191196
}
197+
if (orte_do_not_launch) {
198+
/* create a daemon for this node since we won't be launching
199+
* and the mapper needs to see a daemon - this is used solely
200+
* for testing the mappers */
201+
daemon = OBJ_NEW(orte_proc_t);
202+
daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
203+
daemon->name.vpid = node->index;
204+
daemon->state = ORTE_PROC_STATE_RUNNING;
205+
OBJ_RETAIN(node);
206+
daemon->node = node;
207+
opal_pointer_array_set_item(djob->procs, daemon->name.vpid, daemon);
208+
djob->num_procs++;
209+
OBJ_RETAIN(daemon);
210+
node->daemon = daemon;
211+
}
192212
/* update the total slots in the job */
193213
orte_ras_base.total_slots_alloc += node->slots;
194214
/* check if we have fqdn names in the allocation */

orte/mca/ras/simulator/ras_sim_module.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
44
* Copyright (c) 2015-2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
6-
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
6+
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
77
*
88
* $COPYRIGHT$
99
*
@@ -23,6 +23,7 @@
2323
#include "opal/mca/hwloc/hwloc-internal.h"
2424
#include "opal/util/argv.h"
2525

26+
#include "orte/mca/errmgr/errmgr.h"
2627
#include "orte/util/show_help.h"
2728
#include "orte/runtime/orte_globals.h"
2829

@@ -179,6 +180,10 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
179180
support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
180181
support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
181182
support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
183+
/* pass it thru the filter so we create the summaries required by the mappers */
184+
if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
185+
ORTE_ERROR_LOG(ORTE_ERROR);
186+
}
182187
/* add it to our array */
183188
t = OBJ_NEW(orte_topology_t);
184189
t->topo = topo;

orte/mca/rmaps/base/rmaps_base_binding.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015-2017 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -246,7 +246,7 @@ static int bind_downwards(orte_job_t *jdata,
246246
hwloc_obj_type_t target,
247247
unsigned cache_level)
248248
{
249-
int j;
249+
int j, rc;
250250
orte_job_map_t *map;
251251
orte_proc_t *proc;
252252
hwloc_obj_t trg_obj, nxt_obj;
@@ -367,7 +367,10 @@ static int bind_downwards(orte_job_t *jdata,
367367
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
368368
ORTE_NAME_PRINT(&proc->name), node->name);
369369
} else {
370-
opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
370+
rc = opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
371+
if (OPAL_SUCCESS != rc) {
372+
ORTE_ERROR_LOG(rc);
373+
}
371374
opal_output(orte_rmaps_base_framework.framework_output,
372375
"%s BOUND PROC %s[%s] TO %s: %s",
373376
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -841,7 +844,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
841844
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
842845
continue;
843846
}
844-
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
847+
if (!orte_no_vm && !orte_do_not_launch &&
848+
(int)ORTE_PROC_MY_NAME->vpid != node->index) {
845849
continue;
846850
}
847851
if (!orte_do_not_launch) {

orte/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
417417
}
418418
}
419419

420-
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
420+
if (orte_do_not_launch) {
421+
/* compute the ranks and add the proc objects
422+
* to the jdata->procs array */
423+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
424+
ORTE_ERROR_LOG(rc);
425+
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
426+
goto cleanup;
427+
}
428+
/* compute and save local ranks */
429+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
430+
ORTE_ERROR_LOG(rc);
431+
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
432+
goto cleanup;
433+
}
434+
/* compute and save location assignments */
435+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
436+
ORTE_ERROR_LOG(rc);
437+
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
438+
goto cleanup;
439+
}
440+
/* compute and save bindings */
441+
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
442+
ORTE_ERROR_LOG(rc);
443+
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
444+
goto cleanup;
445+
}
446+
} else if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
421447
/* compute and save location assignments */
422448
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
423449
ORTE_ERROR_LOG(rc);
@@ -454,6 +480,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
454480
}
455481
}
456482

483+
if (orte_do_not_launch) {
484+
/* display the devel map */
485+
orte_rmaps_base_display_map(jdata);
486+
}
487+
457488
/* set the job state to the next position */
458489
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
459490

orte/mca/rmaps/base/rmaps_base_ranking.c

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
13-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2017 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
1616
* $COPYRIGHT$
@@ -379,35 +379,34 @@ static int rank_by(orte_job_t *jdata,
379379
all_done = false;
380380
while (!all_done && cnt < app->num_procs) {
381381
all_done = true;
382-
/* cycle across the objects */
383-
for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
384-
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
385-
/* find the next proc for this job and app_context */
386-
for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
387-
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
388-
continue;
389-
}
390-
/* ignore procs from other jobs */
391-
if (proc->name.jobid != jdata->jobid) {
392-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
393-
"mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
394-
ORTE_NAME_PRINT(&proc->name), num_ranked);
395-
continue;
396-
}
397-
/* ignore procs that are already ranked */
398-
if (ORTE_VPID_INVALID != proc->name.vpid) {
399-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
400-
"mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
401-
ORTE_NAME_PRINT(&proc->name), num_ranked);
402-
continue;
403-
}
404-
/* ignore procs from other apps */
405-
if (proc->app_idx != app->idx) {
406-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
407-
"mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
408-
ORTE_NAME_PRINT(&proc->name), num_ranked);
409-
continue;
410-
}
382+
for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
383+
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
384+
continue;
385+
}
386+
/* ignore procs from other jobs */
387+
if (proc->name.jobid != jdata->jobid) {
388+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
389+
"mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
390+
ORTE_NAME_PRINT(&proc->name), num_ranked);
391+
continue;
392+
}
393+
/* ignore procs that are already ranked */
394+
if (ORTE_VPID_INVALID != proc->name.vpid) {
395+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
396+
"mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
397+
ORTE_NAME_PRINT(&proc->name), num_ranked);
398+
continue;
399+
}
400+
/* ignore procs from other apps */
401+
if (proc->app_idx != app->idx) {
402+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
403+
"mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
404+
ORTE_NAME_PRINT(&proc->name), num_ranked);
405+
continue;
406+
}
407+
/* cycle across the objects */
408+
for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
409+
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
411410
/* protect against bozo case */
412411
locale = NULL;
413412
if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
@@ -429,7 +428,8 @@ static int rank_by(orte_job_t *jdata,
429428
}
430429
cnt++;
431430
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
432-
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
431+
"mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s",
432+
j, i, ORTE_VPID_PRINT(proc->name.vpid));
433433
/* insert the proc into the jdata array */
434434
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
435435
OBJ_RELEASE(pptr);
@@ -440,7 +440,8 @@ static int rank_by(orte_job_t *jdata,
440440
OBJ_DESTRUCT(&objs);
441441
return rc;
442442
}
443-
/* flag that one was mapped */
443+
num_ranked++;
444+
/* flag that one was mapped */
444445
all_done = false;
445446
/* track where the highest vpid landed - this is our
446447
* new bookmark

0 commit comments

Comments
 (0)