Skip to content

Commit acb1626

Browse files
author
Ralph Castain
committed
Do not set a binding policy when we are overloaded and only the default binding policy is in effect. In 1.10, we allow oversubscription by default for non-managed allocations, and we silently do not bind if the user has not requested a binding policy and we are overloaded.
However, we do enforce oversubscription limits if we are in a managed allocation OR the user specified the number of slots for a node (either in a hostfile or via -host), so be sure to check for that in all cases.
1 parent aeaa542 commit acb1626

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

orte/mca/rmaps/base/rmaps_base_binding.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ static void unbind_procs(orte_job_t *jdata)
119119
}
120120
}
121121
}
122-
122+
123123
static int bind_upwards(orte_job_t *jdata,
124124
orte_node_t *node,
125125
hwloc_obj_type_t target,
@@ -335,7 +335,6 @@ static int bind_downwards(orte_job_t *jdata,
335335
return ORTE_ERR_SILENT;
336336
} else {
337337
/* if we have the default binding policy, then just don't bind */
338-
OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
339338
unbind_procs(jdata);
340339
hwloc_bitmap_zero(totalcpuset);
341340
return ORTE_SUCCESS;
@@ -373,7 +372,7 @@ static int bind_downwards(orte_job_t *jdata,
373372
}
374373
}
375374
hwloc_bitmap_free(totalcpuset);
376-
375+
377376
return ORTE_SUCCESS;
378377
}
379378

orte/mca/rmaps/round_robin/rmaps_rr_mappers.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,24 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
386386
* properly set
387387
*/
388388
node->oversubscribed = true;
389+
/* check for permission */
390+
if (node->slots_given) {
391+
/* if we weren't given a directive either way, then we will error out
392+
* as the #slots were specifically given, either by the host RM or
393+
* via hostfile/dash-host */
394+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
395+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
396+
true, app->num_procs, app->app);
397+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
398+
return ORTE_ERR_SILENT;
399+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
400+
/* if we were explicitly told not to oversubscribe, then don't */
401+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
402+
true, app->num_procs, app->app);
403+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
404+
return ORTE_ERR_SILENT;
405+
}
406+
}
389407
}
390408
if (nprocs_mapped == app->num_procs) {
391409
/* we are done */

0 commit comments

Comments
 (0)