Skip to content

Commit 440eae9

Browse files
author
Ralph Castain
committed
Correct the binding algorithm to decouple it from oversubscription.
Oversubscription means that we allow more procs on a node than it has assigned slots - it has nothing to do with the number of available processing elements (PEs). Let the overload directives handle the PE situation.
1 parent 3adff9d commit 440eae9

File tree

1 file changed

+10
-27
lines changed

1 file changed

+10
-27
lines changed

orte/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -186,48 +186,31 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
186186
* already (e.g., during the call to comm_spawn), then we don't
187187
* override it */
188188
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
189-
/* if the user specified a default binding policy via
190-
* MCA param, then we use it */
191189
if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
190+
/* if the user specified a default binding policy via
191+
* MCA param, then we use it - this can include a directive
192+
* to overload */
192193
jdata->map->binding = opal_hwloc_binding_policy;
193194
} else {
194-
orte_mapping_policy_t mpol;
195-
mpol = ORTE_GET_MAPPING_POLICY(jdata->map->mapping);
196-
/* if the user specified that we allow oversubscription, then do not bind.
197-
* otherwise, if the user explicitly mapped-by some object, then we default
195+
/* if the user explicitly mapped-by some object, then we default
198196
* to binding to that object */
199-
if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
200-
!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
201-
OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
202-
} else if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping) &&
203-
ORTE_MAPPING_BYBOARD < mpol && mpol < ORTE_MAPPING_BYSLOT) {
197+
orte_mapping_policy_t mpol;
198+
mpol = ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping);
199+
if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping) &&
200+
ORTE_MAPPING_BYBOARD < mpol && mpol < ORTE_MAPPING_BYSLOT) {
204201
if (ORTE_MAPPING_BYHWTHREAD == mpol) {
205-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
206-
"mca:rmaps[%d] binding not given - using byhwthread", __LINE__);
207202
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_HWTHREAD);
208203
} else if (ORTE_MAPPING_BYCORE == mpol) {
209-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
210-
"mca:rmaps[%d] binding not given - using bycore", __LINE__);
211204
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CORE);
212205
} else if (ORTE_MAPPING_BYL1CACHE == mpol) {
213-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
214-
"mca:rmaps[%d] binding not given - using byl1cache", __LINE__);
215206
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_L1CACHE);
216207
} else if (ORTE_MAPPING_BYL2CACHE == mpol) {
217-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
218-
"mca:rmaps[%d] binding not given - using byl2cache", __LINE__);
219208
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_L2CACHE);
220209
} else if (ORTE_MAPPING_BYL3CACHE == mpol) {
221-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
222-
"mca:rmaps[%d] binding not given - using byl3cache", __LINE__);
223210
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_L3CACHE);
224211
} else if (ORTE_MAPPING_BYSOCKET == mpol) {
225-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
226-
"mca:rmaps[%d] binding not given - using bysocket", __LINE__);
227212
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_SOCKET);
228213
} else if (ORTE_MAPPING_BYNUMA == mpol) {
229-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
230-
"mca:rmaps[%d] binding not given - using bynuma", __LINE__);
231214
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NUMA);
232215
}
233216
} else if (nprocs <= 2) {
@@ -242,12 +225,12 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
242225
} else {
243226
if (opal_hwloc_use_hwthreads_as_cpus) {
244227
/* if we are using hwthread cpus, then bind to those */
245-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
228+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
246229
"mca:rmaps[%d] binding not given - using byhwthread", __LINE__);
247230
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_HWTHREAD);
248231
} else {
249232
/* for performance, bind to core */
250-
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
233+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
251234
"mca:rmaps[%d] binding not given - using bycore", __LINE__);
252235
OPAL_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CORE);
253236
}

0 commit comments

Comments
 (0)