Skip to content

Commit acbf1cb

Browse files
author
rhc54
authored
Merge pull request #2646 from rhc54/topic/squeze
Begin to reduce reliance of application procs on the topology tree it…
2 parents 75be023 + 3a2d6a5 commit acbf1cb

File tree

7 files changed

+401
-158
lines changed

7 files changed

+401
-158
lines changed

opal/mca/btl/sm/btl_sm.c

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
* All rights reserved.
1717
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
1818
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
19-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
19+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
2020
* Copyright (c) 2014-2015 Research Organization for Information Science
2121
* and Technology (RIST). All rights reserved.
2222
* Copyright (c) 2016 ARM, Inc. All rights reserved.
@@ -52,6 +52,7 @@
5252
#include "opal/util/show_help.h"
5353
#include "opal/util/printf.h"
5454
#include "opal/mca/hwloc/base/base.h"
55+
#include "opal/mca/pmix/pmix.h"
5556
#include "opal/mca/shmem/base/base.h"
5657
#include "opal/mca/shmem/shmem.h"
5758

@@ -223,23 +224,28 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
223224
int my_mem_node, num_mem_nodes, i, rc;
224225
mca_common_sm_mpool_resources_t *res = NULL;
225226
mca_btl_sm_component_t* m = &mca_btl_sm_component;
227+
char *loc, *mynuma;
228+
opal_process_name_t wildcard_rank;
226229

227230
/* Assume we don't have hwloc support and fill in dummy info */
228231
mca_btl_sm_component.mem_node = my_mem_node = 0;
229232
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1;
230233

231-
/* If we have hwloc support, then get accurate information */
232-
if (NULL != opal_hwloc_topology) {
233-
i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
234-
HWLOC_OBJ_NODE, 0,
235-
OPAL_HWLOC_AVAILABLE);
236-
237-
/* If we find >0 NUMA nodes, then investigate further */
238-
if (i > 0) {
239-
int numa=0, w;
240-
unsigned n_bound=0;
241-
hwloc_cpuset_t avail;
242-
hwloc_obj_t obj;
234+
/* see if we were given a topology signature */
235+
wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid;
236+
wildcard_rank.vpid = OPAL_VPID_WILDCARD;
237+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_TOPOLOGY_SIGNATURE,
238+
&wildcard_rank, &loc, OPAL_STRING);
239+
if (OPAL_SUCCESS == rc) {
240+
/* the number of NUMA nodes is right at the front */
241+
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = strtoul(loc, NULL, 10);
242+
free(loc);
243+
} else {
244+
/* If we have hwloc support, then get accurate information */
245+
if (NULL != opal_hwloc_topology) {
246+
i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology,
247+
HWLOC_OBJ_NODE, 0,
248+
OPAL_HWLOC_AVAILABLE);
243249

244250
/* JMS This tells me how many numa nodes are *available*,
245251
but it's not how many are being used *by this job*.
@@ -248,33 +254,65 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
248254
should be improved to be how many NUMA nodes are being
249255
used *in this job*. */
250256
mca_btl_sm_component.num_mem_nodes = num_mem_nodes = i;
257+
}
258+
}
259+
/* see if we were given our location */
260+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
261+
&OPAL_PROC_MY_NAME, &loc, OPAL_STRING);
262+
if (OPAL_SUCCESS == rc) {
263+
if (NULL == loc) {
264+
mca_btl_sm_component.mem_node = my_mem_node = -1;
265+
} else {
266+
/* get our NUMA location */
267+
mynuma = opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0);
268+
if (NULL == mynuma ||
269+
NULL != strchr(mynuma, ',') ||
270+
NULL != strchr(mynuma, '-')) {
271+
/* we either have no idea what NUMA we are on, or we
272+
* are on multiple NUMA nodes */
273+
mca_btl_sm_component.mem_node = my_mem_node = -1;
274+
} else {
275+
/* we are bound to a single NUMA node */
276+
my_mem_node = strtoul(mynuma, NULL, 10);
277+
mca_btl_sm_component.mem_node = my_mem_node;
278+
}
279+
if (NULL != mynuma) {
280+
free(mynuma);
281+
}
282+
free(loc);
283+
}
284+
} else {
285+
/* If we have hwloc support, then get accurate information */
286+
if (NULL != opal_hwloc_topology && num_mem_nodes > 0 &&
287+
NULL != opal_process_info.cpuset) {
288+
int numa=0, w;
289+
unsigned n_bound=0;
290+
hwloc_cpuset_t avail;
291+
hwloc_obj_t obj;
251292

252-
/* if we are not bound, then there is nothing further to do */
253-
if (NULL != opal_process_info.cpuset) {
254-
/* count the number of NUMA nodes to which we are bound */
255-
for (w=0; w < i; w++) {
256-
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
257-
HWLOC_OBJ_NODE, 0, w,
258-
OPAL_HWLOC_AVAILABLE))) {
259-
continue;
260-
}
261-
/* get that NUMA node's available cpus */
262-
avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
263-
/* see if we intersect */
264-
if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
265-
n_bound++;
266-
numa = w;
267-
}
293+
/* count the number of NUMA nodes to which we are bound */
294+
for (w=0; w < i; w++) {
295+
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
296+
HWLOC_OBJ_NODE, 0, w,
297+
OPAL_HWLOC_AVAILABLE))) {
298+
continue;
268299
}
269-
/* if we are located on more than one NUMA, or we didn't find
270-
* a NUMA we are on, then not much we can do
271-
*/
272-
if (1 == n_bound) {
273-
mca_btl_sm_component.mem_node = my_mem_node = numa;
274-
} else {
275-
mca_btl_sm_component.mem_node = my_mem_node = -1;
300+
/* get that NUMA node's available cpus */
301+
avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
302+
/* see if we intersect */
303+
if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
304+
n_bound++;
305+
numa = w;
276306
}
277307
}
308+
/* if we are located on more than one NUMA, or we didn't find
309+
* a NUMA we are on, then not much we can do
310+
*/
311+
if (1 == n_bound) {
312+
mca_btl_sm_component.mem_node = my_mem_node = numa;
313+
} else {
314+
mca_btl_sm_component.mem_node = my_mem_node = -1;
315+
}
278316
}
279317
}
280318

opal/mca/hwloc/base/base.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
3-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
66
* Additional copyrights may follow
@@ -276,6 +276,16 @@ OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
276276
OPAL_DECLSPEC char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo);
277277

278278

279+
/* get a string describing the locality of a given process */
280+
OPAL_DECLSPEC char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap);
281+
282+
/* extract a location from the locality string */
283+
OPAL_DECLSPEC char* opal_hwloc_base_get_location(char *locality,
284+
hwloc_obj_type_t type,
285+
unsigned index);
286+
287+
OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2);
288+
279289
END_C_DECLS
280290

281291
#endif /* OPAL_HWLOC_BASE_H */

0 commit comments

Comments
 (0)