Skip to content

Commit 7a83fdb

Browse files
author
Ralph Castain
committed
Update to hwloc 2.0.0a with shmem support.
Update to support passing of HWLOC shmem topology to client procs. Update use of distance API per @bgoglin. Have the openib component look up its object in the distance matrix. Bring usnic up-to-date. Restore binding for hwloc2. Signed-off-by: Ralph Castain <[email protected]>
1 parent 6fe5b36 commit 7a83fdb

File tree

148 files changed

+23783
-19364
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

148 files changed

+23783
-19364
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,9 +302,7 @@ opal/mca/event/libevent*/libevent/libevent_pthreads.pc
302302
opal/mca/event/libevent*/libevent/include/event2/event-config.h
303303

304304
opal/mca/hwloc/hwloc*/hwloc/include/hwloc/autogen/config.h
305-
opal/mca/hwloc/hwloc*/hwloc/include/hwloc/autogen/config.h.in
306305
opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h
307-
opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h.in
308306
opal/mca/hwloc/base/static-components.h.new.extern
309307
opal/mca/hwloc/base/static-components.h.new.struct
310308

@@ -361,6 +359,7 @@ orte/test/mpi/accept
361359
orte/test/mpi/attach
362360
orte/test/mpi/bad_exit
363361
orte/test/mpi/bcast_loop
362+
orte/test/mpi/binding
364363
orte/test/mpi/concurrent_spawn
365364
orte/test/mpi/connect
366365
orte/test/mpi/crisscross

ompi/mca/osc/rdma/osc_rdma_active_target.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* Copyright (c) 2017 The University of Tennessee and The University
1717
* of Tennessee Research Foundation. All rights
1818
* reserved.
19+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -242,10 +243,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
242243
return OMPI_SUCCESS;
243244
}
244245

245-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
246-
return OMPI_ERR_OUT_OF_RESOURCE;
247-
}
248-
249246
/* translate group ranks into the communicator */
250247
peers = ompi_osc_rdma_get_peers (module, module->pw_group);
251248
if (OPAL_UNLIKELY(NULL == peers)) {
@@ -281,7 +278,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
281278
do {
282279
ompi_osc_rdma_lock_t result;
283280

284-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to post to index %d @ rank %d", post_index, peer->rank);
281+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to post to index %d @ rank %d", (int)post_index, peer->rank);
285282

286283
/* try to post. if the value isn't 0 then another rank is occupying this index */
287284
if (!ompi_osc_rdma_peer_local_state (peer)) {

opal/mca/btl/openib/btl_openib_component.c

Lines changed: 118 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
1919
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
2020
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
21-
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
21+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
2222
* Copyright (c) 2014-2017 Research Organization for Information Science
2323
* and Technology (RIST). All rights reserved.
2424
* Copyright (c) 2014 Bull SAS. All rights reserved.
@@ -2330,32 +2330,41 @@ static float get_ib_dev_distance(struct ibv_device *dev)
23302330
/* If we don't have hwloc, we'll default to a distance of 0,
23312331
because we have no way of measuring. */
23322332
float distance = 0;
2333+
float a, b;
2334+
int i;
2335+
hwloc_cpuset_t my_cpuset = NULL, ibv_cpuset = NULL;
2336+
hwloc_obj_t my_obj, ibv_obj, node_obj;
2337+
struct hwloc_distances_s *hwloc_distances = NULL;
23332338

2334-
#if HWLOC_API_VERSION < 0x20000
23352339
/* Override any distance logic so all devices are used */
23362340
if (0 != mca_btl_openib_component.ignore_locality ||
23372341
OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
23382342
return distance;
23392343
}
23402344

2341-
float a, b;
2342-
int i;
2343-
hwloc_cpuset_t my_cpuset = NULL, ibv_cpuset = NULL;
2344-
hwloc_obj_t my_obj, ibv_obj, node_obj;
2345-
2346-
/* Note that this struct is owned by hwloc; there's no need to
2347-
free it at the end of time */
2348-
static const struct hwloc_distances_s *hwloc_distances = NULL;
2345+
#if HWLOC_API_VERSION >= 0x20000
2346+
unsigned int j, distances_nr = 1;
2347+
int ibvindex, myindex;
2348+
#endif
23492349

23502350
if (NULL == hwloc_distances) {
2351-
hwloc_distances =
2352-
hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology,
2353-
HWLOC_OBJ_NODE);
2354-
}
2351+
#if HWLOC_API_VERSION < 0x20000
2352+
hwloc_distances =
2353+
hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology,
2354+
HWLOC_OBJ_NODE);
2355+
/* If we got no info, just return 0 */
2356+
if (NULL == hwloc_distances || NULL == hwloc_distances->latency) {
2357+
goto out;
2358+
}
23552359

2356-
/* If we got no info, just return 0 */
2357-
if (NULL == hwloc_distances || NULL == hwloc_distances->latency) {
2358-
goto out;
2360+
#else
2361+
if (0 != hwloc_distances_get_by_type(opal_hwloc_topology, HWLOC_OBJ_NODE,
2362+
&distances_nr, &hwloc_distances,
2363+
HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0) || 0 == distances_nr) {
2364+
hwloc_distances = NULL;
2365+
goto out;
2366+
}
2367+
#endif
23592368
}
23602369

23612370
/* Next, find the NUMA node where this IBV device is located */
@@ -2373,16 +2382,31 @@ static float get_ib_dev_distance(struct ibv_device *dev)
23732382

23742383
opal_output_verbose(5, opal_btl_base_framework.framework_output,
23752384
"hwloc_distances->nbobjs=%d", hwloc_distances->nbobjs);
2385+
#if HWLOC_API_VERSION < 0x20000
23762386
for (i = 0; i < (int)(2 * hwloc_distances->nbobjs); i++) {
23772387
opal_output_verbose(5, opal_btl_base_framework.framework_output,
23782388
"hwloc_distances->latency[%d]=%f", i, hwloc_distances->latency[i]);
23792389
}
2390+
#else
2391+
for (i = 0; i < (int)hwloc_distances->nbobjs; i++) {
2392+
opal_output_verbose(5, opal_btl_base_framework.framework_output,
2393+
"hwloc_distances->values[%d]=%"PRIu64, i, hwloc_distances->values[i]);
2394+
}
2395+
#endif
23802396

23812397
/* If ibv_obj is a NUMA node or below, we're good. */
23822398
switch (ibv_obj->type) {
23832399
case HWLOC_OBJ_NODE:
23842400
case HWLOC_OBJ_SOCKET:
2401+
#if HWLOC_API_VERSION < 0x20000
23852402
case HWLOC_OBJ_CACHE:
2403+
#else
2404+
case HWLOC_OBJ_L1CACHE:
2405+
case HWLOC_OBJ_L2CACHE:
2406+
case HWLOC_OBJ_L3CACHE:
2407+
case HWLOC_OBJ_L4CACHE:
2408+
case HWLOC_OBJ_L5CACHE:
2409+
#endif
23862410
case HWLOC_OBJ_CORE:
23872411
case HWLOC_OBJ_PU:
23882412
while (NULL != ibv_obj && ibv_obj->type != HWLOC_OBJ_NODE) {
@@ -2402,6 +2426,22 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24022426
if (NULL == ibv_obj) {
24032427
goto out;
24042428
}
2429+
#if HWLOC_API_VERSION >= 0x20000
2430+
/* the new matrix format isn't quite as friendly, so we have to
2431+
* do an exhaustive search to find the index of this object
2432+
* in that array */
2433+
ibvindex = -1;
2434+
for (j=0; j < distances_nr; j++) {
2435+
if (ibv_obj == hwloc_distances->objs[j]) {
2436+
ibvindex = j;
2437+
break;
2438+
}
2439+
}
2440+
if (-1 == ibvindex) {
2441+
OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
2442+
goto out;
2443+
}
2444+
#endif
24052445

24062446
opal_output_verbose(5, opal_btl_base_framework.framework_output,
24072447
"ibv_obj->logical_index=%d", ibv_obj->logical_index);
@@ -2424,7 +2464,15 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24242464
switch (my_obj->type) {
24252465
case HWLOC_OBJ_NODE:
24262466
case HWLOC_OBJ_SOCKET:
2427-
case HWLOC_OBJ_CACHE:
2467+
#if HWLOC_API_VERSION < 0x20000
2468+
case HWLOC_OBJ_CACHE:
2469+
#else
2470+
case HWLOC_OBJ_L1CACHE:
2471+
case HWLOC_OBJ_L2CACHE:
2472+
case HWLOC_OBJ_L3CACHE:
2473+
case HWLOC_OBJ_L4CACHE:
2474+
case HWLOC_OBJ_L5CACHE:
2475+
#endif
24282476
case HWLOC_OBJ_CORE:
24292477
case HWLOC_OBJ_PU:
24302478
while (NULL != my_obj && my_obj->type != HWLOC_OBJ_NODE) {
@@ -2435,12 +2483,31 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24352483
"my_obj->logical_index=%d", my_obj->logical_index);
24362484
/* Distance may be asymmetrical, so calculate both of them
24372485
and take the max */
2438-
a = hwloc_distances->latency[my_obj->logical_index +
2439-
(ibv_obj->logical_index *
2440-
hwloc_distances->nbobjs)];
2441-
b = hwloc_distances->latency[ibv_obj->logical_index +
2442-
(my_obj->logical_index *
2443-
hwloc_distances->nbobjs)];
2486+
#if HWLOC_API_VERSION < 0x20000
2487+
a = hwloc_distances->latency[my_obj->logical_index +
2488+
(ibv_obj->logical_index *
2489+
hwloc_distances->nbobjs)];
2490+
b = hwloc_distances->latency[ibv_obj->logical_index +
2491+
(my_obj->logical_index *
2492+
hwloc_distances->nbobjs)];
2493+
#else
2494+
/* the new matrix format isn't quite as friendly, so we have to
2495+
* do an exhaustive search to find the index of this object
2496+
* in that array */
2497+
myindex = -1;
2498+
for (j=0; j < distances_nr; j++) {
2499+
if (my_obj == hwloc_distances->objs[j]) {
2500+
myindex = j;
2501+
break;
2502+
}
2503+
}
2504+
if (-1 == myindex) {
2505+
OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
2506+
goto out;
2507+
}
2508+
a = (float)hwloc_distances->values[myindex + (ibvindex * hwloc_distances->nbobjs)];
2509+
b = (float)hwloc_distances->values[ibvindex + (myindex * hwloc_distances->nbobjs)];
2510+
#endif
24442511
distance = (a > b) ? a : b;
24452512
}
24462513
break;
@@ -2456,13 +2523,28 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24562523
node_obj = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
24572524
ibv_obj->cpuset,
24582525
HWLOC_OBJ_NODE, ++i)) {
2459-
2460-
a = hwloc_distances->latency[node_obj->logical_index +
2461-
(ibv_obj->logical_index *
2462-
hwloc_distances->nbobjs)];
2463-
b = hwloc_distances->latency[ibv_obj->logical_index +
2464-
(node_obj->logical_index *
2465-
hwloc_distances->nbobjs)];
2526+
#if HWLOC_API_VERSION < 0x20000
2527+
a = hwloc_distances->latency[node_obj->logical_index +
2528+
(ibv_obj->logical_index *
2529+
hwloc_distances->nbobjs)];
2530+
b = hwloc_distances->latency[ibv_obj->logical_index +
2531+
(node_obj->logical_index *
2532+
hwloc_distances->nbobjs)];
2533+
#else
2534+
unsigned int j;
2535+
j = node_obj->logical_index + (ibv_obj->logical_index * hwloc_distances->nbobjs);
2536+
if (j < distances_nr) {
2537+
a = (float)hwloc_distances->values[j];
2538+
} else {
2539+
goto out;
2540+
}
2541+
j = ibv_obj->logical_index + (node_obj->logical_index * hwloc_distances->nbobjs);
2542+
if (j < distances_nr) {
2543+
b = (float)hwloc_distances->values[j];
2544+
} else {
2545+
goto out;
2546+
}
2547+
#endif
24662548
a = (a > b) ? a : b;
24672549
distance = (a > distance) ? a : distance;
24682550
}
@@ -2476,10 +2558,12 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24762558
if (NULL != my_cpuset) {
24772559
hwloc_bitmap_free(my_cpuset);
24782560
}
2479-
#else
2480-
#warning FIXME get_ib_dev_distance is not implemented with hwloc v2
2481-
#endif
24822561

2562+
#if HWLOC_API_VERSION < 0x20000
2563+
if (NULL != hwloc_distances) {
2564+
hwloc_distances_release(opal_hwloc_topology, hwloc_distances);
2565+
}
2566+
#endif
24832567
return distance;
24842568
}
24852569

opal/mca/btl/openib/btl_openib_proc.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2006-2007 Voltaire All rights reserved.
15-
* Copyright (c) 2014 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015-2016 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
@@ -77,8 +77,6 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc)
7777

7878
void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc)
7979
{
80-
mca_btl_openib_proc_btlptr_t* elem;
81-
8280
/* release resources */
8381
if(NULL != ib_proc->proc_endpoints) {
8482
free(ib_proc->proc_endpoints);

opal/mca/btl/usnic/btl_usnic_hwloc.c

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
3-
* Copyright (c) 2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
66
* Additional copyrights may follow
@@ -26,22 +26,34 @@
2626
*/
2727
static hwloc_obj_t my_numa_node = NULL;
2828
static int num_numa_nodes = 0;
29-
static const struct hwloc_distances_s *matrix = NULL;
29+
static struct hwloc_distances_s *matrix = NULL;
30+
#if HWLOC_API_VERSION >= 0x20000
31+
static unsigned int matrix_nr = 1;
32+
#endif
3033

3134
/*
3235
* Get the hwloc distance matrix (if we don't already have it).
33-
*
34-
* Note that the matrix data structure belongs to hwloc; we are not
35-
* responsibile for freeing it.
3636
*/
3737
static int get_distance_matrix(void)
3838
{
39+
#if HWLOC_API_VERSION < 0x20000
40+
/* Note that the matrix data structure belongs to hwloc; we are not
41+
* responsible for freeing it. */
42+
3943
if (NULL == matrix) {
4044
matrix = hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology,
4145
HWLOC_OBJ_NODE);
4246
}
4347

4448
return (NULL == matrix) ? OPAL_ERROR : OPAL_SUCCESS;
49+
#else
50+
if (0 != hwloc_distances_get_by_type(opal_hwloc_topology, HWLOC_OBJ_NODE,
51+
&matrix_nr, &matrix,
52+
HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0) || 0 == matrix_nr) {
53+
return OPAL_ERROR;
54+
}
55+
return OPAL_SUCCESS;
56+
#endif
4557
}
4658

4759
/*
@@ -219,6 +231,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module)
219231

220232
/* Lookup the distance between my NUMA node and the NUMA node of
221233
the device */
234+
#if HWLOC_API_VERSION < 0x20000
222235
if (NULL != dev_numa) {
223236
module->numa_distance =
224237
matrix->latency[dev_numa->logical_index * num_numa_nodes +
@@ -229,6 +242,40 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module)
229242
module->linux_device_name,
230243
module->numa_distance);
231244
}
245+
#else
246+
if (NULL != dev_numa) {
247+
int myindex, devindex;
248+
unsigned int j;
249+
myindex = -1;
250+
for (j=0; j < matrix_nr; j++) {
251+
if (my_numa_node == matrix->objs[j]) {
252+
myindex = j;
253+
break;
254+
}
255+
}
256+
if (-1 == myindex) {
257+
return OPAL_SUCCESS;
258+
}
259+
devindex = -1;
260+
for (j=0; j < matrix_nr; j++) {
261+
if (dev_numa == matrix->objs[j]) {
262+
devindex = j;
263+
break;
264+
}
265+
}
266+
if (-1 == devindex) {
267+
return OPAL_SUCCESS;
268+
}
269+
270+
module->numa_distance =
271+
matrix->values[(devindex * num_numa_nodes) + myindex];
272+
273+
opal_output_verbose(5, USNIC_OUT,
274+
"btl:usnic:filter_numa: %s is distance %d from me",
275+
module->linux_device_name,
276+
module->numa_distance);
277+
}
278+
#endif
232279

233280
return OPAL_SUCCESS;
234281
}

0 commit comments

Comments
 (0)