Skip to content

Commit 71f240f

Browse files
committed
btl/openib: fix issue 6785
Commit d7053a3 broke things for the case when Open MPI 4.0.x is built without UCX support. The problem was that it tried to partially initialize the BTL in order to delay printing a help message until wireup, which does not work in all cases. Rather than keep piling on changes to support a help message for a BTL that we are deprecating, take a keep-it-simple-stupid approach: revert most of d7053a3 and instead put the help message back in its original location, during the scan of the ports of the available HCAs that checks whether the link layer for each port is configured for Ethernet or InfiniBand. If Open MPI was built with UCX support, don't emit the help message; if UCX was not linked in, emit the help message. Verified on a system with ConnectX-5 HCAs with two ports configured for Ethernet and two for InfiniBand. Relates to #6785. Signed-off-by: Howard Pritchard <[email protected]>
1 parent 2df46ac commit 71f240f

File tree

4 files changed

+103
-131
lines changed

4 files changed

+103
-131
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,11 @@ AC_DEFUN([OMPI_CHECK_UCX],[
135135
[$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_ucx_CPPFLAGS"
136136
$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_ucx_LDFLAGS"
137137
$1_LIBS="[$]$1_LIBS $ompi_check_ucx_LIBS"
138+
AC_DEFINE([HAVE_UCX], [1], [have ucx])
138139
$2],
139140
[AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"],
140141
[AC_MSG_ERROR([UCX support requested but not found. Aborting])])
142+
AC_DEFINE([HAVE_UCX], [0], [have ucx])
141143
$3])
142144

143145
OPAL_VAR_SCOPE_POP

opal/mca/btl/openib/btl_openib.c

Lines changed: 64 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* Copyright (c) 2014-2018 Research Organization for Information Science
2323
* and Technology (RIST). All rights reserved.
2424
* Copyright (c) 2014 Bull SAS. All rights reserved
25+
* Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
2526
* $COPYRIGHT$
2627
*
2728
* Additional copyrights may follow
@@ -1040,15 +1041,6 @@ int mca_btl_openib_add_procs(
10401041
int btl_rank = 0;
10411042
volatile mca_btl_base_endpoint_t* endpoint;
10421043

1043-
1044-
if (! openib_btl->allowed) {
1045-
opal_bitmap_clear_all_bits(reachable);
1046-
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
1047-
true, opal_process_info.nodename,
1048-
openib_btl->device_name, openib_btl->port_num);
1049-
return OPAL_SUCCESS;
1050-
}
1051-
10521044
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
10531045
if( 0 > btl_rank ){
10541046
return OPAL_ERR_NOT_FOUND;
@@ -1648,81 +1640,80 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
16481640
return OPAL_SUCCESS;
16491641
}
16501642

1651-
if (openib_btl->allowed) {
1652-
/* Release all QPs */
1653-
if (NULL != openib_btl->device->endpoints) {
1654-
for (ep_index=0;
1655-
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
1656-
ep_index++) {
1657-
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
1643+
/* Release all QPs */
1644+
if (NULL != openib_btl->device->endpoints) {
1645+
for (ep_index=0;
1646+
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
1647+
ep_index++) {
1648+
1649+
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
16581650
ep_index);
1659-
if(!endpoint) {
1660-
BTL_VERBOSE(("In finalize, got another null endpoint"));
1661-
continue;
1662-
}
1663-
if(endpoint->endpoint_btl != openib_btl) {
1664-
continue;
1665-
}
1666-
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
1667-
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
1668-
openib_btl->device->eager_rdma_buffers[i] = NULL;
1669-
OBJ_RELEASE(endpoint);
1670-
}
1651+
if(!endpoint) {
1652+
BTL_VERBOSE(("In finalize, got another null endpoint"));
1653+
continue;
1654+
}
1655+
if(endpoint->endpoint_btl != openib_btl) {
1656+
continue;
1657+
}
1658+
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
1659+
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
1660+
openib_btl->device->eager_rdma_buffers[i] = NULL;
1661+
OBJ_RELEASE(endpoint);
16711662
}
1672-
opal_pointer_array_set_item(openib_btl->device->endpoints,
1673-
ep_index, NULL);
1674-
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
1675-
OBJ_RELEASE(endpoint);
16761663
}
1664+
opal_pointer_array_set_item(openib_btl->device->endpoints,
1665+
ep_index, NULL);
1666+
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
1667+
OBJ_RELEASE(endpoint);
16771668
}
1669+
}
16781670

1679-
/* Release SRQ resources */
1680-
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
1681-
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
1682-
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1683-
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1684-
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1685-
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1686-
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
1687-
opal_mutex_t *lock =
1688-
&mca_btl_openib_component.srq_manager.lock;
1689-
1690-
opal_hash_table_t *srq_addr_table =
1691-
&mca_btl_openib_component.srq_manager.srq_addr_table;
1692-
1693-
opal_mutex_lock(lock);
1694-
if (OPAL_SUCCESS !=
1695-
opal_hash_table_remove_value_ptr(srq_addr_table,
1696-
&openib_btl->qps[qp].u.srq_qp.srq,
1697-
sizeof(struct ibv_srq *))) {
1698-
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
1699-
rc = OPAL_ERROR;
1700-
}
1701-
opal_mutex_unlock(lock);
1702-
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
1703-
BTL_VERBOSE(("Failed to close SRQ %d", qp));
1704-
rc = OPAL_ERROR;
1705-
}
1706-
}
1671+
/* Release SRQ resources */
1672+
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
1673+
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
1674+
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1675+
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1676+
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1677+
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1678+
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
1679+
opal_mutex_t *lock =
1680+
&mca_btl_openib_component.srq_manager.lock;
17071681

1708-
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1709-
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1710-
}
1711-
}
1682+
opal_hash_table_t *srq_addr_table =
1683+
&mca_btl_openib_component.srq_manager.srq_addr_table;
17121684

1713-
/* Finalize the CPC modules on this openib module */
1714-
for (i = 0; i < openib_btl->num_cpcs; ++i) {
1715-
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
1716-
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
1685+
opal_mutex_lock(lock);
1686+
if (OPAL_SUCCESS !=
1687+
opal_hash_table_remove_value_ptr(srq_addr_table,
1688+
&openib_btl->qps[qp].u.srq_qp.srq,
1689+
sizeof(struct ibv_srq *))) {
1690+
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
1691+
rc = OPAL_ERROR;
1692+
}
1693+
opal_mutex_unlock(lock);
1694+
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
1695+
BTL_VERBOSE(("Failed to close SRQ %d", qp));
1696+
rc = OPAL_ERROR;
1697+
}
17171698
}
1718-
free(openib_btl->cpcs[i]);
1699+
1700+
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1701+
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
17191702
}
1720-
free(openib_btl->cpcs);
1703+
}
17211704

1722-
/* Release device if there are no more users */
1723-
if(!(--openib_btl->device->allowed_btls)) {
1724-
OBJ_RELEASE(openib_btl->device);
1705+
/* Finalize the CPC modules on this openib module */
1706+
for (i = 0; i < openib_btl->num_cpcs; ++i) {
1707+
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
1708+
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
17251709
}
1710+
free(openib_btl->cpcs[i]);
1711+
}
1712+
free(openib_btl->cpcs);
1713+
1714+
/* Release device if there are no more users */
1715+
if(!(--openib_btl->device->allowed_btls)) {
1716+
OBJ_RELEASE(openib_btl->device);
17261717
}
17271718

17281719
if (NULL != openib_btl->qps) {

opal/mca/btl/openib/btl_openib.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
* Copyright (c) 2014 Bull SAS. All rights reserved.
2121
* Copyright (c) 2015-2018 Research Organization for Information Science
2222
* and Technology (RIST). All rights reserved.
23+
* Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
24+
*
2325
* $COPYRIGHT$
2426
*
2527
* Additional copyrights may follow
@@ -506,8 +508,6 @@ struct mca_btl_openib_module_t {
506508
int local_procs; /** number of local procs */
507509

508510
bool atomic_ops_be; /** atomic result is big endian */
509-
510-
bool allowed; /** is this port allowed */
511511
};
512512
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
513513

opal/mca/btl/openib/btl_openib_component.c

Lines changed: 35 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* Copyright (c) 2014-2018 Research Organization for Information Science
2323
* and Technology (RIST). All rights reserved.
2424
* Copyright (c) 2014 Bull SAS. All rights reserved.
25+
* Copyright (c) 2019 Triad National Security, LLC. All rights reserved.
2526
* $COPYRIGHT$
2627
*
2728
* Additional copyrights may follow
@@ -278,9 +279,6 @@ static int btl_openib_modex_send(void)
278279
);
279280
/* For each module, add in the size of the per-CPC data */
280281
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
281-
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
282-
continue;
283-
}
284282
for (j = 0;
285283
j < mca_btl_openib_component.openib_btls[i]->num_cpcs;
286284
++j) {
@@ -309,9 +307,6 @@ static int btl_openib_modex_send(void)
309307
/* Pack each of the modules */
310308
for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
311309

312-
if (! mca_btl_openib_component.openib_btls[i]->allowed) {
313-
continue;
314-
}
315310
/* Pack the modex common message struct. */
316311
size = modex_message_size;
317312

@@ -633,38 +628,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
633628
* unless the user specifically requested to override this
634629
* policy. For ancient OFED, only allow if user has set
635630
* the MCA parameter.
631+
*
632+
* We emit a help message if the port is configured to use
633+
* InfiniBand as its link layer and Open MPI was configured without
634+
* UCX support. If UCX support is available, don't emit the help
635+
* message, since the UCX PML has higher priority than OB1 and this
636+
* BTL will not be used.
636637
*/
637-
if (! mca_btl_openib_component.allow_ib
638+
if (false == mca_btl_openib_component.allow_ib
638639
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
639640
&& IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer
640641
#endif
641642
) {
642-
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
643-
if(NULL == openib_btl) {
644-
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
645-
return OPAL_ERR_OUT_OF_RESOURCE;
646-
}
647-
memcpy(openib_btl, &mca_btl_openib_module,
648-
sizeof(mca_btl_openib_module));
649-
ib_selected = OBJ_NEW(mca_btl_base_selected_module_t);
650-
ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl;
651-
openib_btl->port_num = (uint8_t) port_num;
652-
openib_btl->allowed = false;
653-
openib_btl->device = NULL;
654-
openib_btl->device_name = strdup(ibv_get_device_name(device->ib_dev));
655-
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
656-
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
657-
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
658-
++device->btls;
659-
++mca_btl_openib_component.ib_num_btls;
660-
if (-1 != mca_btl_openib_component.ib_max_btls &&
661-
mca_btl_openib_component.ib_num_btls >=
662-
mca_btl_openib_component.ib_max_btls) {
663-
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
664-
}
665-
return OPAL_SUCCESS;
666-
}
667-
643+
#if !HAVE_UCX
644+
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
645+
true, opal_process_info.nodename,
646+
ibv_get_device_name(device->ib_dev),
647+
port_num);
648+
#endif
649+
return OPAL_ERR_NOT_FOUND;
650+
}
668651

669652
/* Ensure that the requested GID index (via the
670653
btl_openib_gid_index MCA param) is within the GID table
@@ -901,8 +884,6 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
901884
}
902885
}
903886

904-
openib_btl->allowed = true;
905-
906887
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
907888
opal_pointer_array_add(device->device_btls, (void*) openib_btl);
908889
++device->btls;
@@ -2999,29 +2980,27 @@ btl_openib_component_init(int *num_btl_modules,
29992980
ib_selected = (mca_btl_base_selected_module_t*)item;
30002981
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
30012982

3002-
if (openib_btl->allowed) {
3003-
/* Search for a CPC that can handle this port */
3004-
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
3005-
/* If we get NOT_SUPPORTED, then no CPC was found for this
3006-
port. But that's not a fatal error -- just keep going;
3007-
let's see if we find any usable openib modules or not. */
3008-
if (OPAL_ERR_NOT_SUPPORTED == ret) {
3009-
continue;
3010-
} else if (OPAL_SUCCESS != ret) {
3011-
/* All others *are* fatal. Note that we already did a
3012-
show_help in the lower layer */
3013-
goto no_btls;
3014-
}
2983+
/* Search for a CPC that can handle this port */
2984+
ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl);
2985+
/* If we get NOT_SUPPORTED, then no CPC was found for this
2986+
port. But that's not a fatal error -- just keep going;
2987+
let's see if we find any usable openib modules or not. */
2988+
if (OPAL_ERR_NOT_SUPPORTED == ret) {
2989+
continue;
2990+
} else if (OPAL_SUCCESS != ret) {
2991+
/* All others *are* fatal. Note that we already did a
2992+
show_help in the lower layer */
2993+
goto no_btls;
2994+
}
30152995

3016-
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
3017-
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
3018-
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
3019-
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
3020-
}
2996+
if (mca_btl_openib_component.max_hw_msg_size > 0 &&
2997+
(uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) {
2998+
BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")",
2999+
mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz));
3000+
}
30213001

3022-
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
3023-
goto no_btls;
3024-
}
3002+
if (finish_btl_init(openib_btl) != OPAL_SUCCESS) {
3003+
goto no_btls;
30253004
}
30263005

30273006
mca_btl_openib_component.openib_btls[i] = openib_btl;

0 commit comments

Comments
 (0)