Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 92cc5cd

Browse files
committed
configury: fix XRC detection on OFED < 3.12
Since ibv_create_xrc_rcv_qp is now deprecated, and in order to be "future-proof", we have to consider the case in which only XRC Domains are supported. Also, correctly handle distros that ship broken ibverbs devel headers. Thanks to Paul Hargrove for the detailed report. (cherry picked from commit open-mpi/ompi@f7cf7d5)
1 parent 5298108 commit 92cc5cd

File tree

6 files changed

+46
-20
lines changed

6 files changed

+46
-20
lines changed

config/opal_check_openfabrics.m4

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,21 +148,42 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[
148148
AC_CHECK_MEMBERS([struct ibv_device.transport_type], [], [],
149149
[#include <infiniband/verbs.h>])
150150

151+
# We have to check that functions both exist *and* are declared
152+
# since some distros ship broken ibverbs devel headers
153+
# IBV_DEVICE_XRC is common to all OFED versions
151154
# ibv_create_xrc_rcv_qp was added in OFED 1.3
152155
# ibv_cmd_open_xrcd (aka XRC Domains) was added in OFED 3.12
153156
if test "$enable_connectx_xrc" = "yes"; then
154-
$1_have_xrc=1
155-
AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp],
156-
[], [$1_have_xrc=0])
157-
AC_CHECK_DECLS([IBV_SRQT_XRC],
158-
[], [$1_have_xrc=0],
157+
AC_CHECK_DECLS([IBV_DEVICE_XRC],
158+
[$1_have_xrc=1
159+
$1_have_xrc_domains=1],
160+
[],
159161
[#include <infiniband/verbs.h>])
160162
fi
161163
if test "$enable_connectx_xrc" = "yes" \
162164
&& test $$1_have_xrc -eq 1; then
163-
AC_CHECK_FUNCS([ibv_cmd_open_xrcd], [$1_have_xrc_domains=1])
165+
AC_CHECK_DECLS([ibv_create_xrc_rcv_qp],
166+
[AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp],
167+
[], [$1_have_xrc=0])],
168+
[$1_have_xrc=0],
169+
[#include <infiniband/driver.h>])
170+
fi
171+
if test "$enable_connectx_xrc" = "yes" \
172+
&& test $$1_have_xrc_domains -eq 1; then
173+
AC_CHECK_DECLS([ibv_cmd_open_xrcd],
174+
[AC_CHECK_DECLS([IBV_SRQT_XRC],
175+
[AC_CHECK_FUNCS([ibv_cmd_open_xrcd],
176+
[], [$1_have_xrc_domains=0])],
177+
[$1_have_xrc_domains=0],
178+
[#include <infiniband/verbs.h>])],
179+
[$1_have_xrc_domains=0],
180+
[#include <infiniband/driver.h>])
181+
# XRC and XRC Domains should be considered as exclusive
182+
if test "$$1_have_xrc" -eq 1 && \
183+
test "$$1_have_xrc_domains" -eq 1; then
184+
$1_have_xrc=0
185+
fi
164186
fi
165-
166187

167188
if test "no" != "$enable_openib_dynamic_sl"; then
168189
# We need ib_types.h file, which is installed with opensm-devel

opal/mca/btl/openib/btl_openib.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@
5353

5454
BEGIN_C_DECLS
5555

56-
#define HAVE_XRC (1 == OPAL_HAVE_CONNECTX_XRC)
57-
#define ENABLE_DYNAMIC_SL (1 == OPAL_ENABLE_DYNAMIC_SL)
56+
#define HAVE_XRC (OPAL_HAVE_CONNECTX_XRC || OPAL_HAVE_CONNECTX_XRC_DOMAINS)
57+
#define ENABLE_DYNAMIC_SL OPAL_ENABLE_DYNAMIC_SL
5858

5959
#define MCA_BTL_IB_LEAVE_PINNED 1
6060
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll

opal/mca/btl/openib/btl_openib_async.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ static mca_btl_openib_endpoint_t * qp2endpoint(struct ibv_qp *qp, mca_btl_openib
122122
return NULL;
123123
}
124124

125-
#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS
125+
#if OPAL_HAVE_CONNECTX_XRC
126126
/* XRC receive QP to endpoint */
127127
static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_openib_device_t *device)
128128
{
@@ -352,24 +352,21 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
352352
}
353353

354354
event_type = event.event_type;
355-
#if HAVE_XRC
355+
#if OPAL_HAVE_CONNECTX_XRC
356356
/* is it XRC event ?*/
357-
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
358-
#else
359357
bool xrc_event = false;
360358
if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
361359
xrc_event = true;
362360
/* Clear the bit and handle as usual */
363361
event_type ^= IBV_XRC_QP_EVENT_FLAG;
364362
}
365-
#endif
366363
#endif
367364
switch(event_type) {
368365
case IBV_EVENT_PATH_MIG:
369366
BTL_ERROR(("Alternative path migration event reported"));
370367
if (APM_ENABLED) {
371368
BTL_ERROR(("Trying to find additional path..."));
372-
#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS
369+
#if OPAL_HAVE_CONNECTX_XRC
373370
if (xrc_event)
374371
mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
375372
xrc_qp2endpoint(event.element.xrc_qp_num, device));
@@ -653,7 +650,7 @@ void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep)
653650
qp->qp_num, strerror(errno), errno));
654651
}
655652

656-
#if HAVE_XRC && ! OPAL_HAVE_CONNECTX_XRC_DOMAINS
653+
#if OPAL_HAVE_CONNECTX_XRC
657654
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep)
658655
{
659656
struct ibv_qp_init_attr qp_init_attr;

opal/mca/btl/openib/btl_openib_async.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
int start_async_event_thread(void);
2020
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
2121
int btl_openib_async_command_done(int exp);
22-
#if HAVE_XRC && ! OPAL_HAVE_CONNECTX_XRC_DOMAINS
22+
#if OPAL_HAVE_CONNECTX_XRC
2323
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep);
2424
#endif
2525

opal/mca/btl/openib/btl_openib_atomic.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,12 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
7373

7474
#if HAVE_XRC
7575
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
76-
frag->sr_desc.xrc_remote_srq_num=endpoint->rem_info.rem_srqs[qp].rem_srq_num;
76+
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
77+
frag->sr_desc.qp_type.xrc.remote_srqn = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
78+
#else
79+
frag->sr_desc.xrc_remote_srq_num = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
80+
#endif
81+
7782
}
7883
#endif
7984

opal/mca/btl/openib/btl_openib_put.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,13 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
103103

104104
#if HAVE_XRC
105105
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
106-
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
106+
107+
#if OPAL_HAVE_CONNECTX_XRC
108+
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
109+
#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS
107110
to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
108111
#else
109-
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
112+
#error "that should never happen"
110113
#endif
111114
}
112115
#endif

0 commit comments

Comments
 (0)