Skip to content

Commit 2c562b9

Browse files
committed
Merge pull request open-mpi#783 from hjelmn/v2.x_mlx5
btl/openib: add support for mlx5 atomic operations
2 parents 302a515 + 278712f commit 2c562b9

File tree

6 files changed

+99
-8
lines changed

6 files changed

+99
-8
lines changed

config/opal_check_openfabrics.m4

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,23 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[
387387
fi
388388
])dnl
389389
390+
dnl OPAL_CHECK_EXP_VERBS(prefix, [action-if-found], [action-if-not-found])
dnl ----------------------------------------------------------------------
dnl Probe for the experimental ("expanded") verbs interface declared in
dnl <infiniband/verbs_exp.h>.
dnl
dnl Effects:
dnl   * Defines HAVE_EXP_VERBS to 1 when a test program that includes the
dnl     header and names struct ibv_exp_send_wr compiles, and to 0 otherwise.
dnl   * Runs AC_CHECK_DECLS for IBV_EXP_ATOMIC_HCA_REPLY_BE,
dnl     IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp and
dnl     ibv_exp_query_device against the same header, which provides the
dnl     matching HAVE_DECL_* preprocessor symbols.
dnl   * Runs action-if-found or action-if-not-found according to the result
dnl     of the compile test.
dnl
dnl When called with exactly three arguments the first one is taken to be a
dnl variable prefix, as passed to the other OPAL_CHECK_* macros; it is
dnl accepted but not used here. With any other argument count the original
dnl two-argument form (action-if-found, action-if-not-found) applies.
AC_DEFUN([OPAL_CHECK_EXP_VERBS],[
    OPAL_VAR_SCOPE_PUSH([have_struct_ibv_exp_send_wr])

    AC_MSG_CHECKING([whether expanded verbs are available])
    AC_TRY_COMPILE([#include <infiniband/verbs_exp.h>], [struct ibv_exp_send_wr;],
                   [have_struct_ibv_exp_send_wr=1
                    AC_MSG_RESULT([yes])],
                   [have_struct_ibv_exp_send_wr=0
                    AC_MSG_RESULT([no])])

    AC_DEFINE_UNQUOTED([HAVE_EXP_VERBS], [$have_struct_ibv_exp_send_wr], [Experimental verbs])
    AC_CHECK_DECLS([IBV_EXP_ATOMIC_HCA_REPLY_BE, IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp, ibv_exp_query_device],
                   [], [], [#include <infiniband/verbs_exp.h>])

    dnl The result variable has to be expanded by the shell, so it is
    dnl double-quoted. Single quotes would compare the literal variable
    dnl name against 1, which never matches.
    dnl The argument count selects which positions hold the two actions,
    dnl so a leading prefix argument is never run as a shell command.
    m4_if([$#], [3],
          [AS_IF([test "$have_struct_ibv_exp_send_wr" = "1"], [$2], [$3])],
          [AS_IF([test "$have_struct_ibv_exp_send_wr" = "1"], [$1], [$2])])
    OPAL_VAR_SCOPE_POP
])dnl
406+
390407
AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[
391408
$1_have_mverbs=0
392409
$1_have_mqe=0

opal/mca/btl/openib/btl_openib.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,9 @@ typedef struct mca_btl_openib_device_t {
371371
#endif
372372
opal_mutex_t device_lock; /* device level lock */
373373
struct ibv_context *ib_dev_context;
374+
#if HAVE_DECL_IBV_EXP_QUERY_DEVICE
375+
struct ibv_exp_device_attr ib_exp_dev_attr;
376+
#endif
374377
struct ibv_device_attr ib_dev_attr;
375378
struct ibv_pd *ib_pd;
376379
struct ibv_cq *ib_cq[2];
@@ -490,6 +493,8 @@ struct mca_btl_openib_module_t {
490493
mca_btl_openib_module_qp_t * qps;
491494

492495
int local_procs; /** number of local procs */
496+
497+
bool atomic_ops_be; /** atomic result is big endian */
493498
};
494499
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
495500

opal/mca/btl/openib/btl_openib_atomic.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
2727
{
2828
mca_btl_openib_get_frag_t* frag = NULL;
2929
int qp = order;
30+
int32_t rkey;
3031
int rc;
3132

3233
frag = to_get_frag(alloc_recv_user_frag());
@@ -61,15 +62,16 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
6162
frag->sr_desc.wr.atomic.compare_add = operand;
6263
frag->sr_desc.wr.atomic.swap = operand2;
6364

65+
rkey = remote_handle->rkey;
66+
6467
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
6568
if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
6669
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
67-
frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey);
68-
} else
69-
#endif
70-
{
71-
frag->sr_desc.wr.atomic.rkey = remote_handle->rkey;
70+
rkey = opal_swap_bytes4 (rkey);
7271
}
72+
#endif
73+
74+
frag->sr_desc.wr.atomic.rkey = rkey;
7375

7476
#if HAVE_XRC
7577
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {

opal/mca/btl/openib/btl_openib_component.c

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,13 +822,36 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
822822
openib_btl->super.btl_get_local_registration_threshold = 0;
823823

824824
#if HAVE_DECL_IBV_ATOMIC_HCA
825-
if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) {
825+
openib_btl->atomic_ops_be = false;
826+
827+
#if HAVE_DECL_IBV_EXP_QUERY_DEVICE
828+
/* check that 8-byte atomics are supported */
829+
if (!(device->ib_exp_dev_attr.ext_atom.log_atomic_arg_sizes & (1<<3ull))) {
826830
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;
827831
openib_btl->super.btl_atomic_flags = 0;
828832
openib_btl->super.btl_atomic_fop = NULL;
829833
openib_btl->super.btl_atomic_cswap = NULL;
830-
} else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) {
834+
}
835+
#endif
836+
837+
switch (openib_btl->device->ib_dev_attr.atomic_cap) {
838+
case IBV_ATOMIC_GLOB:
831839
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
840+
break;
841+
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
842+
case IBV_EXP_ATOMIC_HCA_REPLY_BE:
843+
openib_btl->atomic_ops_be = true;
844+
break;
845+
#endif
846+
case IBV_ATOMIC_HCA:
847+
break;
848+
case IBV_ATOMIC_NONE:
849+
default:
850+
/* no atomics or an unsupported atomic type */
851+
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;
852+
openib_btl->super.btl_atomic_flags = 0;
853+
openib_btl->super.btl_atomic_fop = NULL;
854+
openib_btl->super.btl_atomic_cswap = NULL;
832855
}
833856
#endif
834857

@@ -1626,7 +1649,13 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
16261649
ibv_get_device_name(device->ib_dev), strerror(errno)));
16271650
goto error;
16281651
}
1629-
1652+
#if HAVE_DECL_IBV_EXP_QUERY_DEVICE
1653+
if(ibv_exp_query_device(device->ib_dev_context, &device->ib_exp_dev_attr)){
1654+
BTL_ERROR(("error obtaining device attributes for %s errno says %s",
1655+
ibv_get_device_name(device->ib_dev), strerror(errno)));
1656+
goto error;
1657+
}
1658+
#endif
16301659
if(ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)){
16311660
BTL_ERROR(("error obtaining device attributes for %s errno says %s",
16321661
ibv_get_device_name(device->ib_dev), strerror(errno)));
@@ -3442,6 +3471,11 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
34423471

34433472
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
34443473

3474+
/* check if atomic result needs to be byte swapped (mlx5) */
3475+
if (openib_btl->atomic_ops_be && IBV_WC_RDMA_READ != wc->opcode) {
3476+
*((int64_t *) frag->sg_entry.addr) = ntoh64 (*((int64_t *) frag->sg_entry.addr));
3477+
}
3478+
34453479
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
34463480
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
34473481
OPAL_SUCCESS);

opal/mca/btl/openib/configure.m4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[
4646
[btl_openib_happy="yes"
4747
OPAL_CHECK_OPENFABRICS_CM([btl_openib])],
4848
[btl_openib_happy="no"])
49+
OPAL_CHECK_EXP_VERBS([btl_openib], [], [])
4950

5051
AS_IF([test "$btl_openib_happy" = "yes"],
5152
[# With the new openib flags, look for ibv_fork_init

opal/mca/btl/openib/connect/btl_openib_connect_udcm.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,7 +1307,11 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
13071307
uint32_t max_send_wr)
13081308
{
13091309
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
1310+
#if HAVE_DECL_IBV_EXP_CREATE_QP
1311+
struct ibv_exp_qp_init_attr init_attr;
1312+
#else
13101313
struct ibv_qp_init_attr init_attr;
1314+
#endif
13111315
size_t req_inline;
13121316
int rc;
13131317

@@ -1328,6 +1332,32 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
13281332
}
13291333
init_attr.cap.max_send_wr = max_send_wr;
13301334

1335+
#if HAVE_DECL_IBV_EXP_CREATE_QP
1336+
/* use expanded verbs qp create to enable use of mlx5 atomics */
1337+
init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD;
1338+
init_attr.pd = m->btl->device->ib_pd;
1339+
1340+
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG;
1341+
init_attr.max_atomic_arg = sizeof (int64_t);
1342+
1343+
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
1344+
if (IBV_EXP_ATOMIC_HCA_REPLY_BE == m->btl->device->ib_dev_attr.atomic_cap) {
1345+
init_attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY;
1346+
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS;
1347+
}
1348+
#endif
1349+
1350+
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_exp_create_qp (m->btl->device->ib_dev_context,
1351+
&init_attr))) {
1352+
/* NTH: this process may be out of registered memory. try evicting an item from
1353+
the lru of this btl's mpool */
1354+
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) {
1355+
break;
1356+
}
1357+
}
1358+
1359+
#else
1360+
13311361
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd,
13321362
&init_attr))) {
13331363
/* NTH: this process may be out of registered memory. try evicting an item from
@@ -1337,6 +1367,8 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
13371367
}
13381368
}
13391369

1370+
#endif
1371+
13401372
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
13411373
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
13421374
"ibv_create_qp failed", true, opal_process_info.nodename,

0 commit comments

Comments
 (0)