Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 81f5f89

Browse files
committed
Merge pull request #972 from hjelmn/v2.x_xrc
Fix XRC support
2 parents 88d8a8d + 71e56be commit 81f5f89

File tree

8 files changed

+127
-107
lines changed

8 files changed

+127
-107
lines changed

opal/mca/btl/openib/btl_openib.c

Lines changed: 36 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved.
15-
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2006-2007 Voltaire All rights reserved.
1818
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
@@ -399,14 +399,16 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
399399
}
400400
}
401401

402+
openib_btl->srqs_created = true;
403+
402404
return OPAL_SUCCESS;
403405
}
404406

405407
static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
406408
{
407409
int rc = OPAL_SUCCESS;
408410
opal_mutex_lock(&openib_btl->ib_lock);
409-
if (0 == openib_btl->num_peers &&
411+
if (!openib_btl->srqs_created &&
410412
(mca_btl_openib_component.num_srq_qps > 0 ||
411413
mca_btl_openib_component.num_xrc_qps > 0)) {
412414
rc = create_srq(openib_btl);
@@ -416,17 +418,12 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
416418
}
417419

418420

419-
static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
421+
static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
420422
{
421423
uint32_t send_cqes, recv_cqes;
422424
int rc = OPAL_SUCCESS, qp;
423425
mca_btl_openib_device_t *device = openib_btl->device;
424426

425-
if( 0 == nprocs){
426-
/* nothing to do */
427-
return OPAL_SUCCESS;
428-
}
429-
430427
opal_mutex_lock(&openib_btl->ib_lock);
431428
/* figure out reasonable sizes for completion queues */
432429
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
@@ -435,7 +432,7 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
435432
recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num;
436433
} else {
437434
send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num +
438-
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs;
435+
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * openib_btl->num_peers;
439436
recv_cqes = send_cqes;
440437
}
441438

@@ -455,7 +452,6 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
455452
goto out;
456453
}
457454

458-
openib_btl->num_peers += nprocs;
459455
out:
460456
opal_mutex_unlock(&openib_btl->ib_lock);
461457
return rc;
@@ -1028,10 +1024,14 @@ int mca_btl_openib_add_procs(
10281024
return rc;
10291025
}
10301026

1031-
rc = openib_btl_prepare(openib_btl);
1032-
if (OPAL_SUCCESS != rc) {
1033-
BTL_ERROR(("could not prepare openib btl structure for usel"));
1034-
return rc;
1027+
if (0 == openib_btl->num_peers) {
1028+
/* ensure completion queues are created before attempting to
1029+
* make a loop-back queue pair */
1030+
rc = openib_btl_size_queues(openib_btl);
1031+
if (OPAL_SUCCESS != rc) {
1032+
BTL_ERROR(("error creating cqs"));
1033+
return rc;
1034+
}
10351035
}
10361036

10371037
/* prepare all proc's and account them properly */
@@ -1080,10 +1080,20 @@ int mca_btl_openib_add_procs(
10801080
}
10811081
}
10821082

1083-
/* account this procs if need */
1084-
rc = openib_btl_size_queues(openib_btl, nprocs_new);
1083+
if (nprocs_new) {
1084+
OPAL_THREAD_ADD32(&openib_btl->num_peers, nprocs_new);
1085+
1086+
/* adjust cq sizes given the new procs */
1087+
rc = openib_btl_size_queues (openib_btl);
1088+
if (OPAL_SUCCESS != rc) {
1089+
BTL_ERROR(("error creating cqs"));
1090+
return rc;
1091+
}
1092+
}
1093+
1094+
rc = openib_btl_prepare (openib_btl);
10851095
if (OPAL_SUCCESS != rc) {
1086-
BTL_ERROR(("error creating cqs"));
1096+
BTL_ERROR(("could not prepare openib btl module for use"));
10871097
return rc;
10881098
}
10891099

@@ -1156,23 +1166,15 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11561166
{
11571167
mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl;
11581168
volatile mca_btl_base_endpoint_t *endpoint = NULL;
1169+
int local_port_cnt = 0, btl_rank, rc;
11591170
mca_btl_openib_proc_t *ib_proc;
1160-
int rc;
1161-
int local_port_cnt = 0, btl_rank;
1162-
size_t nprocs_new = 0;
11631171

11641172
rc = prepare_device_for_use (openib_btl->device);
11651173
if (OPAL_SUCCESS != rc) {
11661174
BTL_ERROR(("could not prepare openib device for use"));
11671175
return NULL;
11681176
}
11691177

1170-
rc = openib_btl_prepare(openib_btl);
1171-
if (OPAL_SUCCESS != rc) {
1172-
BTL_ERROR(("could not prepare openib btl structure for use"));
1173-
return NULL;
1174-
}
1175-
11761178
if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) {
11771179
/* if we don't have connection info for this process, it's
11781180
* okay because some other method might be able to reach it,
@@ -1189,7 +1191,8 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11891191

11901192
/* this is a new process to this openib btl
11911193
* account this procs if need */
1192-
rc = openib_btl_size_queues(openib_btl, nprocs_new);
1194+
OPAL_THREAD_ADD32(&openib_btl->num_peers, 1);
1195+
rc = openib_btl_size_queues(openib_btl);
11931196
if (OPAL_SUCCESS != rc) {
11941197
BTL_ERROR(("error creating cqs"));
11951198
return NULL;
@@ -1214,6 +1217,12 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
12141217
return NULL;
12151218
}
12161219

1220+
rc = openib_btl_prepare(openib_btl);
1221+
if (OPAL_SUCCESS != rc) {
1222+
BTL_ERROR(("could not prepare openib btl structure for use"));
1223+
goto exit;
1224+
}
1225+
12171226
for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
12181227
endpoint = ib_proc->proc_endpoints[j];
12191228
if (endpoint->endpoint_btl == openib_btl) {

opal/mca/btl/openib/btl_openib.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
15-
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2006-2007 Voltaire All rights reserved.
1818
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
@@ -465,6 +465,7 @@ struct mca_btl_openib_module_t {
465465
mca_btl_base_module_t super;
466466

467467
bool btl_inited;
468+
bool srqs_created;
468469

469470
/** Common information about all ports */
470471
mca_btl_openib_modex_message_t port_info;

opal/mca/btl/openib/btl_openib_atomic.c

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
3+
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2015 Research Organization for Information Science
66
* and Technology (RIST). All rights reserved.
@@ -73,16 +73,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
7373

7474
frag->sr_desc.wr.atomic.rkey = rkey;
7575

76-
#if HAVE_XRC
77-
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
78-
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
79-
frag->sr_desc.qp_type.xrc.remote_srqn = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
80-
#else
81-
frag->sr_desc.xrc_remote_srq_num = endpoint->rem_info.rem_srqs[qp].rem_srq_num;
82-
#endif
83-
84-
}
85-
#endif
76+
/* NTH: the SRQ# is set in mca_btl_get_internal */
8677

8778
if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) {
8879
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);

opal/mca/btl/openib/btl_openib_endpoint.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
14-
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
14+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* Copyright (c) 2006-2007 Voltaire All rights reserved.
1717
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
@@ -579,7 +579,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
579579

580580
opal_output(-1, "Now we are CONNECTED");
581581
if (MCA_BTL_XRC_ENABLED) {
582-
OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
582+
opal_mutex_lock (&endpoint->ib_addr->addr_lock);
583583
if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
584584
/* We are not xrc master */
585585
/* set our qp pointer to master qp */
@@ -622,7 +622,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
622622
}
623623
}
624624
}
625-
OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
625+
opal_mutex_unlock (&endpoint->ib_addr->addr_lock);
626626
}
627627

628628

opal/mca/btl/openib/btl_openib_get.c

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
15-
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2006-2007 Voltaire All rights reserved.
1818
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
@@ -92,16 +92,6 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
9292
frag->sr_desc.wr.rdma.rkey = remote_handle->rkey;
9393
}
9494

95-
#if HAVE_XRC
96-
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
97-
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
98-
frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
99-
#else
100-
frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
101-
#endif
102-
}
103-
#endif
104-
10595
if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
10696
OPAL_THREAD_LOCK(&ep->endpoint_lock);
10797
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags);
@@ -138,6 +128,19 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base
138128
int qp = to_base_frag(frag)->base.order;
139129
struct ibv_send_wr *bad_wr;
140130

131+
#if HAVE_XRC
132+
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
133+
/* NTH: the remote SRQ number is only available once the endpoint is connected. By
134+
* setting the value here instead of mca_btl_openib_get we guarantee the rem_srqs
135+
* array is initialized. */
136+
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
137+
frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
138+
#else
139+
frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
140+
#endif
141+
}
142+
#endif
143+
141144
/* check for a send wqe */
142145
if (qp_get_wqe(ep, qp) < 0) {
143146
qp_put_wqe(ep, qp);

opal/mca/btl/openib/btl_openib_put.c

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
15-
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2006-2007 Voltaire All rights reserved.
1818
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
@@ -49,7 +49,7 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
4949
qp = mca_btl_openib_component.rdma_qp;
5050
}
5151

52-
if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle ||
52+
if (OPAL_UNLIKELY((btl->btl_put_local_registration_threshold < size && !local_handle) || !remote_handle ||
5353
size > btl->btl_put_limit)) {
5454
return OPAL_ERR_BAD_PARAM;
5555
}
@@ -101,19 +101,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
101101
to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey;
102102
}
103103

104-
#if HAVE_XRC
105-
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
106-
107-
#if OPAL_HAVE_CONNECTX_XRC
108-
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
109-
#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS
110-
to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
111-
#else
112-
#error "that should never happen"
113-
#endif
114-
}
115-
#endif
116-
117104
if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
118105
OPAL_THREAD_LOCK(&ep->endpoint_lock);
119106
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags);
@@ -153,6 +140,21 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base
153140
struct ibv_send_wr *bad_wr;
154141
int rc;
155142

143+
#if HAVE_XRC
144+
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
145+
/* NTH: the remote SRQ number is only available once the endpoint is connected. By
146+
* setting the value here instead of mca_btl_openib_put we guarantee the rem_srqs
147+
* array is initialized. */
148+
#if OPAL_HAVE_CONNECTX_XRC
149+
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
150+
#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS
151+
to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
152+
#else
153+
#error "that should never happen"
154+
#endif
155+
}
156+
#endif
157+
156158
/* check for a send wqe */
157159
if (qp_get_wqe(ep, qp) < 0) {
158160
qp_put_wqe(ep, qp);
@@ -164,7 +166,7 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base
164166

165167
if (0 != (rc = ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))) {
166168
qp_put_wqe(ep, qp);
167-
return OPAL_ERROR;;
169+
return OPAL_ERROR;
168170
}
169171

170172
return OPAL_SUCCESS;

opal/mca/btl/openib/btl_openib_xrc.c

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
34
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
45
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
56
* Copyright (c) 2014-2015 Research Organization for Information Science
67
* and Technology (RIST). All rights reserved.
78
* Copyright (c) 2014 Bull SAS. All rights reserved.
9+
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
10+
* reserved.
811
* $COPYRIGHT$
912
*
1013
* Additional copyrights may follow
@@ -122,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr)
122125
ib_addr->lid = 0;
123126
ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
124127
ib_addr->qp = NULL;
125-
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t);
128+
/* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call
129+
* into the CPC with the lock held. The alternative would be to drop the lock but the
130+
* lock is never obtained in a critical path. */
131+
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t);
126132
OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t);
127133
}
128134

0 commit comments

Comments
 (0)