Skip to content

Commit 16140a3

Browse files
committed
btl/uct: remove the need for a BTL module for connection TLs
Connection TLs are only used to form connections for connect-to-endpoint TLs. A connection TL does not need to belong to the same memory domain as the TL it is used with, so there is no need to rely on a BTL module. This commit moves the pending_connection_reqs FIFO from the BTL module to the TL and changes the code to support a NULL module for the connection TL. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 2f3ffe3 commit 16140a3

File tree

6 files changed

+26
-25
lines changed

6 files changed

+26
-25
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,6 @@ struct mca_btl_uct_module_t {
118118

119119
/** frags that were waiting on connections that are now ready to send */
120120
opal_list_t pending_frags;
121-
122-
/** pending connection requests */
123-
opal_fifo_t pending_connection_reqs;
124121
};
125122
typedef struct mca_btl_uct_module_t mca_btl_uct_module_t;
126123

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -387,7 +387,6 @@ static mca_btl_uct_module_t *mca_btl_uct_alloc_module(const char *md_name, mca_b
387387
OBJ_CONSTRUCT(&module->max_frags, opal_free_list_t);
388388
OBJ_CONSTRUCT(&module->pending_frags, opal_list_t);
389389
OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t);
390-
OBJ_CONSTRUCT(&module->pending_connection_reqs, opal_fifo_t);
391390

392391
module->md = md;
393392
module->md_name = strdup(md_name);
@@ -798,19 +797,19 @@ static int mca_btl_uct_component_progress_pending(mca_btl_uct_module_t *uct_btl)
798797
return completed;
799798
}
800799

801-
static int mca_btl_uct_component_progress_connections (mca_btl_uct_module_t *module) {
800+
static int mca_btl_uct_component_progress_connections (mca_btl_uct_tl_t *conn_tl) {
802801
mca_btl_uct_pending_connection_request_t *request;
803802
int ret;
804803

805-
if (module->conn_tl == NULL) {
804+
if (conn_tl == NULL) {
806805
return 0;
807806
}
808807

809-
ret = mca_btl_uct_tl_progress(module->conn_tl, 0);
808+
ret = mca_btl_uct_tl_progress(conn_tl, 0);
810809

811810
while (NULL
812811
!= (request = (mca_btl_uct_pending_connection_request_t *) opal_fifo_pop_atomic(
813-
&module->pending_connection_reqs))) {
812+
&conn_tl->pending_connection_reqs))) {
814813
mca_btl_uct_conn_req_t *conn_req = (mca_btl_uct_conn_req_t *) request->request_data;
815814
BTL_VERBOSE(("processing connection request...."));
816815
if (conn_req->module_index >= mca_btl_uct_component.module_count) {
@@ -819,7 +818,7 @@ static int mca_btl_uct_component_progress_connections (mca_btl_uct_module_t *mod
819818
}
820819
int rc = mca_btl_uct_process_connection_request(mca_btl_uct_component.modules[conn_req->module_index], conn_req);
821820
if (rc != OPAL_SUCCESS) {
822-
opal_fifo_push_atomic(&module->pending_connection_reqs, &request->super);
821+
opal_fifo_push_atomic(&conn_tl->pending_connection_reqs, &request->super);
823822
break;
824823
}
825824
OBJ_RELEASE(request);
@@ -849,15 +848,15 @@ static int mca_btl_uct_component_progress(void)
849848
ret += mca_btl_uct_tl_progress(module->am_tl, starting_index);
850849
}
851850

852-
mca_btl_uct_component_progress_connections (module);
851+
mca_btl_uct_component_progress_connections (module->conn_tl);
853852

854853
if (0 != opal_list_get_size(&module->pending_frags)) {
855854
mca_btl_uct_component_progress_pending(module);
856855
}
857856
}
858857

859858
if (NULL != mca_btl_uct_component.conn_module) {
860-
ret += mca_btl_uct_component_progress_connections (mca_btl_uct_component.conn_module);
859+
ret += mca_btl_uct_component_progress_connections (mca_btl_uct_component.conn_module->conn_tl);
861860
}
862861

863862
return (int) ret;

opal/mca/btl/uct/btl_uct_device_context.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,14 +94,14 @@ mca_btl_uct_module_get_tl_context_specific(mca_btl_uct_module_t *module, mca_btl
9494
mca_btl_uct_device_context_t *context = tl->uct_dev_contexts[context_id];
9595

9696
if (OPAL_UNLIKELY(NULL == context)) {
97-
OPAL_THREAD_LOCK(&module->lock);
97+
OPAL_THREAD_LOCK(&tl->tl_lock);
9898
context = tl->uct_dev_contexts[context_id];
9999
if (OPAL_UNLIKELY(NULL == context)) {
100100
context = tl->uct_dev_contexts[context_id] = mca_btl_uct_context_create(module, tl,
101101
context_id,
102102
true);
103103
}
104-
OPAL_THREAD_UNLOCK(&module->lock);
104+
OPAL_THREAD_UNLOCK(&tl->tl_lock);
105105
}
106106

107107
return context;

opal/mca/btl/uct/btl_uct_module.c

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2020 Google, LLC. All rights reserved.
15+
* Copyright (c) 2020-2025 Google, LLC. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -284,7 +284,6 @@ int mca_btl_uct_finalize(mca_btl_base_module_t *btl)
284284
OBJ_DESTRUCT(&uct_module->max_frags);
285285
OBJ_DESTRUCT(&uct_module->pending_frags);
286286
OBJ_DESTRUCT(&uct_module->lock);
287-
OBJ_DESTRUCT(&uct_module->pending_connection_reqs);
288287

289288
if (uct_module->rcache) {
290289
mca_rcache_base_module_destroy(uct_module->rcache);

opal/mca/btl/uct/btl_uct_tl.c

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ static void mca_btl_uct_tl_constructor(mca_btl_uct_tl_t *tl)
144144
{
145145
memset((void *) ((uintptr_t) tl + sizeof(tl->super)), 0, sizeof(*tl) - sizeof(tl->super));
146146
OBJ_CONSTRUCT(&tl->tl_lock, opal_mutex_t);
147+
OBJ_CONSTRUCT(&tl->pending_connection_reqs, opal_fifo_t);
147148
}
148149

149150
static void mca_btl_uct_tl_destructor(mca_btl_uct_tl_t *tl)
@@ -172,21 +173,22 @@ static void mca_btl_uct_tl_destructor(mca_btl_uct_tl_t *tl)
172173
}
173174

174175
OBJ_DESTRUCT(&tl->tl_lock);
176+
OBJ_DESTRUCT(&tl->pending_connection_reqs);
175177
}
176178

177179
OBJ_CLASS_INSTANCE(mca_btl_uct_tl_t, opal_list_item_t, mca_btl_uct_tl_constructor,
178180
mca_btl_uct_tl_destructor);
179181

180182
static ucs_status_t mca_btl_uct_conn_req_cb(void *arg, void *data, size_t length, unsigned flags)
181183
{
182-
mca_btl_uct_module_t *module = (mca_btl_uct_module_t *) arg;
184+
mca_btl_uct_tl_t *tl = (mca_btl_uct_tl_t *) arg;
183185
mca_btl_uct_pending_connection_request_t *request = calloc(1, length + sizeof(request->super));
184186

185187
/* it is not safe to process the connection request from the callback so just save it for
186188
* later processing */
187189
OBJ_CONSTRUCT(request, mca_btl_uct_pending_connection_request_t);
188190
memcpy(&request->request_data, (void *) ((intptr_t) data + 8), length);
189-
opal_fifo_push_atomic(&module->pending_connection_reqs, &request->super);
191+
opal_fifo_push_atomic(&tl->pending_connection_reqs, &request->super);
190192

191193
return UCS_OK;
192194
}
@@ -241,20 +243,21 @@ int mca_btl_uct_process_connection_request(mca_btl_uct_module_t *module,
241243
return OPAL_SUCCESS;
242244
}
243245

244-
static int mca_btl_uct_setup_connection_tl(mca_btl_uct_module_t *module)
246+
static int mca_btl_uct_setup_connection_tl(mca_btl_uct_tl_t *tl)
245247
{
246248
ucs_status_t ucs_status;
247249

248-
if (NULL == module->conn_tl) {
250+
if (NULL == tl) {
249251
return OPAL_ERR_NOT_SUPPORTED;
250252
}
251253

252-
mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_tl_context_specific(module, module->conn_tl,
253-
/*context_id=*/0);
254+
mca_btl_uct_device_context_t *context =
255+
mca_btl_uct_module_get_tl_context_specific(/*module=*/NULL, tl,
256+
/*context_id=*/0);
254257

255258
ucs_status = uct_iface_set_am_handler(context->uct_iface,
256-
MCA_BTL_UCT_CONNECT_RDMA, mca_btl_uct_conn_req_cb, module,
257-
UCT_CB_FLAG_ASYNC);
259+
MCA_BTL_UCT_CONNECT_RDMA, mca_btl_uct_conn_req_cb,
260+
tl, UCT_CB_FLAG_ASYNC);
258261
if (UCS_OK != ucs_status) {
259262
BTL_ERROR(("could not set active message handler for uct tl"));
260263
}
@@ -383,7 +386,7 @@ mca_btl_uct_device_context_t *mca_btl_uct_context_create(mca_btl_uct_module_t *m
383386
return NULL;
384387
}
385388

386-
if (tl == module->am_tl) {
389+
if (module != NULL && tl == module->am_tl) {
387390
BTL_VERBOSE(("installing AM handler for tl %p context id %d", (void *) tl, context_id));
388391
uct_iface_set_am_handler(context->uct_iface, MCA_BTL_UCT_FRAG, mca_btl_uct_am_handler,
389392
context, MCA_BTL_UCT_CB_FLAG_SYNC);
@@ -524,7 +527,7 @@ static int mca_btl_uct_set_tl_conn(mca_btl_uct_module_t *module, mca_btl_uct_tl_
524527
BTL_VERBOSE(("tl %s is suitable for making connections", tl->uct_tl_name));
525528

526529
module->conn_tl = tl;
527-
rc = mca_btl_uct_setup_connection_tl(module);
530+
rc = mca_btl_uct_setup_connection_tl(tl);
528531
if (OPAL_SUCCESS != rc) {
529532
return rc;
530533
}

opal/mca/btl/uct/btl_uct_types.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,9 @@ struct mca_btl_uct_tl_t {
341341

342342
/** async context */
343343
ucs_async_context_t *ucs_async;
344+
345+
/** pending connection requests */
346+
opal_fifo_t pending_connection_reqs;
344347
};
345348

346349
typedef struct mca_btl_uct_tl_t mca_btl_uct_tl_t;

0 commit comments

Comments
 (0)