Skip to content

Commit f9ef4b4

Browse files
authored
Merge pull request #7632 from devreal/osc-ucx-progress
UCX osc: make progress on idle worker if none are active
2 parents b7b9254 + 581478d commit f9ef4b4

File tree

5 files changed

+40
-16
lines changed

5 files changed

+40
-16
lines changed

ompi/mca/osc/ucx/osc_ucx_active_target.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ int ompi_osc_ucx_post(struct ompi_group_t *group, int assert, struct ompi_win_t
279279
ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[j]), NULL, 0);
280280
}
281281

282-
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
282+
opal_common_ucx_wpool_progress(mca_osc_ucx_component.wpool);
283283
usleep(100);
284284
} while (1);
285285
}

ompi/mca/osc/ucx/osc_ucx_comm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ static inline int start_atomicity(
279279
break;
280280
}
281281

282-
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
282+
opal_common_ucx_wpool_progress(mca_osc_ucx_component.wpool);
283283
}
284284

285285
*lock_acquired = true;

ompi/mca/osc/ucx/osc_ucx_passive_target.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ static inline int start_shared(ompi_osc_ucx_module_t *module, int target) {
4242
} else {
4343
break;
4444
}
45-
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
45+
opal_common_ucx_wpool_progress(mca_osc_ucx_component.wpool);
4646
}
4747

4848
return ret;
@@ -70,8 +70,7 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) {
7070
if (result_value == TARGET_LOCK_UNLOCKED) {
7171
return OMPI_SUCCESS;
7272
}
73-
74-
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
73+
opal_common_ucx_wpool_progress(mca_osc_ucx_component.wpool);
7574
}
7675
}
7776

opal/mca/common/ucx/common_ucx_wpool.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ _winfo_create(opal_common_ucx_wpool_t *wpool)
5959
goto exit;
6060
}
6161

62-
winfo = calloc(1, sizeof(*winfo));
62+
winfo = OBJ_NEW(opal_common_ucx_winfo_t);
6363
if (NULL == winfo) {
6464
MCA_COMMON_UCX_ERROR("Cannot allocate memory for worker info");
6565
goto release_worker;
@@ -194,9 +194,10 @@ opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool,
194194
rc = OPAL_ERROR;
195195
goto err_worker_create;
196196
}
197-
wpool->dflt_worker = winfo->worker;
197+
wpool->dflt_winfo = winfo;
198+
OBJ_RETAIN(wpool->dflt_winfo);
198199

199-
status = ucp_worker_get_address(wpool->dflt_worker,
200+
status = ucp_worker_get_address(wpool->dflt_winfo->worker,
200201
&wpool->recv_waddr, &wpool->recv_waddr_len);
201202
if (status != UCS_OK) {
202203
MCA_COMMON_UCX_VERBOSE(1, "ucp_worker_get_address failed: %d", status);
@@ -214,8 +215,10 @@ opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool,
214215
err_wpool_add:
215216
free(wpool->recv_waddr);
216217
err_get_addr:
217-
if (NULL != wpool->dflt_worker) {
218-
ucp_worker_destroy(wpool->dflt_worker);
218+
if (NULL != wpool) {
219+
OBJ_RELEASE(winfo);
220+
OBJ_RELEASE(wpool->dflt_winfo);
221+
wpool->dflt_winfo = NULL;
219222
}
220223
err_worker_create:
221224
ucp_cleanup(wpool->ucp_ctx);
@@ -233,7 +236,7 @@ void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool)
233236

234237
/* Release the address here. recv worker will be released
235238
* below along with other idle workers */
236-
ucp_worker_release_address(wpool->dflt_worker, wpool->recv_waddr);
239+
ucp_worker_release_address(wpool->dflt_winfo->worker, wpool->recv_waddr);
237240

238241
/* Go over the list, free idle list items */
239242
if (!opal_list_is_empty(&wpool->idle_workers)) {
@@ -258,6 +261,9 @@ void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool)
258261
}
259262
OBJ_DESTRUCT(&wpool->active_workers);
260263

264+
OBJ_RELEASE(wpool->dflt_winfo);
265+
wpool->dflt_winfo = NULL;
266+
261267
OBJ_DESTRUCT(&wpool->mutex);
262268
ucp_cleanup(wpool->ucp_ctx);
263269
return;
@@ -272,17 +278,33 @@ opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
272278
/* Go over all active workers and progress them
273279
* TODO: may want to have some partitioning to progress only part of
274280
* workers */
275-
opal_mutex_lock(&wpool->mutex);
281+
if (0 != opal_mutex_trylock(&wpool->mutex)) {
282+
return completed;
283+
}
284+
285+
bool progress_dflt_worker = true;
276286
OPAL_LIST_FOREACH_SAFE(winfo, next, &wpool->active_workers,
277287
opal_common_ucx_winfo_t) {
278-
opal_mutex_lock(&winfo->mutex);
288+
if (0 != opal_mutex_trylock(&winfo->mutex)) {
289+
continue;
290+
}
279291
do {
292+
if (winfo == wpool->dflt_winfo) {
293+
progress_dflt_worker = false;
294+
}
280295
progressed = ucp_worker_progress(winfo->worker);
281296
completed += progressed;
282297
} while (progressed);
283298
opal_mutex_unlock(&winfo->mutex);
284299
}
285300
opal_mutex_unlock(&wpool->mutex);
301+
302+
if (progress_dflt_worker) {
303+
/* make sure to progress at least some */
304+
opal_mutex_lock(&wpool->dflt_winfo->mutex);
305+
completed += ucp_worker_progress(wpool->dflt_winfo->worker);
306+
opal_mutex_unlock(&wpool->dflt_winfo->mutex);
307+
}
286308
return completed;
287309
}
288310

opal/mca/common/ucx/common_ucx_wpool.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030

3131
BEGIN_C_DECLS
3232

33+
/* fordward declaration */
34+
typedef struct opal_common_ucx_winfo opal_common_ucx_winfo_t;
35+
3336
/* Worker pool is a global object that that is allocated per component or can be
3437
* shared between multiple compatible components.
3538
* The lifetime of this object is normally equal to the lifetime of a component[s].
@@ -42,7 +45,7 @@ typedef struct {
4245

4346
/* UCX data */
4447
ucp_context_h ucp_ctx;
45-
ucp_worker_h dflt_worker;
48+
opal_common_ucx_winfo_t *dflt_winfo;
4649
ucp_address_t *recv_waddr;
4750
size_t recv_waddr_len;
4851

@@ -116,7 +119,7 @@ typedef struct {
116119
* in the Worker Pool lists (either active or idle).
117120
* One wpmem is intended per shared memory segment (i.e. MPI Window).
118121
*/
119-
typedef struct opal_common_ucx_winfo {
122+
struct opal_common_ucx_winfo {
120123
opal_list_item_t super;
121124
opal_recursive_mutex_t mutex;
122125
ucp_worker_h worker;
@@ -125,7 +128,7 @@ typedef struct opal_common_ucx_winfo {
125128
short *inflight_ops;
126129
short global_inflight_ops;
127130
ucs_status_ptr_t inflight_req;
128-
} opal_common_ucx_winfo_t;
131+
};
129132
OBJ_CLASS_DECLARATION(opal_common_ucx_winfo_t);
130133

131134
typedef void (*opal_common_ucx_user_req_handler_t)(void *request);

0 commit comments

Comments
 (0)