Skip to content

Commit 1480892

Browse files
committed
osc/ucx: Add support for the no_locks info key
Signed-off-by: Nysal Jan K.A <[email protected]>
1 parent 06c6325 commit 1480892

File tree

3 files changed

+90
-5
lines changed

3 files changed

+90
-5
lines changed

ompi/mca/osc/ucx/osc_ucx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ typedef struct ompi_osc_ucx_component {
3333
bool env_initialized; /* UCX environment is initialized or not */
3434
int num_incomplete_req_ops;
3535
int num_modules;
36+
bool no_locks; /* Default value of the no_locks info key for new windows */
3637
unsigned int priority;
3738
} ompi_osc_ucx_component_t;
3839

@@ -113,6 +114,7 @@ typedef struct ompi_osc_ucx_module {
113114
uint64_t req_result;
114115
int *start_grp_ranks;
115116
bool lock_all_is_nocheck;
117+
bool no_locks;
116118
opal_common_ucx_ctx_t *ctx;
117119
opal_common_ucx_wpmem_t *mem;
118120
opal_common_ucx_wpmem_t *state_mem;

ompi/mca/osc/ucx/osc_ucx_component.c

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,28 @@ ompi_osc_ucx_module_t ompi_osc_ucx_module_template = {
114114
}
115115
};
116116

117+
/* look up parameters for configuring this window. The code first
118+
looks in the info structure passed by the user, then it checks
119+
for a matching MCA variable. */
120+
static bool check_config_value_bool (char *key, opal_info_t *info)
121+
{
122+
int ret, flag, param;
123+
bool result = false;
124+
const bool *flag_value = &result;
125+
126+
ret = opal_info_get_bool (info, key, &result, &flag);
127+
if (OMPI_SUCCESS == ret && flag) {
128+
return result;
129+
}
130+
131+
param = mca_base_var_find("ompi", "osc", "ucx", key);
132+
if (0 <= param) {
133+
(void) mca_base_var_get_value(param, &flag_value, NULL, NULL);
134+
}
135+
136+
return flag_value[0];
137+
}
138+
117139
static int component_open(void) {
118140
return OMPI_SUCCESS;
119141
}
@@ -135,6 +157,16 @@ static int component_register(void) {
135157
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_ucx_component.priority);
136158
free(description_str);
137159

160+
mca_osc_ucx_component.no_locks = false;
161+
162+
opal_asprintf(&description_str, "Enable optimizations available only if MPI_LOCK is "
163+
"not used. Info key of same name overrides this value (default: %s)",
164+
mca_osc_ucx_component.no_locks ? "true" : "false");
165+
(void) mca_base_component_var_register(&mca_osc_ucx_component.super.osc_version, "no_locks", description_str,
166+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5,
167+
MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_ucx_component.no_locks);
168+
free(description_str);
169+
138170
opal_common_ucx_mca_var_register(&mca_osc_ucx_component.super.osc_version);
139171

140172
return OMPI_SUCCESS;
@@ -222,6 +254,38 @@ static void ompi_osc_ucx_unregister_progress()
222254
_osc_ucx_init_unlock();
223255
}
224256

257+
/*
 * Callback registered for the "no_locks" info key of a window.
 *
 * obj   - the window (an ompi_win_t viewed through its info-subscriber base)
 * key   - name of the info key being set (not used here)
 * value - requested value, converted with opal_str_to_bool()
 *
 * Switching to no_locks destroys the outstanding-locks hash table and sets
 * OMPI_WIN_NO_LOCKS on the window.  Switching back re-creates the table and
 * clears the flag; if the table cannot be re-created the window stays in
 * no_locks mode with the flag still set, so module->no_locks and
 * win->w_flags always agree.
 *
 * Every call ends with a barrier on the module's communicator (its return
 * value is not checked).  Returns the value actually in effect, "true" or
 * "false", as a string literal.
 */
static char* ompi_osc_ucx_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value)
{
    struct ompi_win_t *win = (struct ompi_win_t*) obj;
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module;
    bool temp;

    temp = opal_str_to_bool(value);

    if (temp && !module->no_locks) {
        /* clean up the lock hash. it is up to the user to ensure no lock is
         * outstanding from this process when setting the info key */
        OBJ_DESTRUCT(&module->outstanding_locks);
        module->no_locks = true;
        win->w_flags |= OMPI_WIN_NO_LOCKS;
    } else if (!temp && module->no_locks) {
        int comm_size = ompi_comm_size (module->comm);
        int ret;

        OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
        ret = opal_hash_table_init (&module->outstanding_locks, comm_size);
        if (OPAL_SUCCESS != ret) {
            /* Locks cannot be re-enabled: release the object constructed
             * above (ompi_osc_ucx_free skips it while no_locks is set) and
             * leave both no_locks and OMPI_WIN_NO_LOCKS as they were. */
            OBJ_DESTRUCT(&module->outstanding_locks);
        } else {
            module->no_locks = false;
            win->w_flags &= ~OMPI_WIN_NO_LOCKS;
        }
    }

    /* synchronize all processes of the window's communicator before returning */
    module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module);

    return module->no_locks ? "true" : "false";
}
288+
225289
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
226290
struct ompi_communicator_t *comm, struct opal_info_t *info,
227291
int flavor, int *model) {
@@ -324,6 +388,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
324388

325389
module->flavor = flavor;
326390
module->size = size;
391+
module->no_locks = check_config_value_bool ("no_locks", info);
327392

328393
/* share everyone's displacement units. Only do an allgather if
329394
strictly necessary, since it requires O(p) state. */
@@ -442,18 +507,24 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
442507
module->post_count = 0;
443508
module->start_group = NULL;
444509
module->post_group = NULL;
445-
OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
446510
OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
447511
module->start_grp_ranks = NULL;
448512
module->lock_all_is_nocheck = false;
449513

450-
ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
451-
if (ret != OPAL_SUCCESS) {
452-
goto error;
514+
if (!module->no_locks) {
515+
OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
516+
ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
517+
if (ret != OPAL_SUCCESS) {
518+
goto error;
519+
}
520+
} else {
521+
win->w_flags |= OMPI_WIN_NO_LOCKS;
453522
}
454523

455524
win->w_osc_module = &module->super;
456525

526+
opal_infosubscribe_subscribe(&win->super, "no_locks", "false", ompi_osc_ucx_set_no_lock_info);
527+
457528
/* sync with everyone */
458529

459530
ret = module->comm->c_coll->coll_barrier(module->comm,
@@ -598,7 +669,9 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) {
598669

599670
assert(module->lock_count == 0);
600671
assert(opal_list_is_empty(&module->pending_posts) == true);
601-
OBJ_DESTRUCT(&module->outstanding_locks);
672+
if(!module->no_locks) {
673+
OBJ_DESTRUCT(&module->outstanding_locks);
674+
}
602675
OBJ_DESTRUCT(&module->pending_posts);
603676

604677
opal_common_ucx_wpmem_flush(module->mem, OPAL_COMMON_UCX_SCOPE_WORKER, 0);

ompi/mca/osc/ucx/osc_ucx_passive_target.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ int ompi_osc_ucx_lock(int lock_type, int target, int assert, struct ompi_win_t *
9999
ompi_osc_ucx_epoch_t original_epoch = module->epoch_type.access;
100100
int ret = OMPI_SUCCESS;
101101

102+
if (module->no_locks) {
103+
OSC_UCX_VERBOSE(1, "attempted to lock with no_locks set");
104+
return OMPI_ERR_RMA_SYNC;
105+
}
106+
102107
if (module->lock_count == 0) {
103108
if (module->epoch_type.access != NONE_EPOCH &&
104109
module->epoch_type.access != FENCE_EPOCH) {
@@ -188,6 +193,11 @@ int ompi_osc_ucx_lock_all(int assert, struct ompi_win_t *win) {
188193
ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
189194
int ret = OMPI_SUCCESS;
190195

196+
if (module->no_locks) {
197+
OSC_UCX_VERBOSE(1, "attempted to lock with no_locks set");
198+
return OMPI_ERR_RMA_SYNC;
199+
}
200+
191201
if (module->epoch_type.access != NONE_EPOCH &&
192202
module->epoch_type.access != FENCE_EPOCH) {
193203
return OMPI_ERR_RMA_SYNC;

0 commit comments

Comments
 (0)