Skip to content

Commit 38940b3

Browse files
committed
osc/sm: Fix detection of non-contig info
Remove the info subscribe for both the alloc_shared_noncontig and blocking_fence info keys. This change was inspired by issue #10175, which highlighted that we were not properly following the non-contig info key (our behavior was standards compliant, but not particularly helpful), because the info subscription was overwriting the provided value. In the investigation, it became clear that there is really no advantage to subscribing to a key that can't be changed, so drop the subscription code for the two keys that can't be changed and fix a bug and remove code all at the same time. Signed-off-by: Brian Barrett <[email protected]>
1 parent 96f6184 commit 38940b3

File tree

1 file changed

+13
-38
lines changed

1 file changed

+13
-38
lines changed

ompi/mca/osc/sm/osc_sm_component.c

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* of Tennessee Research Foundation. All rights
1212
* reserved.
1313
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
14-
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
14+
* Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
1515
* Copyright (c) 2020 FUJITSU LIMITED. All rights reserved.
1616
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
1717
* University of Stuttgart. All rights reserved.
@@ -30,7 +30,6 @@
3030
#include "ompi/request/request.h"
3131
#include "opal/util/sys_limits.h"
3232
#include "opal/align.h"
33-
#include "opal/util/info_subscriber.h"
3433
#include "opal/util/printf.h"
3534
#include "opal/mca/mpool/base/base.h"
3635

@@ -46,9 +45,6 @@ static int component_register (void);
4645
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
4746
struct ompi_communicator_t *comm, struct opal_info_t *info,
4847
int flavor, int *model);
49-
static const char* component_set_blocking_fence_info(opal_infosubscriber_t *obj, const char *key, const char *val);
50-
static const char* component_set_alloc_shared_noncontig_info(opal_infosubscriber_t *obj, const char *key, const char *val);
51-
5248

5349
ompi_osc_sm_component_t mca_osc_sm_component = {
5450
{ /* ompi_osc_base_component_t */
@@ -219,9 +215,6 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
219215

220216
OBJ_CONSTRUCT(&module->lock, opal_mutex_t);
221217

222-
ret = opal_infosubscribe_subscribe(&(win->super), "alloc_shared_noncontig", "false", component_set_alloc_shared_noncontig_info);
223-
if (OPAL_SUCCESS != ret) goto error;
224-
225218
if (NULL != info) {
226219
ompi_osc_base_set_memory_alignment(info, &memory_alignment);
227220
}
@@ -264,24 +257,34 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
264257
size_t posts_size, post_size = (comm_size + OSC_SM_POST_MASK) / (OSC_SM_POST_MASK + 1);
265258
size_t data_base_size;
266259

267-
OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output,
268-
"allocating shared memory region of size %ld\n", (long) size));
260+
opal_output_verbose(MCA_BASE_VERBOSE_DEBUG, ompi_osc_base_framework.framework_output,
261+
"allocating shared memory region of size %ld\n", (long) size);
269262

270263
/* get the pagesize */
271264
pagesize = opal_getpagesize();
272265

273266
rbuf = malloc(sizeof(unsigned long) * comm_size);
274267
if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
275268

269+
/* Note that the alloc_shared_noncontig info key only has
270+
* meaning during window creation. Once the window is
271+
* created, we can't move memory around without making
272+
* everything miserable. So we intentionally do not subcribe
273+
* to updates on the info key, because there's no useful
274+
* update to occur. */
276275
module->noncontig = false;
277276
if (OMPI_SUCCESS != opal_info_get_bool(info, "alloc_shared_noncontig",
278277
&module->noncontig, &flag)) {
279278
goto error;
280279
}
281280

282281
if (module->noncontig) {
282+
opal_output_verbose(MCA_BASE_VERBOSE_DEBUG, ompi_osc_base_framework.framework_output,
283+
"allocating window using non-contiguous strategy");
283284
total = ((size - 1) / pagesize + 1) * pagesize;
284285
} else {
286+
opal_output_verbose(MCA_BASE_VERBOSE_DEBUG, ompi_osc_base_framework.framework_output,
287+
"allocating window using contiguous strategy");
285288
total = size;
286289
}
287290
ret = module->comm->c_coll->coll_allgather(&total, 1, MPI_UNSIGNED_LONG,
@@ -446,11 +449,6 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
446449
#endif
447450
}
448451

449-
ret = opal_infosubscribe_subscribe(&(win->super), "blocking_fence", module->global_state->use_barrier_for_fence ? "true" : "false",
450-
component_set_blocking_fence_info);
451-
452-
if (OPAL_SUCCESS != ret) goto error;
453-
454452
ret = module->comm->c_coll->coll_barrier(module->comm,
455453
module->comm->c_coll->coll_barrier_module);
456454
if (OMPI_SUCCESS != ret) goto error;
@@ -582,29 +580,6 @@ ompi_osc_sm_set_info(struct ompi_win_t *win, struct opal_info_t *info)
582580
}
583581

584582

585-
static const char*
586-
component_set_blocking_fence_info(opal_infosubscriber_t *obj, const char *key, const char *val)
587-
{
588-
ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module;
589-
/*
590-
* Assuming that you can't change the default.
591-
*/
592-
return module->global_state->use_barrier_for_fence ? "true" : "false";
593-
}
594-
595-
596-
static const char*
597-
component_set_alloc_shared_noncontig_info(opal_infosubscriber_t *obj, const char *key, const char *val)
598-
{
599-
600-
ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module;
601-
/*
602-
* Assuming that you can't change the default.
603-
*/
604-
return module->noncontig ? "true" : "false";
605-
}
606-
607-
608583
int
609584
ompi_osc_sm_get_info(struct ompi_win_t *win, struct opal_info_t **info_used)
610585
{

0 commit comments

Comments
 (0)