Skip to content

Commit 3f41e0f

Browse files
authored
Merge pull request #9991 from bwbarrett/bugfix/osc-sm
Fix some SM OSC component initialization issues
2 parents 524029d + 1112744 commit 3f41e0f

File tree

10 files changed

+322
-51
lines changed

10 files changed

+322
-51
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ test/class/ompi_rb_tree
633633
test/class/ompi_bitmap
634634
test/class/opal_bitmap
635635
test/class/opal_fifo
636+
test/class/opal_cstring
636637
test/class/opal_hash_table
637638
test/class/opal_lifo
638639
test/class/opal_list

ompi/mca/osc/base/osc_base_init.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ ompi_osc_base_select(ompi_win_t *win,
5757
priority = component->osc_query(win, base, size, disp_unit, comm,
5858
win->super.s_info, flavor);
5959
if (priority < 0) {
60-
if (MPI_WIN_FLAVOR_SHARED == flavor && OMPI_ERR_RMA_SHARED == priority) {
61-
/* NTH: quick fix to return OMPI_ERR_RMA_SHARED */
62-
return OMPI_ERR_RMA_SHARED;
63-
}
6460
continue;
6561
}
6662

ompi/mca/osc/monitoring/osc_monitoring_component.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,6 @@ static int mca_osc_monitoring_component_select(struct ompi_win_t *win, void **ba
9090

9191
priority = component->osc_query(win, base, size, disp_unit, comm, info, flavor);
9292
if (priority < 0) {
93-
if (MPI_WIN_FLAVOR_SHARED == flavor && OMPI_ERR_RMA_SHARED == priority) {
94-
/* NTH: quick fix to return OMPI_ERR_RMA_SHARED */
95-
return OMPI_ERR_RMA_SHARED;
96-
}
9793
continue;
9894
}
9995

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
383383
{
384384

385385
if (MPI_WIN_FLAVOR_SHARED == flavor) {
386-
return OMPI_ERR_RMA_SHARED;
386+
return -1;
387387
}
388388

389389
#if OPAL_CUDA_SUPPORT
@@ -403,7 +403,7 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
403403
return mca_osc_rdma_component.priority;
404404
}
405405

406-
return OMPI_ERROR;
406+
return -1;
407407
}
408408

409409
static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void **base, size_t size) {

ompi/mca/osc/sm/osc_sm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t;
5353
struct ompi_osc_sm_component_t {
5454
ompi_osc_base_component_t super;
5555

56+
/** Priority of the osc/sm component */
57+
unsigned int priority;
58+
5659
char *backing_directory;
5760
};
5861
typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t;

ompi/mca/osc/sm/osc_sm_component.c

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = {
115115

116116
static int component_register (void)
117117
{
118+
char *description_str;
119+
118120
if (0 == access ("/dev/shm", W_OK)) {
119121
mca_osc_sm_component.backing_directory = "/dev/shm";
120122
} else {
@@ -128,6 +130,16 @@ static int component_register (void)
128130
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
129131
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_sm_component.backing_directory);
130132

133+
mca_osc_sm_component.priority = 100;
134+
opal_asprintf(&description_str, "Priority of the osc/sm component (default: %d)",
135+
mca_osc_sm_component.priority);
136+
(void)mca_base_component_var_register(&mca_osc_sm_component.super.osc_version,
137+
"priority", description_str,
138+
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
139+
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP,
140+
&mca_osc_sm_component.priority);
141+
free(description_str);
142+
131143
return OPAL_SUCCESS;
132144
}
133145

@@ -154,36 +166,32 @@ component_finalize(void)
154166
}
155167

156168

157-
static int
158-
check_win_ok(ompi_communicator_t *comm, int flavor)
159-
{
160-
if (! (MPI_WIN_FLAVOR_SHARED == flavor
161-
|| MPI_WIN_FLAVOR_ALLOCATE == flavor) ) {
162-
return OMPI_ERR_NOT_SUPPORTED;
163-
}
164-
165-
if (ompi_group_have_remote_peers (comm->c_local_group)) {
166-
return OMPI_ERR_RMA_SHARED;
167-
}
168-
169-
return OMPI_SUCCESS;
170-
}
171-
172-
173169
static int
174170
component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
175171
struct ompi_communicator_t *comm, struct opal_info_t *info,
176172
int flavor)
177173
{
178174
int ret;
179-
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
180-
if (OMPI_ERR_NOT_SUPPORTED == ret) {
175+
176+
/* component only supports shared or allocate flavors */
177+
if (! (MPI_WIN_FLAVOR_SHARED == flavor ||
178+
MPI_WIN_FLAVOR_ALLOCATE == flavor)) {
179+
return -1;
180+
}
181+
182+
/* If flavor is win_allocate, we can't run if there are remote
183+
* peers in the group. The same check for flavor_shared happens
184+
* in select(), so that we can return an error to the user (since
185+
* we should be able to run for all flavor_shared use cases.
186+
* There's no way to return an error from component_query to the
187+
* user, hence the delayed check. */
188+
if (MPI_WIN_FLAVOR_ALLOCATE == flavor) {
189+
if (ompi_group_have_remote_peers(comm->c_local_group)) {
181190
return -1;
182191
}
183-
return ret;
184192
}
185193

186-
return 100;
194+
return mca_osc_sm_component.priority;
187195
}
188196

189197

@@ -198,8 +206,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
198206
int ret = OMPI_ERROR;
199207
size_t memory_alignment = OPAL_ALIGN_MIN;
200208

201-
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
202-
return ret;
209+
assert(MPI_WIN_FLAVOR_SHARED == flavor || MPI_WIN_FLAVOR_ALLOCATE == flavor);
210+
211+
if (ompi_group_have_remote_peers(comm->c_local_group)) {
212+
return OMPI_ERR_RMA_SHARED;
203213
}
204214

205215
/* create module structure */

opal/class/opal_cstring.c

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ static void opal_cstring_ctor(opal_cstring_t *obj)
3333
{
3434
*(size_t *) &(obj->length) = 0;
3535
/* make sure the string is null-terminated */
36-
((char *) obj->string)[0] = '\n';
36+
((char *) obj->string)[0] = '\0';
3737
}
3838

3939
static inline size_t opal_cstring_alloc_size(size_t len)
@@ -90,10 +90,18 @@ int opal_cstring_to_int(opal_cstring_t *string, int *interp)
9090
if (*endp != '\0') {
9191
return OPAL_ERR_BAD_PARAM;
9292
}
93-
/* underflow */
93+
/* base errors */
9494
if (tmp == 0 && errno == EINVAL) {
9595
return OPAL_ERR_BAD_PARAM;
9696
}
97+
/* overflow/underflow of long int (return of strtol) */
98+
if (errno == ERANGE && (tmp == LONG_MIN || tmp == LONG_MAX)) {
99+
return OPAL_ERR_BAD_PARAM;
100+
}
101+
/* overflow/underflow of int (for cases where long int is larger) */
102+
if (tmp < INT_MIN || tmp > INT_MAX) {
103+
return OPAL_ERR_BAD_PARAM;
104+
}
97105

98106
*interp = (int) tmp;
99107

@@ -104,24 +112,28 @@ static int opal_str_to_bool_impl(const char *string, bool *interp)
104112
{
105113
const char *ptr = string;
106114

107-
/* Trim leading whitespace */
108-
while (isspace(*ptr)) {
109-
++ptr;
110-
}
115+
if (NULL != string) {
116+
/* Trim leading whitespace */
117+
while (isspace(*ptr)) {
118+
++ptr;
119+
}
111120

112-
if ('\0' != *ptr) {
113-
if (isdigit(*ptr)) {
114-
*interp = (bool) atoi(ptr);
115-
} else if (0 == strncasecmp(ptr, "yes", 3) || 0 == strncasecmp(ptr, "true", 4)) {
116-
*interp = true;
117-
} else if (0 == strncasecmp(ptr, "no", 2) && 0 == strncasecmp(ptr, "false", 5)) {
118-
*interp = false;
119-
} else {
120-
*interp = false;
121-
return OPAL_ERR_BAD_PARAM;
121+
if ('\0' != *ptr) {
122+
if (isdigit(*ptr)) {
123+
*interp = (bool) atoi(ptr);
124+
return OPAL_SUCCESS;
125+
} else if (0 == strncasecmp(ptr, "yes", 3) || 0 == strncasecmp(ptr, "true", 4)) {
126+
*interp = true;
127+
return OPAL_SUCCESS;
128+
} else if (0 == strncasecmp(ptr, "no", 2) || 0 == strncasecmp(ptr, "false", 5)) {
129+
*interp = false;
130+
return OPAL_SUCCESS;
131+
}
122132
}
123133
}
124-
return OPAL_SUCCESS;
134+
135+
*interp = false;
136+
return OPAL_ERR_BAD_PARAM;
125137
}
126138

127139
int opal_cstring_to_bool(opal_cstring_t *string, bool *interp)

opal/class/opal_cstring.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
#ifndef OPAL_STRING_H
4444
#define OPAL_STRING_H
4545

46-
#include "opal_config.h"
4746
#include "opal/class/opal_object.h"
4847
#include "opal/mca/base/mca_base_var_enum.h"
4948

test/class/Makefile.am

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ check_PROGRAMS = \
3535
opal_value_array \
3636
opal_pointer_array \
3737
opal_lifo \
38-
opal_fifo
38+
opal_fifo \
39+
opal_cstring
3940

4041
TESTS = $(check_PROGRAMS)
4142

@@ -94,6 +95,12 @@ opal_fifo_LDADD = \
9495
$(top_builddir)/test/support/libsupport.a
9596
opal_fifo_DEPENDENCIES = $(opal_fifo_LDADD)
9697

98+
opal_cstring_SOURCES = opal_cstring.c
99+
opal_cstring_LDADD = \
100+
$(top_builddir)/opal/lib@[email protected] \
101+
$(top_builddir)/test/support/libsupport.a
102+
opal_cstring_DEPENDENCIES = $(opal_cstring_LDADD)
103+
97104
clean-local:
98105
rm -f opal_bitmap_test_out.txt opal_hash_table_test_out.txt opal_proc_table_test_out.txt
99106

0 commit comments

Comments
 (0)