Skip to content

Commit 38d2f12

Browse files
authored
Merge pull request #8220 from devreal/fix-coll-base-preference
Fix preference treatment in coll/base
2 parents 2877932 + 1cdc855 commit 38d2f12

File tree

7 files changed

+29
-35
lines changed

7 files changed

+29
-35
lines changed

ompi/mca/coll/adapt/coll_adapt_module.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
* of Tennessee Research Foundation. All rights
44
* reserved.
55
* $COPYRIGHT$
6-
*
6+
*
77
* Additional copyrights may follow
8-
*
8+
*
99
* $HEADER$
1010
*/
1111

@@ -146,7 +146,7 @@ mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t *
146146
/* Get the priority level attached to this module.
147147
If priority is less than 0, then the module is unavailable. */
148148
*priority = mca_coll_adapt_component.adapt_priority;
149-
if (mca_coll_adapt_component.adapt_priority <= 0) {
149+
if (mca_coll_adapt_component.adapt_priority < 0) {
150150
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
151151
"coll:adapt:comm_query (%d/%s): priority too low; "
152152
"disqualifying myself",

ompi/mca/coll/base/coll_base_comm_select.c

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ static opal_list_t *check_components(opal_list_t * components,
337337
ompi_communicator_t * comm)
338338
{
339339
int priority, flag;
340+
int count_include = 0;
340341
const mca_base_component_t *component;
341342
mca_base_component_list_item_t *cli;
342343
mca_coll_base_module_2_3_0_t *module;
@@ -363,7 +364,8 @@ static opal_list_t *check_components(opal_list_t * components,
363364
if(NULL == coll_argv) {
364365
goto proceed_to_select;
365366
}
366-
int idx2, count_include = opal_argv_count(coll_argv);
367+
int idx2;
368+
count_include = opal_argv_count(coll_argv);
367369
/* Allocate the coll_include argv */
368370
coll_include = (char**)malloc((count_include + 1) * sizeof(char*));
369371
coll_include[count_include] = NULL; /* NULL terminated array */
@@ -385,15 +387,6 @@ static opal_list_t *check_components(opal_list_t * components,
385387
}
386388
coll_include[idx] = coll_argv[idx];
387389
}
388-
/* Reverse the order of the coll_include argv to facilitate the ordering of
389-
* the selected components reverse.
390-
*/
391-
for( idx2 = 0; idx2 < (count_include - 1); idx2++ ) {
392-
char* temp = coll_include[idx2];
393-
coll_include[idx2] = coll_include[count_include - 1];
394-
coll_include[count_include - 1] = temp;
395-
count_include--;
396-
}
397390
}
398391
proceed_to_select:
399392
/* Make a list of the components that query successfully */
@@ -453,14 +446,17 @@ static opal_list_t *check_components(opal_list_t * components,
453446

454447
/* For all valid component reorder them not on their provided priorities but on
455448
* the order requested in the info key. As at this point the coll_include is
456-
* already ordered backward we can simply prepend the components.
449+
traversed backward we can simply append the components.
450+
* Note that the last element in selectable will have the highest priority.
457451
*/
458-
mca_coll_base_avail_coll_t *item, *item_next;
459-
OPAL_LIST_FOREACH_SAFE(item, item_next,
460-
selectable, mca_coll_base_avail_coll_t) {
461-
if( component_in_argv(coll_include, item->ac_component_name) ) {
462-
opal_list_remove_item(selectable, &item->super);
463-
opal_list_prepend(selectable, &item->super);
452+
for (int idx = count_include-1; idx >= 0; --idx) {
453+
mca_coll_base_avail_coll_t *item;
454+
OPAL_LIST_FOREACH(item, selectable, mca_coll_base_avail_coll_t) {
455+
if (0 == strcmp(item->ac_component_name, coll_include[idx])) {
456+
opal_list_remove_item(selectable, &item->super);
457+
opal_list_append(selectable, &item->super);
458+
break;
459+
}
464460
}
465461
}
466462

ompi/mca/coll/han/coll_han_component.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ ompi_coll_han_components available_components[COMPONENTS_COUNT] = {
3939
{ LIBNBC, "libnbc", NULL },
4040
{ TUNED, "tuned", NULL },
4141
{ SM, "sm", NULL },
42-
{ SHARED, "shared", NULL },
4342
{ ADAPT, "adapt", NULL },
4443
{ HAN, "han", NULL }
4544
};
@@ -179,12 +178,12 @@ static int han_register(void)
179178

180179
cs->han_bcast_low_module = 0;
181180
(void) mca_base_component_var_register(c, "bcast_low_module",
182-
"low level module for bcast, 0 sm, 1 solo",
181+
"low level module for bcast, 0 tuned, 1 sm",
183182
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
184183
OPAL_INFO_LVL_9,
185184
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
186185

187-
cs->han_reduce_segsize = 524288;
186+
cs->han_reduce_segsize = 65536;
188187
(void) mca_base_component_var_register(c, "reduce_segsize",
189188
"segment size for reduce",
190189
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@@ -200,11 +199,11 @@ static int han_register(void)
200199

201200
cs->han_reduce_low_module = 0;
202201
(void) mca_base_component_var_register(c, "reduce_low_module",
203-
"low level module for allreduce, 0 sm, 1 shared",
202+
"low level module for allreduce, 0 tuned, 1 sm",
204203
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
205204
OPAL_INFO_LVL_9,
206205
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_low_module);
207-
cs->han_allreduce_segsize = 524288;
206+
cs->han_allreduce_segsize = 65536;
208207
(void) mca_base_component_var_register(c, "allreduce_segsize",
209208
"segment size for allreduce",
210209
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@@ -220,7 +219,7 @@ static int han_register(void)
220219

221220
cs->han_allreduce_low_module = 0;
222221
(void) mca_base_component_var_register(c, "allreduce_low_module",
223-
"low level module for allreduce, 0 sm, 1 shared",
222+
"low level module for allreduce, 0 tuned, 1 sm",
224223
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
225224
OPAL_INFO_LVL_9,
226225
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_low_module);
@@ -234,7 +233,7 @@ static int han_register(void)
234233

235234
cs->han_allgather_low_module = 0;
236235
(void) mca_base_component_var_register(c, "allgather_low_module",
237-
"low level module for allgather, 0 sm, 1 shared",
236+
"low level module for allgather, 0 tuned, 1 sm",
238237
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
239238
OPAL_INFO_LVL_9,
240239
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_low_module);
@@ -248,7 +247,7 @@ static int han_register(void)
248247

249248
cs->han_gather_low_module = 0;
250249
(void) mca_base_component_var_register(c, "gather_low_module",
251-
"low level module for gather, 0 sm, 1 shared",
250+
"low level module for gather, 0 tuned, 1 sm",
252251
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
253252
OPAL_INFO_LVL_9,
254253
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
@@ -262,7 +261,7 @@ static int han_register(void)
262261

263262
cs->han_scatter_low_module = 0;
264263
(void) mca_base_component_var_register(c, "scatter_low_module",
265-
"low level module for scatter, 0 sm, 1 shared",
264+
"low level module for scatter, 0 tuned, 1 sm",
266265
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
267266
OPAL_INFO_LVL_9,
268267
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_low_module);

ompi/mca/coll/han/coll_han_dynamic.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ typedef enum COMPONENTS {
102102
LIBNBC,
103103
TUNED,
104104
SM,
105-
SHARED,
106105
ADAPT,
107106
HAN,
108107
COMPONENTS_COUNT

ompi/mca/coll/han/coll_han_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority)
188188
/* Get the priority level attached to this module. If priority is less
189189
* than 0, then the module is unavailable. */
190190
*priority = mca_coll_han_component.han_priority;
191-
if (mca_coll_han_component.han_priority <= 0) {
191+
if (mca_coll_han_component.han_priority < 0) {
192192
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
193193
"coll:han:comm_query (%d/%s): priority too low; disqualifying myself",
194194
comm->c_contextid, comm->c_name);

ompi/mca/coll/han/coll_han_subcomms.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
258258
* Upgrade tuned module priority to set up low_comms[0] with tuned module
259259
* This sub-communicator contains the ranks that share my node.
260260
*/
261-
opal_info_set(&comm_info, "ompi_comm_coll_preference", "sm,^han");
261+
opal_info_set(&comm_info, "ompi_comm_coll_preference", "tuned,^han");
262262
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
263263
&comm_info, &(low_comms[0]));
264264

@@ -272,7 +272,7 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
272272
* Upgrade sm module priority to set up low_comms[1] with sm module
273273
* This sub-communicator contains the ranks that share my node.
274274
*/
275-
opal_info_set(&comm_info, "ompi_comm_coll_preference", "shared,^han");
275+
opal_info_set(&comm_info, "ompi_comm_coll_preference", "sm,^han");
276276
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
277277
&comm_info, &(low_comms[1]));
278278

ompi/mca/coll/sm/coll_sm_module.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,10 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
182182
/* Get the priority level attached to this module. If priority is less
183183
* than 0, then the module is unavailable. */
184184
*priority = mca_coll_sm_component.sm_priority;
185-
if (mca_coll_sm_component.sm_priority <= 0) {
185+
if (mca_coll_sm_component.sm_priority < 0) {
186186
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
187187
"coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name);
188-
return NULL;
188+
return NULL;
189189
}
190190

191191
sm_module = OBJ_NEW(mca_coll_sm_module_t);

0 commit comments

Comments
 (0)