Skip to content

Commit 98d0ecf

Browse files
authored
Merge pull request #6814 from brminich/tuned_all2all_select
COLL/TUNED: Update alltoall selection rule for mellanox platform
2 parents 888f3ec + 65618f8 commit 98d0ecf

File tree

5 files changed

+41
-7
lines changed

5 files changed

+41
-7
lines changed

contrib/platform/mellanox/optimized.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,8 @@ opal_event_include=epoll
7878

7979
bml_r2_show_unreach_errors = 0
8080

81+
# alltoall algorithm selection settings for tuned coll mca
82+
coll_tuned_alltoall_large_msg = 250000
83+
coll_tuned_alltoall_min_procs = 2048
84+
coll_tuned_alltoall_algorithm_max_requests = 8
85+

ompi/mca/coll/tuned/coll_tuned.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ extern int ompi_coll_tuned_init_chain_fanout;
3838
extern int ompi_coll_tuned_init_max_requests;
3939
extern int ompi_coll_tuned_alltoall_small_msg;
4040
extern int ompi_coll_tuned_alltoall_intermediate_msg;
41+
extern int ompi_coll_tuned_alltoall_large_msg;
42+
extern int ompi_coll_tuned_alltoall_min_procs;
43+
extern int ompi_coll_tuned_alltoall_max_requests;
4144

4245
/* forced algorithm choices */
4346
/* this structure is for storing the indexes to the forced algorithm mca params... */

ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
/* alltoall algorithm variables */
2929
static int coll_tuned_alltoall_forced_algorithm = 0;
3030
static int coll_tuned_alltoall_segment_size = 0;
31-
static int coll_tuned_alltoall_max_requests;
3231
static int coll_tuned_alltoall_tree_fanout;
3332
static int coll_tuned_alltoall_chain_fanout;
3433

@@ -115,25 +114,39 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
115114
MCA_BASE_VAR_SCOPE_ALL,
116115
&coll_tuned_alltoall_chain_fanout);
117116

118-
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
117+
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
118+
"alltoall_large_msg",
119+
"use pairwise exchange algorithm for messages larger than this value",
120+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
121+
OPAL_INFO_LVL_6,
122+
MCA_BASE_VAR_SCOPE_READONLY,
123+
&ompi_coll_tuned_alltoall_large_msg);
124+
125+
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
126+
"alltoall_min_procs",
127+
"use pairwise exchange algorithm for communicators larger than this value",
128+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
129+
OPAL_INFO_LVL_6,
130+
MCA_BASE_VAR_SCOPE_READONLY,
131+
&ompi_coll_tuned_alltoall_min_procs);
132+
119133
mca_param_indices->max_requests_param_index =
120134
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
121135
"alltoall_algorithm_max_requests",
122136
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
123137
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
124138
OPAL_INFO_LVL_5,
125139
MCA_BASE_VAR_SCOPE_ALL,
126-
&coll_tuned_alltoall_max_requests);
140+
&ompi_coll_tuned_alltoall_max_requests);
127141
if (mca_param_indices->max_requests_param_index < 0) {
128142
return mca_param_indices->max_requests_param_index;
129143
}
130144

131-
if (coll_tuned_alltoall_max_requests < 0) {
145+
if (ompi_coll_tuned_alltoall_max_requests < 0) {
132146
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
133-
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n",
134-
ompi_coll_tuned_init_max_requests );
147+
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to 0 \n");
135148
}
136-
coll_tuned_alltoall_max_requests = 0;
149+
ompi_coll_tuned_alltoall_max_requests = 0;
137150
}
138151

139152
return (MPI_SUCCESS);

ompi/mca/coll/tuned/coll_tuned_component.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ int ompi_coll_tuned_init_max_requests = 128;
5757
int ompi_coll_tuned_alltoall_small_msg = 200;
5858
int ompi_coll_tuned_alltoall_intermediate_msg = 3000;
5959

60+
/* Set it to the same value as intermediate msg by default, so it does not affect
61+
* default algorithm selection. Changing this value will force the linear with
62+
* sync algorithm to be used for certain message sizes. */
63+
int ompi_coll_tuned_alltoall_large_msg = 3000;
64+
int ompi_coll_tuned_alltoall_min_procs = 0; /* disable by default */
65+
int ompi_coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
66+
6067
/* forced algorithm variables */
6168
/* indices for the MCA parameters */
6269
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};

ompi/mca/coll/tuned/coll_tuned_decision_fixed.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount,
136136
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
137137
rbuf, rcount, rdtype,
138138
comm, module);
139+
} else if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_large_msg) &&
140+
(communicator_size <= ompi_coll_tuned_alltoall_min_procs)) {
141+
return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype,
142+
rbuf, rcount, rdtype,
143+
comm, module,
144+
ompi_coll_tuned_alltoall_max_requests);
139145
}
140146

141147
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,

0 commit comments

Comments
 (0)