@@ -201,6 +201,7 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co
201201 int line = -1 , err = 0 , rank , size , step = 0 , sendto , recvfrom ;
202202 size_t sdtype_size , rdtype_size ;
203203 void * psnd , * prcv ;
204+ ompi_request_t * req ;
204205 ptrdiff_t sext , rext ;
205206
206207 if (MPI_IN_PLACE == sbuf ) {
@@ -217,16 +218,12 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co
217218 ompi_datatype_type_size (sdtype , & sdtype_size );
218219 ompi_datatype_type_size (rdtype , & rdtype_size );
219220
220- if (0 == sdtype_size || 0 == rdtype_size ) {
221- /* Nothing to exchange */
222- return MPI_SUCCESS ;
223- }
224-
225221 ompi_datatype_type_extent (sdtype , & sext );
226222 ompi_datatype_type_extent (rdtype , & rext );
227223
228224 /* Perform pairwise exchange over all steps; at step 0 the send target is the calling rank itself */
229225 for (step = 0 ; step < size ; step ++ ) {
226+ req = MPI_REQUEST_NULL ;
230227
231228 /* Determine sender and receiver for this step. */
232229 sendto = (rank + step ) % size ;
@@ -237,12 +234,31 @@ ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, co
237234 prcv = (char * )rbuf + (ptrdiff_t )rdisps [recvfrom ] * rext ;
238235
239236 /* send and receive */
240- err = ompi_coll_base_sendrecv ( psnd , scounts [sendto ], sdtype , sendto ,
241- MCA_COLL_BASE_TAG_ALLTOALLV ,
242- prcv , rcounts [recvfrom ], rdtype , recvfrom ,
243- MCA_COLL_BASE_TAG_ALLTOALLV ,
244- comm , MPI_STATUS_IGNORE , rank );
245- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
237+ if (0 < rcounts [recvfrom ] && 0 < rdtype_size ) {
238+ err = MCA_PML_CALL (irecv (prcv , rcounts [recvfrom ], rdtype , recvfrom ,
239+ MCA_COLL_BASE_TAG_ALLTOALLV , comm , & req ));
240+ if (MPI_SUCCESS != err ) {
241+ line = __LINE__ ;
242+ goto err_hndl ;
243+ }
244+ }
245+
246+ if (0 < scounts [sendto ] && 0 < sdtype_size ) {
247+ err = MCA_PML_CALL (send (psnd , scounts [sendto ], sdtype , sendto ,
248+ MCA_COLL_BASE_TAG_ALLTOALLV , MCA_PML_BASE_SEND_STANDARD , comm ));
249+ if (MPI_SUCCESS != err ) {
250+ line = __LINE__ ;
251+ goto err_hndl ;
252+ }
253+ }
254+
255+ if (MPI_REQUEST_NULL != req ) {
256+ err = ompi_request_wait (& req , MPI_STATUS_IGNORE );
257+ if (MPI_SUCCESS != err ) {
258+ line = __LINE__ ;
259+ goto err_hndl ;
260+ }
261+ }
246262 }
247263
248264 return MPI_SUCCESS ;
@@ -293,18 +309,13 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
293309 ompi_datatype_type_size (rdtype , & rdtype_size );
294310 ompi_datatype_type_size (sdtype , & sdtype_size );
295311
296- if (0 == rdtype_size || 0 == sdtype_size ) {
297- /* Nothing to exchange */
298- return MPI_SUCCESS ;
299- }
300-
301312 ompi_datatype_type_extent (sdtype , & sext );
302313 ompi_datatype_type_extent (rdtype , & rext );
303314
304315 /* Simple optimization - handle send to self first */
305316 psnd = ((char * ) sbuf ) + (ptrdiff_t )sdisps [rank ] * sext ;
306317 prcv = ((char * ) rbuf ) + (ptrdiff_t )rdisps [rank ] * rext ;
307- if (0 < scounts [rank ]) {
318+ if (0 < scounts [rank ] && 0 < sdtype_size ) {
308319 err = ompi_datatype_sndrcv (psnd , scounts [rank ], sdtype ,
309320 prcv , rcounts [rank ], rdtype );
310321 if (MPI_SUCCESS != err ) {
@@ -328,7 +339,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
328339 continue ;
329340 }
330341
331- if (0 < rcounts [i ]) {
342+ if (0 < rcounts [i ] && 0 < rdtype_size ) {
332343 ++ nreqs ;
333344 prcv = ((char * ) rbuf ) + (ptrdiff_t )rdisps [i ] * rext ;
334345 err = MCA_PML_CALL (irecv_init (prcv , rcounts [i ], rdtype ,
@@ -344,7 +355,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
344355 continue ;
345356 }
346357
347- if (0 < scounts [i ]) {
358+ if (0 < scounts [i ] && 0 < sdtype_size ) {
348359 ++ nreqs ;
349360 psnd = ((char * ) sbuf ) + (ptrdiff_t )sdisps [i ] * sext ;
350361 err = MCA_PML_CALL (isend_init (psnd , scounts [i ], sdtype ,
0 commit comments