1414 *
1515 */
1616
17+ #include "opal/include/opal/align.h"
1718#include "ompi/op/op.h"
1819
1920#include "nbc_internal.h"
2021
21- static inline int red_sched_binomial (int rank , int p , int root , const void * sendbuf , void * redbuf , int count , MPI_Datatype datatype ,
22+ static inline int red_sched_binomial (int rank , int p , int root , const void * sendbuf , void * redbuf , char tmpredbuf , int count , MPI_Datatype datatype ,
2223 MPI_Op op , char inplace , NBC_Schedule * schedule , NBC_Handle * handle );
2324static inline int red_sched_chain (int rank , int p , int root , const void * sendbuf , void * recvbuf , int count , MPI_Datatype datatype ,
2425 MPI_Op op , int ext , size_t size , NBC_Schedule * schedule , NBC_Handle * handle , int fragsize );
@@ -55,6 +56,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
5556 MPI_Aint ext ;
5657 NBC_Schedule * schedule ;
5758 char * redbuf = NULL , inplace ;
59+ char tmpredbuf = 0 ;
5860 enum { NBC_RED_BINOMIAL , NBC_RED_CHAIN } alg ;
5961 NBC_Handle * handle ;
6062 ompi_coll_libnbc_module_t * libnbc_module = (ompi_coll_libnbc_module_t * ) module ;
@@ -104,8 +106,10 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
104106 redbuf = recvbuf ;
105107 } else {
106108 /* recvbuf may not be valid on non-root nodes */
107- handle -> tmpbuf = malloc (2 * span );
108- redbuf = (char * ) handle -> tmpbuf + span - gap ;
109+ ptrdiff_t span_align = OPAL_ALIGN (span , datatype -> super .align , ptrdiff_t );
110+ handle -> tmpbuf = malloc (span_align + span );
111+ redbuf = (char * )span_align - gap ;
112+ tmpredbuf = 1 ;
109113 }
110114 } else {
111115 handle -> tmpbuf = malloc (span );
@@ -142,7 +146,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
142146
143147 switch (alg ) {
144148 case NBC_RED_BINOMIAL :
145- res = red_sched_binomial (rank , p , root , sendbuf , redbuf , count , datatype , op , inplace , schedule , handle );
149+ res = red_sched_binomial (rank , p , root , sendbuf , redbuf , tmpredbuf , count , datatype , op , inplace , schedule , handle );
146150 break ;
147151 case NBC_RED_CHAIN :
148152 res = red_sched_chain (rank , p , root , sendbuf , recvbuf , count , datatype , op , ext , size , schedule , handle , segsize );
@@ -289,10 +293,10 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count
289293 if (vrank == 0) rank = root; \
290294 if (vrank == root) rank = 0; \
291295}
292- static inline int red_sched_binomial (int rank , int p , int root , const void * sendbuf , void * redbuf , int count , MPI_Datatype datatype ,
296+ static inline int red_sched_binomial (int rank , int p , int root , const void * sendbuf , void * redbuf , char tmpredbuf , int count , MPI_Datatype datatype ,
293297 MPI_Op op , char inplace , NBC_Schedule * schedule , NBC_Handle * handle ) {
294298 int vroot , vrank , vpeer , peer , res , maxr ;
295- char * rbuf , * lbuf , * buf ;
299+ char * rbuf , * lbuf , * buf , tmpbuf ;
296300 int tmprbuf , tmplbuf ;
297301 ptrdiff_t gap ;
298302 (void )opal_datatype_span (& datatype -> super , count , & gap );
@@ -310,12 +314,12 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
310314 rbuf = (void * )(- gap );
311315 tmprbuf = true;
312316 lbuf = redbuf ;
313- tmplbuf = false ;
317+ tmplbuf = tmpredbuf ;
314318 } else {
315319 lbuf = (void * )(- gap );
316320 tmplbuf = true;
317321 rbuf = redbuf ;
318- tmprbuf = false ;
322+ tmprbuf = tmpredbuf ;
319323 if (inplace ) {
320324 res = NBC_Copy (rbuf , count , datatype , ((char * )handle -> tmpbuf )- gap , count , datatype , MPI_COMM_SELF );
321325 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
@@ -352,7 +356,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
352356 }
353357 /* swap left and right buffers */
354358 buf = rbuf ; rbuf = lbuf ; lbuf = buf ;
355- tmprbuf ^= 1 ; tmplbuf ^= 1 ;
359+ tmpbuf = tmprbuf ; tmprbuf = tmplbuf ; tmplbuf = tmpbuf ;
356360 }
357361 } else {
358362 /* we have to send this round */
@@ -377,9 +381,9 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
378382 /* send to root if vroot != root */
378382 if (vroot != root ) {
379383 if (0 == rank ) {
380- res = NBC_Sched_send (redbuf , false , count , datatype , root , schedule , false);
384+ res = NBC_Sched_send (redbuf , tmpredbuf , count , datatype , root , schedule , false);
381385 } else if (root == rank ) {
382- res = NBC_Sched_recv (redbuf , false , count , datatype , vroot , schedule , false);
386+ res = NBC_Sched_recv (redbuf , tmpredbuf , count , datatype , vroot , schedule , false);
383387 }
384388 }
385389
0 commit comments