1616 * Author(s): Torsten Hoefler <[email protected] > 1717 * 
1818 */ 
19+ #include  "opal/include/opal/align.h" 
20+ 
1921#include  "nbc_internal.h" 
2022
2123/* a reduce_scatter schedule cannot be cached easily because the contents 
@@ -40,7 +42,7 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
4042                                     struct  mca_coll_base_module_2_1_0_t  * module ) {
4143  int  peer , rank , maxr , p , res , count ;
4244  MPI_Aint  ext ;
43-   ptrdiff_t  gap , span ;
45+   ptrdiff_t  gap , span ,  span_align ;
4446  char  * sbuf , inplace ;
4547  NBC_Schedule  * schedule ;
4648  NBC_Handle  * handle ;
@@ -84,14 +86,15 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
8486  maxr  =  (int ) ceil  ((log ((double ) p ) / LOG2 ));
8587
8688  span  =  opal_datatype_span (& datatype -> super , count , & gap );
87-   handle -> tmpbuf  =  malloc  (span  *  2 );
89+   span_align  =  OPAL_ALIGN (span , datatype -> super .align , ptrdiff_t );
90+   handle -> tmpbuf  =  malloc  (span_align  +  span );
8891  if  (OPAL_UNLIKELY (NULL  ==  handle -> tmpbuf )) {
8992    NBC_Return_handle  (handle );
9093    return  OMPI_ERR_OUT_OF_RESOURCE ;
9194  }
9295
9396  rbuf  =  (char  * )(- gap );
94-   lbuf  =  (char  * )(span  -  gap );
97+   lbuf  =  (char  * )(span_align  -  gap );
9598
9699  schedule  =  OBJ_NEW (NBC_Schedule );
97100  if  (OPAL_UNLIKELY (NULL  ==  schedule )) {
@@ -205,7 +208,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
205208                                            struct  mca_coll_base_module_2_1_0_t  * module ) {
206209  int  rank , res , count , lsize , rsize ;
207210  MPI_Aint  ext ;
208-   ptrdiff_t  gap , span ;
211+   ptrdiff_t  gap , span ,  span_align ;
209212  NBC_Schedule  * schedule ;
210213  NBC_Handle  * handle ;
211214  ompi_coll_libnbc_module_t  * libnbc_module  =  (ompi_coll_libnbc_module_t * ) module ;
@@ -226,14 +229,15 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
226229  }
227230
228231  span  =  opal_datatype_span (& datatype -> super , count , & gap );
232+   span_align  =  OPAL_ALIGN (span , datatype -> super .align , ptrdiff_t );
229233
230234  res  =  NBC_Init_handle (comm , & handle , libnbc_module );
231235  if  (OPAL_UNLIKELY (OMPI_SUCCESS  !=  res )) {
232236    return  res ;
233237  }
234238
235239  if  (count  >  0 ) {
236-     handle -> tmpbuf  =  malloc  (2   *  span );
240+     handle -> tmpbuf  =  malloc  (span_align   +  span );
237241    if  (OPAL_UNLIKELY (NULL  ==  handle -> tmpbuf )) {
238242      NBC_Return_handle  (handle );
239243      return  OMPI_ERR_OUT_OF_RESOURCE ;
@@ -259,7 +263,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
259263  if  (0  ==  rank ) {
260264    char  * lbuf , * rbuf ;
261265    lbuf  =  (char  * )(- gap );
262-     rbuf  =  (char  * )(span - gap );
266+     rbuf  =  (char  * )(span_align - gap );
263267    res  =  NBC_Sched_recv  (lbuf , true, count , datatype , 0 , schedule , true);
264268    if  (OPAL_UNLIKELY (OMPI_SUCCESS  !=  res )) {
265269      NBC_Return_handle  (handle );
0 commit comments