@@ -172,7 +172,10 @@ mca_coll_han_reduce_intra(const void *sbuf,
172172 mca_coll_task_t * t_next_seg = OBJ_NEW (mca_coll_task_t );
173173 /* Setup up t_next_seg task arguments */
174174 t -> cur_task = t_next_seg ;
175- t -> sbuf = (char * ) t -> sbuf + extent * t -> seg_count ;
175+ if (t -> sbuf != MPI_IN_PLACE ) {
176+ t -> sbuf = (char * ) t -> sbuf + extent * t -> seg_count ;
177+ }
178+
176179 if (up_rank == root_up_rank ) {
177180 t -> rbuf = (char * ) t -> rbuf + extent * t -> seg_count ;
178181 }
@@ -242,6 +245,7 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
242245 if (next_seg <= t -> num_segments - 1 ) {
243246 int tmp_count = t -> seg_count ;
244247 char * tmp_rbuf = NULL ;
248+ char * tmp_sbuf = NULL ;
245249 if (next_seg == t -> num_segments - 1 && t -> last_seg_count != t -> seg_count ) {
246250 tmp_count = t -> last_seg_count ;
247251 }
@@ -250,7 +254,10 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
250254 } else if (NULL != t -> rbuf ) {
251255 tmp_rbuf = (char * )t -> rbuf + extent * t -> seg_count ;
252256 }
253- t -> low_comm -> c_coll -> coll_reduce ((char * ) t -> sbuf + extent * t -> seg_count ,
257+
258+ tmp_sbuf = (t -> sbuf == MPI_IN_PLACE ) ? MPI_IN_PLACE : (char * )t -> sbuf + extent * t -> seg_count ;
259+
260+ t -> low_comm -> c_coll -> coll_reduce ((char * ) tmp_sbuf ,
254261 (char * ) tmp_rbuf , tmp_count ,
255262 t -> dtype , t -> op , t -> root_low_rank , t -> low_comm ,
256263 t -> low_comm -> c_coll -> coll_reduce_module );
0 commit comments