@@ -87,18 +87,19 @@ int mca_coll_spacc_allreduce_intra_redscat_allgather(
8787 int comm_size = ompi_comm_size (comm );
8888 int rank = ompi_comm_rank (comm );
8989
90- OPAL_OUTPUT (( ompi_coll_spacc_stream ,
91- "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d" ,
92- rank , comm_size ) );
90+ opal_output_verbose ( 30 , mca_coll_spacc_stream ,
91+ "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d" ,
92+ rank , comm_size );
9393
9494 /* Find nearest power-of-two less than or equal to comm_size */
9595 int nsteps = opal_hibit (comm_size , comm -> c_cube_dim + 1 ); /* ilog2(comm_size) */
96+ assert (nsteps >= 0 );
9697 int nprocs_pof2 = 1 << nsteps ; /* flp2(comm_size) */
9798
9899 if (count < nprocs_pof2 || !ompi_op_is_commute (op )) {
99- OPAL_OUTPUT (( ompi_coll_spacc_stream ,
100- "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d count %d switching to base allreduce" ,
101- rank , comm_size , count ) );
100+ opal_output_verbose ( 20 , mca_coll_spacc_stream ,
101+ "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d count %d switching to base allreduce" ,
102+ rank , comm_size , count );
102103 return ompi_coll_base_allreduce_intra_basic_linear (sbuf , rbuf , count , dtype ,
103104 op , comm , module );
104105 }
@@ -275,27 +276,27 @@ int mca_coll_spacc_allreduce_intra_redscat_allgather(
275276 rcount [step ], dtype );
276277
277278 /* Move the current window to the received message */
278- rindex [step + 1 ] = rindex [step ];
279- sindex [step + 1 ] = rindex [step ];
280- wsize = rcount [step ];
281- step ++ ;
279+ if (step + 1 < nsteps ) {
280+ rindex [step + 1 ] = rindex [step ];
281+ sindex [step + 1 ] = rindex [step ];
282+ wsize = rcount [step ];
283+ step ++ ;
284+ }
282285 }
283- }
284- /*
285- * Assertion: each process has 1 / p' of the total reduction result:
286- * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...].
287- */
288-
289- /*
290- * Step 3. Allgather by the recursive doubling algorithm.
291- * Each process has 1 / p' of the total reduction result:
292- * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...].
293- * All exchanges are executed in reverse order relative
294- * to recursive doubling (previous step).
295- */
296-
297- if (vrank != -1 ) {
298- step = nsteps - 1 ; /* step = ilog2(p') - 1 */
286+ /*
287+ * Assertion: each process has 1 / p' of the total reduction result:
288+ * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...].
289+ */
290+
291+ /*
292+ * Step 3. Allgather by the recursive doubling algorithm.
293+ * Each process has 1 / p' of the total reduction result:
294+ * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...].
295+ * All exchanges are executed in reverse order relative
296+ * to recursive doubling (previous step).
297+ */
298+
299+ step -- ;
299300
300301 for (int mask = nprocs_pof2 >> 1 ; mask > 0 ; mask >>= 1 ) {
301302 int vdest = vrank ^ mask ;
0 commit comments