@@ -836,7 +836,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
836836
837837
838838 for (l = 0 ;l < data -> procs_per_group ;l ++ ){
839- data -> disp_index [l ] = 1 ;
839+ data -> disp_index [l ] = 0 ;
840840
841841 if ( data -> max_disp_index [l ] == 0 ) {
842842 data -> blocklen_per_process [l ] = (int * ) calloc (INIT_LEN , sizeof (int ));
@@ -915,8 +915,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
915915 if (data -> bytes_remaining <= data -> bytes_to_write_in_cycle ) {
916916 /* The data fits completely into the block */
917917 if (aggregator == rank ) {
918- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_remaining ;
919- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
918+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_remaining ;
919+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
920920 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base +
921921 (data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len
922922 - data -> bytes_remaining );
@@ -950,11 +950,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
950950 /* the remaining data from the previous cycle is larger than the
951951 data->bytes_to_write_in_cycle, so we have to segment again */
952952 if (aggregator == rank ) {
953- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_to_write_in_cycle ;
954- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
953+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_to_write_in_cycle ;
954+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
955955 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base +
956956 (data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len
957957 - data -> bytes_remaining );
958+ data -> disp_index [data -> n ] += 1 ;
958959 }
959960
960961 if (data -> procs_in_group [data -> n ] == rank ) {
@@ -971,9 +972,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
971972 (MPI_Aint ) data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len ) {
972973 /* This entry has more data than we can send in one cycle */
973974 if (aggregator == rank ) {
974- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_to_write_in_cycle ;
975- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
975+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_to_write_in_cycle ;
976+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
976977 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base ;
978+ data -> disp_index [data -> n ] += 1 ;
977979 }
978980 if (data -> procs_in_group [data -> n ] == rank ) {
979981 bytes_sent += data -> bytes_to_write_in_cycle ;
@@ -987,9 +989,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
987989 else {
988990 /* Next data entry is less than data->bytes_to_write_in_cycle */
989991 if (aggregator == rank ) {
990- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
992+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] =
991993 data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len ;
992- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = (ptrdiff_t )
994+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] = (ptrdiff_t )
993995 data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base ;
994996
995997 data -> disp_index [data -> n ] += 1 ;
0 commit comments