@@ -801,7 +801,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
801801 }
802802
803803 for (l = 0 ;l < data -> procs_per_group ;l ++ ){
804- data -> disp_index [l ] = 1 ;
804+ data -> disp_index [l ] = 0 ;
805805
806806 if (data -> max_disp_index [l ] == 0 ) {
807807 data -> blocklen_per_process [l ] = (int * ) calloc (INIT_LEN , sizeof (int ));
@@ -880,8 +880,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
880880 if (data -> bytes_remaining <= data -> bytes_to_write_in_cycle ) {
881881 /* The data fits completely into the block */
882882 if (aggregator == rank ) {
883- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_remaining ;
884- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
883+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_remaining ;
884+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
885885 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base +
886886 (data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len
887887 - data -> bytes_remaining );
@@ -914,11 +914,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
914914 /* the remaining data from the previous cycle is larger than the
915915 data->bytes_to_write_in_cycle, so we have to segment again */
916916 if (aggregator == rank ) {
917- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_to_write_in_cycle ;
918- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
917+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_to_write_in_cycle ;
918+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
919919 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base +
920920 (data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len
921921 - data -> bytes_remaining );
922+ data -> disp_index [data -> n ] += 1 ;
922923 }
923924
924925 if (data -> procs_in_group [data -> n ] == rank ) {
@@ -935,9 +936,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
935936 (MPI_Aint ) data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len ) {
936937 /* This entry has more data than we can send in one cycle */
937938 if (aggregator == rank ) {
938- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = data -> bytes_to_write_in_cycle ;
939- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
939+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] = data -> bytes_to_write_in_cycle ;
940+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] =
940941 (ptrdiff_t )data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base ;
942+ data -> disp_index [data -> n ] += 1 ;
941943 }
942944 if (data -> procs_in_group [data -> n ] == rank ) {
943945 bytes_sent += data -> bytes_to_write_in_cycle ;
@@ -951,9 +953,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
951953 else {
952954 /* Next data entry is less than data->bytes_to_write_in_cycle */
953955 if (aggregator == rank ) {
954- data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] =
956+ data -> blocklen_per_process [data -> n ][data -> disp_index [data -> n ]] =
955957 data -> global_iov_array [data -> sorted [data -> current_index ]].iov_len ;
956- data -> displs_per_process [data -> n ][data -> disp_index [data -> n ] - 1 ] = (ptrdiff_t )
958+ data -> displs_per_process [data -> n ][data -> disp_index [data -> n ]] = (ptrdiff_t )
957959 data -> global_iov_array [data -> sorted [data -> current_index ]].iov_base ;
958960
959961 data -> disp_index [data -> n ] += 1 ;
0 commit comments