11/* ---------------------------------------------------------------- */
2- /* (C)Copyright IBM Corp. 2007, 2008 */
2+ /* (C)Copyright IBM Corp. 2007, 2008, 2019 */
33/* ---------------------------------------------------------------- */
44/**
55 * \file ad_gpfs_aggrs.c
@@ -663,16 +663,6 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs,
663663 /* Parameters for MPI_Alltoallv */
664664 int * scounts , * sdispls , * rcounts , * rdispls ;
665665
666- /* Parameters for MPI_Alltoallv. These are the buffers, which
667- * are later computed to be the lowest address of all buffers
668- * to be sent/received for offsets and lengths. Initialize to
669- * the highest possible address which is the current minimum.
670- */
671- void * sendBufForOffsets = (void * )0xFFFFFFFFFFFFFFFF ,
672- * sendBufForLens = (void * )0xFFFFFFFFFFFFFFFF ,
673- * recvBufForOffsets = (void * )0xFFFFFFFFFFFFFFFF ,
674- * recvBufForLens = (void * )0xFFFFFFFFFFFFFFFF ;
675-
676666/* first find out how much to send/recv and from/to whom */
677667#ifdef AGGREGATION_PROFILE
678668 MPE_Log_event (5026 , 0 , NULL );
@@ -719,11 +709,6 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs,
719709 others_req [i ].lens =
720710 ADIOI_Malloc (count_others_req_per_proc [i ]* sizeof (ADIO_Offset ));
721711
722- if ( (MPIU_Upint )others_req [i ].offsets < (MPIU_Upint )recvBufForOffsets )
723- recvBufForOffsets = others_req [i ].offsets ;
724- if ( (MPIU_Upint )others_req [i ].lens < (MPIU_Upint )recvBufForLens )
725- recvBufForLens = others_req [i ].lens ;
726-
727712 others_req [i ].mem_ptrs = (MPI_Aint * )
728713 ADIOI_Malloc (count_others_req_per_proc [i ]* sizeof (MPI_Aint ));
729714
@@ -736,102 +721,88 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs,
736721 others_req [i ].lens = NULL ;
737722 }
738723 }
739- /* If no recv buffer was allocated in the loop above, make it NULL */
740- if ( recvBufForOffsets == (void * )0xFFFFFFFFFFFFFFFF ) recvBufForOffsets = NULL ;
741- if ( recvBufForLens == (void * )0xFFFFFFFFFFFFFFFF ) recvBufForLens = NULL ;
742724
743725 /* Now send the calculated offsets and lengths to respective processes */
744726
745727 /************************/
746728 /* Exchange the offsets */
747729 /************************/
748730
749- /* Determine the lowest sendBufForOffsets/Lens */
750- for (i = 0 ; i < nprocs ; i ++ )
751- {
752- if ( (my_req [i ].count ) &&
753- ((MPIU_Upint )my_req [i ].offsets <= (MPIU_Upint )sendBufForOffsets ) )
754- {
755- sendBufForOffsets = my_req [i ].offsets ;
756- }
757-
758- if ( (my_req [i ].count ) &&
759- ((MPIU_Upint )my_req [i ].lens <= (MPIU_Upint )sendBufForLens ) )
760- {
761- sendBufForLens = my_req [i ].lens ;
762- }
763- }
731+ // Figure out the layout for the sendbuf and recvbuf.
732+ // scounts[] and sdisps[] / rcounts[] and rdisps[] define the layout,
733+ // and the data for each section will come from from my_req[i].offsets
734+ // or others_req[i].offsets.
764735
765- /* If no send buffer was found in the loop above, make it NULL */
766- if ( sendBufForOffsets == (void * )0xFFFFFFFFFFFFFFFF ) sendBufForOffsets = NULL ;
767- if ( sendBufForLens == (void * )0xFFFFFFFFFFFFFFFF ) sendBufForLens = NULL ;
768-
769- /* Calculate the displacements from the sendBufForOffsets/Lens */
736+ int scount_total = 0 ;
737+ int rcount_total = 0 ;
770738 for (i = 0 ; i < nprocs ; i ++ )
771739 {
772740 /* Send these offsets to process i.*/
773741 scounts [i ] = count_my_req_per_proc [i ];
774- if ( scounts [i ] == 0 )
775- sdispls [i ] = 0 ;
776- else
777- sdispls [i ] = (int )
778- ( ( (MPIU_Upint )my_req [i ].offsets -
779- (MPIU_Upint )sendBufForOffsets ) /
780- (MPIU_Upint )sizeof (ADIO_Offset ) );
742+ sdispls [i ] = scount_total ;
743+ scount_total += scounts [i ];
781744
782745 /* Receive these offsets from process i.*/
783746 rcounts [i ] = count_others_req_per_proc [i ];
784- if ( rcounts [i ] == 0 )
785- rdispls [i ] = 0 ;
786- else
787- rdispls [i ] = (int )
788- ( ( (MPIU_Upint )others_req [i ].offsets -
789- (MPIU_Upint )recvBufForOffsets ) /
790- (MPIU_Upint )sizeof (ADIO_Offset ) );
747+ rdispls [i ] = rcount_total ;
748+ rcount_total += rcounts [i ];
749+ }
750+
751+ void * sbuf_copy_of_req_info ;
752+ void * rbuf_copy_of_req_info ;
753+
754+ sbuf_copy_of_req_info = (ADIO_Offset * ) ADIOI_Malloc (scount_total * sizeof (ADIO_Offset ));
755+ rbuf_copy_of_req_info = (ADIO_Offset * ) ADIOI_Malloc (rcount_total * sizeof (ADIO_Offset ));
756+ for (i = 0 ; i < nprocs ; i ++ )
757+ {
758+ // I haven't timed it, I'm just assuming a memcpy(,,0) is fast for
759+ // the entries that don't have data to contribute so I didn't bother
760+ // with an 'if' statement
761+ memcpy (sbuf_copy_of_req_info + sdispls [i ] * sizeof (ADIO_Offset ),
762+ my_req [i ].offsets ,
763+ scounts [i ] * sizeof (ADIO_Offset ));
791764 }
792765
793766 /* Exchange the offsets */
794- MPI_Alltoallv (sendBufForOffsets ,
767+ MPI_Alltoallv (sbuf_copy_of_req_info ,
795768 scounts , sdispls , ADIO_OFFSET ,
796- recvBufForOffsets ,
769+ rbuf_copy_of_req_info ,
797770 rcounts , rdispls , ADIO_OFFSET ,
798771 fd -> comm );
772+ for (i = 0 ; i < nprocs ; i ++ )
773+ {
774+ memcpy (others_req [i ].offsets ,
775+ rbuf_copy_of_req_info + rdispls [i ] * sizeof (ADIO_Offset ),
776+ rcounts [i ] * sizeof (ADIO_Offset ));
777+ }
799778
800779 /************************/
801780 /* Exchange the lengths */
802781 /************************/
803782
804783 for (i = 0 ; i < nprocs ; i ++ )
805784 {
806- /* Send these lengths to process i.*/
807- scounts [i ] = count_my_req_per_proc [i ];
808- if ( scounts [i ] == 0 )
809- sdispls [i ] = 0 ;
810- else
811- sdispls [i ] = (int )
812- ( ( (MPIU_Upint )my_req [i ].lens -
813- (MPIU_Upint )sendBufForLens ) /
814- (MPIU_Upint ) sizeof (ADIO_Offset ) );
815-
816- /* Receive these offsets from process i. */
817- rcounts [i ] = count_others_req_per_proc [i ];
818- if ( rcounts [i ] == 0 )
819- rdispls [i ] = 0 ;
820- else
821- rdispls [i ] = (int )
822- ( ( (MPIU_Upint )others_req [i ].lens -
823- (MPIU_Upint )recvBufForLens ) /
824- (MPIU_Upint ) sizeof (ADIO_Offset ) );
785+ memcpy (sbuf_copy_of_req_info + sdispls [i ] * sizeof (ADIO_Offset ),
786+ my_req [i ].lens ,
787+ scounts [i ] * sizeof (ADIO_Offset ));
825788 }
826789
827790 /* Exchange the lengths */
828- MPI_Alltoallv (sendBufForLens ,
791+ MPI_Alltoallv (sbuf_copy_of_req_info ,
829792 scounts , sdispls , ADIO_OFFSET ,
830- recvBufForLens ,
793+ rbuf_copy_of_req_info ,
831794 rcounts , rdispls , ADIO_OFFSET ,
832795 fd -> comm );
796+ for (i = 0 ; i < nprocs ; i ++ )
797+ {
798+ memcpy (others_req [i ].lens ,
799+ rbuf_copy_of_req_info + rdispls [i ] * sizeof (ADIO_Offset ),
800+ rcounts [i ] * sizeof (ADIO_Offset ));
801+ }
833802
834803 /* Clean up */
804+ ADIOI_Free (sbuf_copy_of_req_info );
805+ ADIOI_Free (rbuf_copy_of_req_info );
835806 ADIOI_Free (count_others_req_per_proc );
836807 ADIOI_Free (scounts );
837808 ADIOI_Free (sdispls );
0 commit comments