@@ -569,7 +569,6 @@ void MPMesh::startCommunication(){
569569 Kokkos::deep_copy (numOwnersTot, owner_count);
570570 Kokkos::deep_copy (numHalosTot, halo_count);
571571 assert (numHalosTot+numOwnersTot == numEntities);
572- printf (" Rank %d owners %d halo %d\n " , self, numOwnersTot, numHalosTot);
573572 int num_ints_per_copy = 2 ;
574573
575574 // #Halo Cells/proc which are owners on other process
@@ -782,7 +781,7 @@ void MPMesh::reconstruct_coeff_full(){
782781 }
783782 };
784783 p_MPs->parallel_for (assemble, " assembly" );
785-
784+ Kokkos::fence ();
786785 pumipic::RecordTime (" Assemble Matrix Per Process" + std::to_string (self), timer.seconds ());
787786 // Mode 0 is Gather: Halos Send to Owners
788787 // Mode 1 is Scatter: Owners Send to Halos
@@ -840,7 +839,7 @@ void MPMesh::solveMatrix(const Kokkos::View<double**>& vtxMatrices, double& radi
840839 VtxCoeffs (vtx,i)=coeff[i];
841840 });
842841 this ->precomputedVtxCoeffs = VtxCoeffs;
843-
842+ Kokkos::fence ();
844843 pumipic::RecordTime (" SolveMatrix" + std::to_string (self), timer.seconds ());
845844}
846845
@@ -900,6 +899,7 @@ void MPMesh::reconstruct_full() {
900899 }
901900 };
902901 p_MPs->parallel_for (reconstruct, " reconstruct" );
902+ Kokkos::fence ();
903903 pumipic::RecordTime (" Assemble Field per process" + std::to_string (self), timer.seconds ());
904904
905905 timer.reset ();
@@ -916,20 +916,20 @@ void MPMesh::communicate_and_take_halo_contributions(const Kokkos::View<double**
916916
917917 Kokkos::Timer timer;
918918 auto reconVals_host = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), meshField);
919+ Kokkos::fence ();
919920 std::vector<std::vector<double >> fieldData (nEntities, std::vector<double >(numEntries, 0.0 ));
920921 for (int i = 0 ; i < nEntities; ++i) {
921922 for (int j = 0 ; j < numEntries; ++j) {
922923 fieldData[i][j] = reconVals_host (i, j);
923924 }
924925 }
925-
926+ pumipic::RecordTime (" Communication-GPU to CPU-E-" + std::to_string (numEntries) + " -" + std::to_string (self), timer.seconds ());
927+
928+ timer.reset ();
926929 std::vector<std::vector<int >> recvIDVec;
927930 std::vector<std::vector<double >> recvDataVec;
928- pumipic::RecordTime (" Communication-GPU to CPU-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
929-
930- timer.reset ();
931931 communicateFields (fieldData, nEntities, numEntries, mode, recvIDVec, recvDataVec);
932- pumipic::RecordTime (" Communication-InterProcess-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
932+ pumipic::RecordTime (" Communication-InterProcess-E-" + std::to_string (numEntries) + " - " + std::to_string (self), timer.seconds ());
933933
934934 timer.reset ();
935935 int numProcsTot = recvIDVec.size ();
@@ -964,13 +964,13 @@ void MPMesh::communicate_and_take_halo_contributions(const Kokkos::View<double**
964964 auto hostView_data= Kokkos::View<double *, Kokkos::HostSpace>(" recvDataCPU" , totalSize_data);
965965 std::copy (flatDataVec.begin (), flatDataVec.end (), hostView_data.data ());
966966 Kokkos::deep_copy (recvDataGPU, hostView_data);
967-
967+ Kokkos::fence ();
968968 // Assertions
969969 assert (totalSize_data == totalSize*numEntries);
970970 for (int i=0 ; i<numProcsTot; i++){
971971 assert (recvDataVec[i].size () == recvIDVec[i].size () * numEntries);
972972 }
973- pumipic::RecordTime (" Communication-CPU to GPU-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
973+ pumipic::RecordTime (" Communication-CPU to GPU-E-" + std::to_string (numEntries) + " - " + std::to_string (self), timer.seconds ());
974974
975975 // Take contributions from other procs
976976 timer.reset ();
@@ -981,7 +981,8 @@ void MPMesh::communicate_and_take_halo_contributions(const Kokkos::View<double**
981981 if (op==1 ) meshField (vertex, k) = recvDataGPU (i * numEntries + k);
982982 }
983983 });
984- pumipic::RecordTime (" Communication-GPU reduction-E-" + std::to_string (numEntries) + std::to_string (self), timer.seconds ());
984+ Kokkos::fence ();
985+ pumipic::RecordTime (" Communication-GPU reduction-E-" + std::to_string (numEntries) + " -" + std::to_string (self), timer.seconds ());
985986
986987 if (p_MPs->getOpMode () != polyMPO::MP_DEBUG)
987988 return ;
0 commit comments