Commit a591f11

Multi process debugging
1 parent 5da1001 commit a591f11

3 files changed: 59 additions & 12 deletions


src/pmpo_MPMesh.cpp

Lines changed: 31 additions & 1 deletion
@@ -127,7 +127,12 @@ void MPMesh::CVTTrackingElmCenterBased(const int printVTPIndex){
   auto MPs2Elm = p_MPs->getData<MPF_Tgt_Elm_ID>();
   auto MPs2Proc = p_MPs->getData<MPF_Tgt_Proc_ID>();
   auto elm2Process = p_mesh->getElm2Process();
-
+
+  Kokkos::parallel_for("countProcess", numElms, KOKKOS_LAMBDA(const int iElm){
+    int pp_id=elm2Process(iElm);
+    printf("Mesh elm %d owning element %d \n", iElm, pp_id);
+  });
+
   if(printVTPIndex>=0) {
     printVTP_mesh(printVTPIndex);
   }
@@ -324,15 +329,37 @@ bool getAnyIsMigrating(MaterialPoints* p_MPs, bool isMigrating) {
 }
 
 void MPMesh::push(){
+  static int count=0;
+  std::cout<<__FUNCTION__<<" "<<count<<std::endl;
+  count++;
+  if(count>1) exit(1);
   Kokkos::Timer timer;
   p_mesh->computeRotLatLonIncr();
+
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("FooPush\n");
+
   sphericalInterpolation<MeshF_RotLatLonIncr>(*this);
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("FooPush \n");
+
   p_MPs->updateRotLatLonAndXYZ2Tgt(p_mesh->getSphereRadius()); // set Tgt_XYZ
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("FooPush\n");
+
   auto elm2Process = p_mesh->getElm2Process();
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("FooPush \n");
 
   bool anyIsMigrating = false;
   do {
     CVTTrackingElmCenterBased(); // move to Tgt_XYZ
+    assert(cudaDeviceSynchronize() == cudaSuccess);
+    printf("FooPushInside\n");
     p_MPs->updateMPSlice<MPF_Cur_Pos_XYZ, MPF_Tgt_Pos_XYZ>(); // Tgt_XYZ becomes Cur_XYZ
     p_MPs->updateMPSlice<MPF_Cur_Pos_Rot_Lat_Lon, MPF_Tgt_Pos_Rot_Lat_Lon>(); // Tgt becomes Cur
     if (elm2Process.size() > 0)
@@ -343,6 +370,9 @@ void MPMesh::push(){
     reconstructSlices();
   }
   while (anyIsMigrating);
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("FooPush\n");
 
   pumipic::RecordTime("PolyMPO_push", timer.seconds());
 }
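
Note: the added lines in push() follow a common multi-process debugging pattern: finish all outstanding device work (cudaDeviceSynchronize), line the ranks up (MPI_Barrier), then print a marker, so the last marker printed by every rank brackets a hang or crash; the static count / exit(1) lines additionally stop the run after the first push() call. A minimal standalone sketch of that checkpoint idiom follows. It is not part of this commit: the helper name debugCheckpoint and the rank-tagged message are illustrative, and it assumes an MPI plus CUDA build (e.g. nvcc with an MPI compiler wrapper).

// Minimal sketch of the checkpoint pattern used in MPMesh::push (hypothetical helper).
#include <cassert>
#include <cstdio>
#include <mpi.h>
#include <cuda_runtime.h>

// Drain the GPU, synchronize all ranks, then print a marker.
// Note: assert() is compiled out under NDEBUG; a release build would need an explicit check.
void debugCheckpoint(const char* tag) {
  assert(cudaDeviceSynchronize() == cudaSuccess); // surfaces asynchronous CUDA errors here
  MPI_Barrier(MPI_COMM_WORLD);                    // every rank must reach this point
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  printf("rank %d passed %s\n", rank, tag);
  fflush(stdout);                                 // flush so the marker is visible before a crash
}

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  debugCheckpoint("after init");
  // ... suspect device/MPI work goes here ...
  debugCheckpoint("before finalize");
  MPI_Finalize();
  return 0;
}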

src/pmpo_MPMesh_assembly.hpp

Lines changed: 4 additions & 2 deletions
@@ -104,7 +104,7 @@ void MPMesh::resetPreComputeFlag(){
 }
 
 void MPMesh::computeMatricesAndSolve(){
-
+  Kokkos::Timer timer;
   //Mesh Information
   auto elm2VtxConn = p_mesh->getElm2VtxConn();
   int numVtx = p_mesh->getNumVertices();
@@ -205,11 +205,12 @@ void MPMesh::computeMatricesAndSolve(){
     VtxCoeffs(vtx,i)=coeff[i];
   });
   this->precomputedVtxCoeffs = VtxCoeffs;
+  pumipic::RecordTime("PolyMPO_Calculate_MLS_Coeff", timer.seconds());
 }
 
 template <MeshFieldIndex meshFieldIndex>
 void MPMesh::assemblyVtx1() {
-
+  Kokkos::Timer timer;
   //If no reconstruction till now calculate the coeffs
   if (!isPreComputed) {
     computeMatricesAndSolve();
@@ -270,6 +271,7 @@ void MPMesh::assemblyVtx1() {
     for(int k=0; k<numEntries; k++)
       meshField(vtx, k) = reconVals(vtx,k);
   });
+  pumipic::RecordTime("PolyMPO_Reconstruct_Vtx1", timer.seconds());
 }
 
 template <MeshFieldIndex meshFieldIndex>
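
Note: the assembly changes only add timing: each routine constructs a Kokkos::Timer at entry and reports timer.seconds() through pumipic::RecordTime at exit. A standalone sketch of the same idiom follows; recordTime is a hypothetical stand-in for pumipic::RecordTime, and the fence is there because timer.seconds() measures host wall clock, so an asynchronous kernel must be fenced before the timer is read.

// Standalone sketch of the Kokkos::Timer idiom added above (not polyMPO code).
#include <cstdio>
#include <Kokkos_Core.hpp>

// Hypothetical stand-in for pumipic::RecordTime: just print the measurement.
void recordTime(const char* name, double seconds) {
  printf("%s took %g s\n", name, seconds);
}

int main(int argc, char** argv) {
  Kokkos::initialize(argc, argv);
  {
    Kokkos::Timer timer;                      // starts timing at construction
    Kokkos::View<double*> x("x", 1 << 20);
    Kokkos::parallel_for("fill", x.extent(0), KOKKOS_LAMBDA(const int i) {
      x(i) = 2.0 * i;
    });
    Kokkos::fence();                          // include the kernel itself, not just its launch
    recordTime("fill_kernel", timer.seconds());
  }
  Kokkos::finalize();
  return 0;
}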

src/pmpo_c.cpp

Lines changed: 24 additions & 9 deletions
@@ -61,17 +61,19 @@ void polympo_setMPICommunicator_f(MPMesh_ptr p_mpmesh, MPI_Fint fcomm){
 
 void polympo_createMPs_f(MPMesh_ptr p_mpmesh,
                          const int numElms,
-                         const int numMPs, // total number of MPs which is GREATER than or equal to number of active MPs
+                         const int numMPs, // total nof of MPs which is >= no of active MPs
                          int* mpsPerElm,
                          const int* mp2Elm,
                          const int* isMPActive) {
   checkMPMeshValid(p_mpmesh);
-
+  std::cout<<__FUNCTION__<<std::endl;
   //the mesh must be fixed/set before adding MPs
   auto p_mesh = ((polyMPO::MPMesh*)p_mpmesh)->p_mesh;
   PMT_ALWAYS_ASSERT(!p_mesh->meshEditable());
   PMT_ALWAYS_ASSERT(p_mesh->getNumElements() == numElms);
 
+  //Find the total no of MPs across all ranks
+  //And loop over all MPs and find the smallest element id associated across a MP
   int numActiveMPs = 0;
   int minElmID = numElms+1;
   for(int i = 0; i < numMPs; i++) {
@@ -82,20 +84,27 @@ void polympo_createMPs_f(MPMesh_ptr p_mpmesh,
       }
     }
   }
-  //TODO do we care about empty ranks? check just in case...
-  PMT_ALWAYS_ASSERT(numActiveMPs>0);
-
-  int firstElmWithMPs=-1;
+  long long globalNumActiveMPs = 0;
+  int globalMinElmID;
+  MPI_Allreduce(&numActiveMPs, &globalNumActiveMPs, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&minElmID, &globalMinElmID, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+  PMT_ALWAYS_ASSERT(globalNumActiveMPs>0);
+
+  //Loop over all mesh elements 0,1,... and find the first element that has an associated MP
+  int firstElmWithMPs=numElms+1;
   for (int i=0; i<numElms; i++) {
     if(mpsPerElm[i]) {
       firstElmWithMPs = i;
      break;
    }
  }
+  int globalFirstElmWithMPs;
+  MPI_Allreduce(&firstElmWithMPs, &globalFirstElmWithMPs, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
+
   int offset = -1;
-  if(minElmID-firstElmWithMPs==1) {
+  if(globalMinElmID-globalFirstElmWithMPs==1) {
     offset = 1;
-  }else if (minElmID-firstElmWithMPs==0){
+  }else if (globalMinElmID-globalFirstElmWithMPs==0){
     offset = 0;
   }else {
     fprintf(stderr,"The minElmID is incorrect! Offset is wrong!\n");
@@ -116,12 +125,18 @@ void polympo_createMPs_f(MPMesh_ptr p_mpmesh,
   auto mpsPerElm_d = create_mirror_view_and_copy(mpsPerElm, numElms);
   auto active_mp2Elm_d = create_mirror_view_and_copy(active_mp2Elm.data(), numActiveMPs);
   auto active_mpIDs_d = create_mirror_view_and_copy(active_mpIDs.data(), numActiveMPs);
-
+
   delete ((polyMPO::MPMesh*)p_mpmesh)->p_MPs;
   ((polyMPO::MPMesh*)p_mpmesh)->p_MPs =
     new polyMPO::MaterialPoints(numElms, numActiveMPs, mpsPerElm_d, active_mp2Elm_d, active_mpIDs_d);
+
   auto p_MPs = ((polyMPO::MPMesh*)p_mpmesh)->p_MPs;
   p_MPs->setElmIDoffset(offset);
+
+  assert(cudaDeviceSynchronize() == cudaSuccess);
+  MPI_Barrier(MPI_COMM_WORLD);
+  printf("Foo1\n");
+
 }
 
 void polympo_startRebuildMPs_f(MPMesh_ptr p_mpmesh,
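
Note: the rewrite of polympo_createMPs_f replaces the per-rank checks with global reductions so that a rank owning no material points no longer trips the assertion: the active-MP counts are summed, and the minimum element ID and the first element with MPs are minimized across MPI_COMM_WORLD before the element-ID offset is decided. One caveat worth noting: the committed sum reduces an int send buffer (numActiveMPs) with MPI_LONG_LONG_INT, a datatype mismatch; the standalone sketch below, with hypothetical variable names, keeps the send and receive types matched.

// Standalone sketch of the global-reduction pattern used above (hypothetical names).
#include <cstdio>
#include <mpi.h>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Per-rank values, standing in for numActiveMPs and minElmID.
  long long localActive = (rank == 0) ? 0 : 100;  // rank 0 deliberately owns nothing
  int localMinElmID = 10 + rank;

  // Sum the active counts and take the minimum element ID across all ranks.
  long long globalActive = 0;
  int globalMinElmID = 0;
  MPI_Allreduce(&localActive, &globalActive, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&localMinElmID, &globalMinElmID, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);

  // The global check passes even though rank 0 has no local MPs.
  if (rank == 0)
    printf("global active MPs = %lld, global min elm id = %d\n", globalActive, globalMinElmID);

  MPI_Finalize();
  return 0;
}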
