Commit 3a8c942

Merge remote-tracking branch 'memmett/feature/fix-mpi'
* memmett/feature/fix-mpi:
  mpi: More MPI guards. Don't cancel, just assert.
  pfasst: Always recv during cycle_up.

Signed-off-by: Torbjörn Klatt <[email protected]>
2 parents f31aa1e + fbb0b41 commit 3a8c942
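
For context, the pattern adopted by the merged branch can be summarised as: wait for an outstanding non-blocking send to complete before reusing its request handle, and at teardown assert that nothing is left pending instead of cancelling it ("don't cancel, just assert"). Below is a minimal, self-contained sketch of that pattern; the CHK macro, the buffer, and the simple neighbour exchange are illustrative and are not taken from pfasst++.

// Sketch only: "wait, don't cancel" handling of a non-blocking send request.
#include <mpi.h>

#include <cassert>
#include <stdexcept>
#include <vector>

// Illustrative stand-in for an error-check macro such as CHKMPIERR.
#define CHK(err) if ((err) != MPI_SUCCESS) { throw std::runtime_error("MPI call failed"); }

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int rank = 0, size = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  std::vector<double> buf(8, double(rank));
  MPI_Request pending_send = MPI_REQUEST_NULL;

  if (rank + 1 < size) {
    // Finish any previous send before reusing the handle (instead of MPI_Cancel).
    CHK(MPI_Wait(&pending_send, MPI_STATUS_IGNORE));  // no-op on MPI_REQUEST_NULL
    CHK(MPI_Isend(buf.data(), static_cast<int>(buf.size()), MPI_DOUBLE, rank + 1, 0,
                  MPI_COMM_WORLD, &pending_send));
  }
  if (rank > 0) {
    std::vector<double> in(8);
    CHK(MPI_Recv(in.data(), static_cast<int>(in.size()), MPI_DOUBLE, rank - 1, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE));
  }

  // Teardown: drain the outstanding send; MPI_Wait resets the handle to
  // MPI_REQUEST_NULL, so the assert documents that nothing is left pending.
  if (pending_send != MPI_REQUEST_NULL) {
    CHK(MPI_Wait(&pending_send, MPI_STATUS_IGNORE));
  }
  assert(pending_send == MPI_REQUEST_NULL);

  MPI_Finalize();
  return 0;
}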

2 files changed, +32 -97 lines changed

src/pfasst/controller/pfasst_impl.hpp

Lines changed: 7 additions & 8 deletions
@@ -118,11 +118,8 @@ namespace pfasst
     auto trns = level_iter.transfer();
 
     trns->interpolate(fine, crse, true);
-
-    if (this->comm->status->previous_is_iterating()) {
-      fine->recv(comm, tag(level_iter), false);
-      trns->interpolate_initial(fine, crse);
-    }
+    fine->recv(comm, tag(level_iter), false);
+    trns->interpolate_initial(fine, crse);
 
     if (level_iter < this->finest()) {
       perform_sweeps(level_iter.level);
@@ -224,9 +221,11 @@ namespace pfasst
   template<typename time>
   void PFASST<time>::post()
   {
-    this->comm->status->post();
-    for (auto l = this->coarsest() + 1; l <= this->finest(); ++l) {
-      l.current()->post(comm, tag(l));
+    if (this->comm->status->previous_is_iterating()) {
+      this->comm->status->post();
+      for (auto l = this->coarsest() + 1; l <= this->finest(); ++l) {
+        l.current()->post(comm, tag(l));
+      }
     }
   }
 } // ::pfasst

src/pfasst/encap/vector_impl.hpp

Lines changed: 25 additions & 89 deletions
@@ -3,6 +3,10 @@
 
 #include "pfasst/encap/vector.hpp"
 
+#ifdef WITH_MPI
+#define CHKMPIERR(err) if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
+#endif
+
 namespace pfasst
 {
   namespace encap
@@ -38,41 +42,13 @@ namespace pfasst
     VectorEncapsulation<scalar, time>::~VectorEncapsulation()
     {
 #ifdef WITH_MPI
-      // TODO: refactor that request handler cleanup
-
-      int err = MPI_SUCCESS;
-
-      // check and finalize old request
-      if (this->recv_request != MPI_REQUEST_NULL) {
-        int old_complete = -1;
-        MPI_Status test_old_stat;
-        err = MPI_Request_get_status(this->recv_request, &old_complete, &test_old_stat);
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        if (!(bool)old_complete) {
-          err = MPI_Cancel(&(this->recv_request));
-          if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        }
-        // cleanup resources
-        err = MPI_Request_free(&(this->recv_request));
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        assert(this->recv_request == MPI_REQUEST_NULL); // just to make sure
-      }
-
-      // check and finalize old request
       if (this->send_request != MPI_REQUEST_NULL) {
-        int old_complete = -1;
-        MPI_Status test_old_stat;
-        err = MPI_Request_get_status(this->send_request, &old_complete, &test_old_stat);
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        if (!(bool)old_complete) {
-          err = MPI_Cancel(&(this->send_request));
-          if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        }
-        // cleanup resources
-        err = MPI_Request_free(&(this->send_request));
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        assert(this->send_request == MPI_REQUEST_NULL); // just to make sure
+        MPI_Status stat;
+        int err = MPI_Wait(&(this->send_request), &stat);
+        CHKMPIERR(err);
       }
+      assert(this->recv_request == MPI_REQUEST_NULL);
+      assert(this->send_request == MPI_REQUEST_NULL);
 #endif
     }
 
@@ -214,33 +190,14 @@ namespace pfasst
       if (mpi.size() == 1) { return; }
       if (mpi.rank() == 0) { return; }
 
-      int err = MPI_SUCCESS;
-
-      MPI_Request new_recv_request = MPI_REQUEST_NULL;
-      int src = (mpi.rank() - 1) % mpi.size();
-      err = MPI_Irecv(this->data(), sizeof(scalar) * this->size(), MPI_CHAR,
-                      src, tag, mpi.comm, &new_recv_request);
-      if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-
-      // check and finalize old request
       if (this->recv_request != MPI_REQUEST_NULL) {
-        int old_complete = -1;
-        MPI_Status test_old_stat;
-        err = MPI_Request_get_status(this->recv_request, &old_complete, &test_old_stat);
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        if (!(bool)old_complete) {
-          err = MPI_Cancel(&(this->recv_request));
-          if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        }
-        // cleanup resources
-        err = MPI_Request_free(&(this->recv_request));
-        if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-        assert(this->recv_request == MPI_REQUEST_NULL); // just to make sure
+        throw MPIError();
       }
 
-      // keep the new request handler
-      std::swap(this->recv_request, new_recv_request);
-      assert(new_recv_request == MPI_REQUEST_NULL);
+      int src = (mpi.rank() - 1) % mpi.size();
+      int err = MPI_Irecv(this->data(), sizeof(scalar) * this->size(), MPI_CHAR,
+                          src, tag, mpi.comm, &this->recv_request);
+      CHKMPIERR(err);
     }
 
     template<typename scalar, typename time>
@@ -250,23 +207,21 @@ namespace pfasst
       if (mpi.size() == 1) { return; }
       if (mpi.rank() == 0) { return; }
 
+      MPI_Status stat;
       int err = MPI_SUCCESS;
 
      if (blocking) {
-        MPI_Status stat;
         int src = (mpi.rank() - 1) % mpi.size();
         err = MPI_Recv(this->data(), sizeof(scalar) * this->size(), MPI_CHAR,
                        src, tag, mpi.comm, &stat);
+        CHKMPIERR(err);
       } else {
-        MPI_Status stat;
         if (this->recv_request != MPI_REQUEST_NULL) {
           CLOG(DEBUG, "Encap") << "waiting on last recv request";
-          err = MPI_Wait(&(this->recv_request), &stat);
+          err = MPI_Wait(&(this->recv_request), &stat); CHKMPIERR(err);
           CLOG(DEBUG, "Encap") << "waiting done: " << stat;
         }
       }
-
-      if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
     }
 
     template<typename scalar, typename time>
@@ -276,49 +231,30 @@ namespace pfasst
       if (mpi.size() == 1) { return; }
       if (mpi.rank() == mpi.size() - 1) { return; }
 
+      MPI_Status stat;
       int err = MPI_SUCCESS;
       int dest = (mpi.rank() + 1) % mpi.size();
 
       if (blocking) {
         err = MPI_Send(this->data(), sizeof(scalar) * this->size(), MPI_CHAR, dest, tag, mpi.comm);
+        CHKMPIERR(err);
       } else {
-        MPI_Request new_send_request = MPI_REQUEST_NULL;
+        err = MPI_Wait(&(this->send_request), &stat);
+        CHKMPIERR(err);
         err = MPI_Isend(this->data(), sizeof(scalar) * this->size(), MPI_CHAR,
-                        dest, tag, mpi.comm, &new_send_request);
-
-        // check and finalize old request
-        if (this->send_request != MPI_REQUEST_NULL) {
-          int old_complete = -1;
-          MPI_Status test_old_stat;
-          err = MPI_Request_get_status(this->send_request, &old_complete, &test_old_stat);
-          if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-
-          if (!(bool)old_complete) {
-            err = MPI_Cancel(&(this->send_request));
-            if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-          }
-
-          // cleanup resources
-          err = MPI_Request_free(&(this->send_request));
-          if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
-          assert(this->send_request == MPI_REQUEST_NULL); // just to make sure
-        }
-
-        // keep the new request handler
-        std::swap(this->send_request, new_send_request);
+                        dest, tag, mpi.comm, &(this->send_request));
+        CHKMPIERR(err);
       }
 
-      if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
     }
 
     template<typename scalar, typename time>
     void VectorEncapsulation<scalar, time>::broadcast(ICommunicator* comm)
     {
       auto& mpi = as_mpi(comm);
       int err = MPI_Bcast(this->data(), sizeof(scalar) * this->size(), MPI_CHAR,
-                          comm->size()-1, mpi.comm);
-
-      if (err != MPI_SUCCESS) { throw MPIError::from_code(err); }
+                          comm->size()-1, mpi.comm); CHKMPIERR(err);
+      CHKMPIERR(err);
     }
 #endif
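
The new CHKMPIERR macro simply turns a non-successful MPI return code into an exception. As a standalone illustration of that idea (the hypothetical MPIException below stands in for pfasst++'s MPIError, whose definition is not part of this diff), a macro of this kind can be used like so:

// Sketch only: translate MPI error codes into exceptions carrying the MPI error string.
#include <mpi.h>

#include <stdexcept>
#include <string>

struct MPIException : std::runtime_error
{
  using std::runtime_error::runtime_error;

  // Build an exception whose message is the textual MPI error description.
  static MPIException from_code(int err)
  {
    char msg[MPI_MAX_ERROR_STRING];
    int len = 0;
    MPI_Error_string(err, msg, &len);
    return MPIException(std::string(msg, len));
  }
};

#define CHK(err) if ((err) != MPI_SUCCESS) { throw MPIException::from_code(err); }

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  // MPI aborts on error by default; ask for return codes so the macro can see them.
  MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

  int rank = 0, size = 1;
  CHK(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
  CHK(MPI_Comm_size(MPI_COMM_WORLD, &size));

  // Broadcast from the last rank, mirroring broadcast() in the diff above.
  double x = static_cast<double>(rank);
  CHK(MPI_Bcast(&x, 1, MPI_DOUBLE, size - 1, MPI_COMM_WORLD));

  MPI_Finalize();
  return 0;
}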
