Skip to content

Commit 2b4215f

Browse files
committed
logging: improved logging for MPI communicators
MPI communicator wrappers now support named communicators. Signed-off-by: Torbjörn Klatt <[email protected]>
1 parent 5386a21 commit 2b4215f

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

include/pfasst/mpi_communicator.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ using namespace std;
88
#include <mpi.h>
99

1010
#include "pfasst/interfaces.hpp"
11+
#include "pfasst/logging.hpp"
1112

1213

1314
namespace pfasst
@@ -33,6 +34,7 @@ namespace pfasst
3334
//! @{
3435
int _rank;
3536
int _size;
37+
string _name;
3638
//! @}
3739

3840
public:
@@ -49,6 +51,7 @@ namespace pfasst
4951
virtual void set_comm(MPI_Comm comm);
5052
virtual int size();
5153
virtual int rank();
54+
virtual string name();
5255
//! @}
5356
};
5457

@@ -72,6 +75,9 @@ namespace pfasst
7275
} // ::pfasst::mpi
7376
} // ::pfasst
7477

78+
79+
inline MAKE_LOGGABLE(MPI_Status, mpi_status, os);
80+
7581
#include "pfasst/mpi_communicator_impl.hpp"
7682

7783
#endif

src/pfasst/mpi_communicator_impl.hpp

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ namespace pfasst
3030
this->comm = comm;
3131
MPI_Comm_size(this->comm, &(this->_size));
3232
MPI_Comm_rank(this->comm, &(this->_rank));
33+
int len = 0;
34+
char buff[MPI_MAX_OBJECT_NAME];
35+
MPI_Comm_get_name(this->comm, buff, &len);
36+
if (len == 0) {
37+
this->_name = string("world");
38+
} else {
39+
this->_name = string(buff, len);
40+
}
3341

3442
shared_ptr<MPIStatus> status = make_shared<MPIStatus>();
3543
this->status = status;
@@ -46,6 +54,11 @@ namespace pfasst
4654
return this->_rank;
4755
}
4856

57+
//! Accessor for the communicator's name.
//!
//! @returns a copy of the `_name` member. Elsewhere in this file `_name` is
//!          filled from `MPI_Comm_get_name`, with the literal "world" used
//!          when the reported name length is zero.
string MPICommunicator::name()
58+
{
59+
return this->_name;
60+
}
61+
4962

5063
void MPIStatus::set_comm(ICommunicator* comm)
5164
{
@@ -62,7 +75,7 @@ namespace pfasst
6275

6376
//! Records the convergence flag of the calling process.
//!
//! Writes @p converged into the entry of `this->converged` indexed by
//! `this->comm->rank()` and emits a debug log message.
//!
//! @param converged  flag value to store for this rank
//!
//! NOTE(review): `.at()` presumably bounds-checks like the standard
//! containers do (throwing on an out-of-range rank) -- confirm against the
//! declaration of `converged`, which is not visible here.
void MPIStatus::set_converged(bool converged)
6477
{
65-
LOG(DEBUG) << "mpi rank " << this->comm->rank() << " set converged to " << converged;
78+
// boolalpha makes the flag print as "true"/"false" instead of 1/0.
CLOG(DEBUG, "Controller") << "set converged to " << boolalpha << converged;
6679
this->converged.at(this->comm->rank()) = converged;
6780
}
6881

@@ -84,10 +97,12 @@ namespace pfasst
8497

8598
int iconverged = converged.at(mpi->rank()) ? 1 : 0;
8699

87-
LOG(DEBUG) << "mpi rank " << this->comm->rank() << " status send " << iconverged;
100+
int dest_rank = (mpi->rank() + 1) % mpi->size();
101+
CLOG(DEBUG, "Controller") << "sending status " << iconverged
102+
<< " to " << dest_rank << " of communicator " << mpi->name();
88103

89104
int err = MPI_Send(&iconverged, sizeof(int), MPI_INT,
90-
(mpi->rank() + 1) % mpi->size(), 1, mpi->comm);
105+
dest_rank, 1, mpi->comm);
91106

92107
if (err != MPI_SUCCESS) {
93108
throw MPIError();
@@ -101,22 +116,33 @@ namespace pfasst
101116
if (mpi->rank() == 0) { return; }
102117

103118
if (get_converged(mpi->rank()-1)) {
104-
LOG(DEBUG) << "mpi rank " << this->comm->rank() << " skipping status recv";
119+
CLOG(DEBUG, "Controller") << "skipping status recv";
105120
return;
106121
}
107122

108123
MPI_Status stat;
109124
int iconverged;
125+
int src_rank = (mpi->rank() - 1) % mpi->size();
110126
int err = MPI_Recv(&iconverged, sizeof(iconverged), MPI_INT,
111-
(mpi->rank() - 1) % mpi->size(), 1, mpi->comm, &stat);
127+
src_rank, 1, mpi->comm, &stat);
112128

113129
if (err != MPI_SUCCESS) {
114130
throw MPIError();
115131
}
116132

117133
converged.at(mpi->rank()-1) = iconverged == 1 ? true : false;
118134

119-
LOG(DEBUG) << "mpi rank " << this->comm->rank() << " status recv " << iconverged;
135+
CLOG(DEBUG, "Controller") << "recieved status " << iconverged
136+
<< " from rank " << src_rank << " of communicator " << mpi->name();
120137
}
121138
} // ::pfasst::mpi
122139
} // ::pfasst
140+
141+
142+
//! Stream formatter for `MPI_Status`, defined through the `MAKE_LOGGABLE`
//! macro (presumably the easylogging++ helper that expands to an
//! `operator<<` overload -- confirm against "pfasst/logging.hpp").
//!
//! Writes the status as
//! `MPI_Status(source=<MPI_SOURCE>, tag=<MPI_TAG>, error=<MPI_ERROR>)`
//! to @p os and returns the same stream so that insertions can be chained.
MAKE_LOGGABLE(MPI_Status, mpi_status, os)
143+
{
144+
os << "MPI_Status(source=" << mpi_status.MPI_SOURCE << ", "
145+
<< "tag=" << mpi_status.MPI_TAG << ", "
146+
<< "error=" << mpi_status.MPI_ERROR << ")";
147+
return os;
148+
}

0 commit comments

Comments
 (0)