@@ -16,6 +16,14 @@ namespace pfasst
1616 return (string (" mpi error: " ) + string (runtime_error::what ())).c_str ();
1717 }
1818
19+ MPIError MPIError::from_code (const int err_code)
20+ {
21+ char err_str[MPI_MAX_ERROR_STRING];
22+ int err_len = 0 ;
23+ MPI_Error_string (err_code, err_str, &err_len);
24+ return MPIError (" MPI Error: " + string (err_str, err_len) + " (code=" + to_string (err_code) + " )" );
25+ }
26+
1927
2028 MPICommunicator::MPICommunicator ()
2129 {}
@@ -75,8 +83,10 @@ namespace pfasst
7583
7684 void MPIStatus::set_converged (bool converged)
7785 {
78- CLOG (DEBUG, " Controller" ) << " set converged to " << boolalpha << converged;
86+ CLOG (DEBUG, " Controller" ) << " set converged for rank " << this ->comm ->rank () << " to "
87+ << " '" << boolalpha << converged << " '" ;
7988 this ->converged .at (this ->comm ->rank ()) = converged;
89+ assert (this ->converged .at (this ->comm ->rank ()) == converged);
8090 }
8191
8292 bool MPIStatus::get_converged (int rank)
@@ -95,18 +105,11 @@ namespace pfasst
95105 if (mpi->size () == 1 ) { return ; }
96106 if (mpi->rank () == mpi->size () - 1 ) { return ; }
97107
98- int iconverged = converged.at (mpi->rank ()) ? 1 : 0 ;
99-
108+ int iconverged = converged.at (mpi->rank ()) ? IStatus::CONVERGED : IStatus::NOT_CONVERGED;
100109 int dest_rank = (mpi->rank () + 1 ) % mpi->size ();
101- CLOG (DEBUG, " Controller" ) << " sending status " << iconverged
102- << " to " << dest_rank << " of communicator " << mpi->name ();
103-
104- int err = MPI_Send (&iconverged, sizeof (int ), MPI_INT,
105- dest_rank, 1 , mpi->comm );
106110
107- if (err != MPI_SUCCESS) {
108- throw MPIError ();
109- }
111+ int err = MPI_Send (&iconverged, sizeof (int ), MPI_INT, dest_rank, 1 , mpi->comm );
112+ if (err != MPI_SUCCESS) { throw MPIError::from_code (err); }
110113 }
111114
112115 void MPIStatus::recv ()
@@ -115,34 +118,33 @@ namespace pfasst
115118 if (mpi->size () == 1 ) { return ; }
116119 if (mpi->rank () == 0 ) { return ; }
117120
118- if (get_converged (mpi->rank ()- 1 )) {
119- CLOG (DEBUG, " Controller" ) << " skipping status recv" ;
121+ if (get_converged (mpi->rank () - 1 )) {
122+ CLOG (DEBUG, " Controller" ) << " skipping status recv as previous is stored as converged " ;
120123 return ;
121124 }
122125
123126 MPI_Status stat;
124127 int iconverged;
125128 int src_rank = (mpi->rank () - 1 ) % mpi->size ();
126- int err = MPI_Recv (&iconverged, sizeof (iconverged), MPI_INT,
127- src_rank, 1 , mpi->comm , &stat);
128-
129- if (err != MPI_SUCCESS) {
130- throw MPIError ();
131- }
132-
133- converged.at (mpi->rank ()-1 ) = iconverged == 1 ? true : false ;
129+ int err = MPI_Recv (&iconverged, sizeof (iconverged), MPI_INT, src_rank, 1 , mpi->comm , &stat);
130+ if (err != MPI_SUCCESS) { throw MPIError::from_code (err); }
134131
135- CLOG (DEBUG, " Controller" ) << " recieved status " << iconverged
136- << " from rank " << src_rank << " of communicator " << mpi->name ();
132+ converged.at (mpi->rank () - 1 ) = (iconverged == IStatus::CONVERGED) ? true : false ;
137133 }
138134 } // ::pfasst::mpi
139135} // ::pfasst
140136
141137
142138MAKE_LOGGABLE (MPI_Status, mpi_status, os)
143139{
144- os << " MPI_Status(source=" << mpi_status.MPI_SOURCE << " , "
145- << " tag=" << mpi_status.MPI_TAG << " , "
146- << " error=" << mpi_status.MPI_ERROR << " )" ;
140+ if ( mpi_status.MPI_TAG == MPI_ANY_TAG
141+ && mpi_status.MPI_SOURCE == MPI_ANY_SOURCE
142+ && mpi_status.MPI_ERROR == MPI_SUCCESS) {
143+ os << " MPI_Status(empty)" ;
144+ } else {
145+ os << " MPI_Status(source=" << to_string (mpi_status.MPI_SOURCE ) << " , "
146+ << " tag=" << to_string (mpi_status.MPI_TAG ) << " , "
147+ << " error=" << to_string (mpi_status.MPI_ERROR ) << " )" ;
148+ }
147149 return os;
148150}
0 commit comments