33 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
44 * University Research and Technology
55 * Corporation. All rights reserved.
6- * Copyright (c) 2004-2020 The University of Tennessee and The University
6+ * Copyright (c) 2004-2022 The University of Tennessee and The University
77 * of Tennessee Research Foundation. All rights
88 * reserved.
99 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -202,6 +202,43 @@ int mca_pml_ob1_enable(bool enable)
202202 return OMPI_SUCCESS ;
203203}
204204
205+ static const char *
206+ mca_pml_ob1_set_allow_overtake (opal_infosubscriber_t * obj ,
207+ const char * key ,
208+ const char * value )
209+ {
210+ ompi_communicator_t * ompi_comm = (ompi_communicator_t * ) obj ;
211+ bool allow_overtake_was_set = OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (ompi_comm );
212+
213+ /* As we keep the out-of-sequence messages ordered by their sequence, as a receiver we
214+ * can just move the previously considered out-of-order messages into the unexpected queue,
215+ * and we maintain some form of logical consistency with the message order.
216+ */
217+ if (opal_str_to_bool (value )) {
218+ if (!allow_overtake_was_set ) {
219+ ompi_comm -> c_flags |= OMPI_COMM_ASSERT_ALLOW_OVERTAKE ;
220+ mca_pml_ob1_merge_cant_match (ompi_comm );
221+ }
222+ return "true" ;
223+ }
224+ if (allow_overtake_was_set ) {
225+ /* However, in the case we are trying to turn off allow_overtake, it is not clear what
226+ * should be done with the previous messages that are pending on our peers, nor with
227+ * the messages currently in the network. Similarly, if one process turns off allow
228+ * overtake, before any potential sender start sending valid sequence numbers there
229+ * is no way to order the messages in a sensible order.
230+ * The possible solution is cumbersome, it would force a network quiescence followed by
231+ * a synchronization of all processes in the communicator, and then all peers will
232+ * start sending messages starting with sequence number 0.
233+ * A lot of code for minimal benefit, especially taking in account that the MPI standard
234+ * does not define this. Instead, refuse to disable allow overtake, and at least the
235+ * user has the opportunity to check if we accepted to change it.
236+ */
237+ return "true" ;
238+ }
239+ return "false" ;
240+ }
241+
205242int mca_pml_ob1_add_comm (ompi_communicator_t * comm )
206243{
207244 /* allocate pml specific comm data */
@@ -221,11 +258,14 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
221258 }
222259
223260 ompi_comm_assert_subscribe (comm , OMPI_COMM_ASSERT_NO_ANY_SOURCE );
224- ompi_comm_assert_subscribe (comm , OMPI_COMM_ASSERT_ALLOW_OVERTAKE );
225261
226262 mca_pml_ob1_comm_init_size (pml_comm , comm -> c_remote_group -> grp_proc_count );
227263 comm -> c_pml_comm = pml_comm ;
228264
265+ /* Register the subscriber alert for the mpi_assert_allow_overtaking info. */
266+ opal_infosubscribe_subscribe (& comm -> super , "mpi_assert_allow_overtaking" ,
267+ "false" , mca_pml_ob1_set_allow_overtake );
268+
229269 /* Grab all related messages from the non_existing_communicator pending queue */
230270 OPAL_LIST_FOREACH_SAFE (frag , next_frag , & mca_pml_ob1 .non_existing_communicator_pending , mca_pml_ob1_recv_frag_t ) {
231271 hdr = & frag -> hdr .hdr_match ;
0 commit comments