1010 * University of Stuttgart. All rights reserved.
1111 * Copyright (c) 2004-2005 The Regents of the University of California.
1212 * All rights reserved.
13- * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
13+ * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
1414 * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
1515 * reserved.
1616 * Copyright (c) 2006 University of Houston. All rights reserved.
@@ -226,7 +226,7 @@ int ompi_mpi_finalize(void)
226226 have many other, much higher priority issues to handle that deal
227227 with non-erroneous cases. */
228228
229- /* Wait for everyone to reach this point. This is a grpcomm
229+ /* Wait for everyone to reach this point. This is a PMIx
230230 barrier instead of an MPI barrier for (at least) two reasons:
231231
232232 1. An MPI barrier doesn't ensure that all messages have been
@@ -247,12 +247,24 @@ int ompi_mpi_finalize(void)
247247 https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
248248 more details). */
249249 if (NULL != opal_pmix .fence_nb ) {
250- active = true;
251- opal_pmix .fence_nb (NULL , 0 , fence_cbfunc , (void * )& active );
252- OMPI_WAIT_FOR_COMPLETION (active );
250+ active = true;
251+ /* Note that waiting for the non-blocking PMIx fence to
252+ complete repeatedly calls opal_progress(), which allows
253+ any other pending communications/actions to complete. See
254+ https://github.com/open-mpi/ompi/issues/1576 for the
255+ original bug report. */
256+ opal_pmix .fence_nb (NULL , 0 , fence_cbfunc , (void * )& active );
257+ OMPI_WAIT_FOR_COMPLETION (active );
253258 } else {
254- MPI_Barrier (MPI_COMM_WORLD );
255- opal_pmix .fence (NULL , 0 );
259+ /* However, we cannot guarantee that the provided PMIx has
260+ fence_nb. If it doesn't, then do the best we can: an MPI
261+ barrier on COMM_WORLD (which isn't the best because of the
262+ reasons cited above), followed by a blocking PMIx fence
263+ (which may not necessarily call opal_progress()). */
264+ ompi_communicator_t * comm = & ompi_mpi_comm_world .comm ;
265+ comm -> c_coll .coll_barrier (comm , comm -> c_coll .coll_barrier_module );
266+
267+ opal_pmix .fence (NULL , 0 );
256268 }
257269
258270 /* check for timing request - get stop time and report elapsed
0 commit comments