1010 * University of Stuttgart. All rights reserved.
1111 * Copyright (c) 2004-2005 The Regents of the University of California.
1212 * All rights reserved.
13- * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
13+ * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
1414 * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
1515 * reserved.
1616 * Copyright (c) 2006 University of Houston. All rights reserved.
8484extern bool ompi_enable_timing ;
8585extern bool ompi_enable_timing_ext ;
8686
/*
 * Completion callback for the non-blocking PMIx fence invoked during
 * MPI_Finalize.  Clears the caller's completion flag so that the
 * OMPI_WAIT_FOR_COMPLETION() spin in ompi_mpi_finalize() terminates.
 *
 * @param status  Fence result code (ignored: finalize proceeds either way)
 * @param cbdata  Pointer to the caller's volatile bool "active" flag
 */
static void fence_cbfunc(int status, void *cbdata)
{
    volatile bool *flag = (volatile bool *) cbdata;

    *flag = false;
}
92+
8793int ompi_mpi_finalize (void )
8894{
8995 int ret = MPI_SUCCESS ;
9096 opal_list_item_t * item ;
9197 ompi_proc_t * * procs ;
9298 size_t nprocs ;
99+ volatile bool active ;
93100 OPAL_TIMING_DECLARE (tm );
94101 OPAL_TIMING_INIT_EXT (& tm , OPAL_TIMING_GET_TIME_OF_DAY );
95102
@@ -213,7 +220,7 @@ int ompi_mpi_finalize(void)
213220 have many other, much higher priority issues to handle that deal
214221 with non-erroneous cases. */
215222
216- /* Wait for everyone to reach this point. This is a grpcomm
223+ /* Wait for everyone to reach this point. This is a PMIx
217224 barrier instead of an MPI barrier for (at least) two reasons:
218225
219226 1. An MPI barrier doesn't ensure that all messages have been
@@ -233,7 +240,26 @@ int ompi_mpi_finalize(void)
233240 del_procs behavior around May of 2014 (see
234241 https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
235242 more details). */
236- opal_pmix .fence (NULL , 0 );
243+ if (NULL != opal_pmix .fence_nb ) {
244+ active = true;
245+ /* Note that the non-blocking PMIx fence will cycle calling
246+ opal_progress(), which will allow any other pending
247+ communications/actions to complete. See
248+ https://github.com/open-mpi/ompi/issues/1576 for the
249+ original bug report. */
250+ opal_pmix .fence_nb (NULL , 0 , fence_cbfunc , (void * )& active );
251+ OMPI_WAIT_FOR_COMPLETION (active );
252+ } else {
253+ /* However, we cannot guarantee that the provided PMIx has
254+ fence_nb. If it doesn't, then do the best we can: an MPI
255+ barrier on COMM_WORLD (which isn't the best because of the
256+ reasons cited above), followed by a blocking PMIx fence
257+ (which may not necessarily call opal_progress()). */
258+ ompi_communicator_t * comm = & ompi_mpi_comm_world .comm ;
259+ comm -> c_coll .coll_barrier (comm , comm -> c_coll .coll_barrier_module );
260+
261+ opal_pmix .fence (NULL , 0 );
262+ }
237263
238264 /* check for timing request - get stop time and report elapsed
239265 time if so */
0 commit comments