Skip to content

Commit 4771c9e

Browse files
committed
Merge pull request #1617 from jladd-mlnx/topic/disable-hcoll-barrier-in-finalize-ompi-trunk
HCOLL: fix hang in hcoll barrier called from finalize for MXM/yalla
2 parents 0f54a95 + cafd55f commit 4771c9e

File tree

1 file changed: +8 -1 lines changed (8 additions, 1 deletion)

ompi/mca/coll/hcoll/coll_hcoll_ops.c

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,21 @@
1818
int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm,
1919
mca_coll_base_module_t *module){
2020
int rc;
21-
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
2221
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
22+
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
23+
24+
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
25+
HCOL_VERBOSE(5, "In finalize, reverting to previous barrier");
26+
goto orig_barrier;
27+
}
2328
rc = hcoll_collectives.coll_barrier(hcoll_module->hcoll_context);
2429
if (HCOLL_SUCCESS != rc){
2530
HCOL_VERBOSE(20,"RUNNING FALLBACK BARRIER");
2631
rc = hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module);
2732
}
2833
return rc;
34+
orig_barrier:
35+
return hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module);
2936
}
3037

3138
int mca_coll_hcoll_bcast(void *buff, int count,

0 commit comments

Comments
 (0)