Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit f16374b

Browse files
committed
Merge pull request #1135 from jladd-mlnx/topic/disable-hcoll-barrier-in-finalize-v2.0.1
HCOLL: fix hang in hcoll barrier called from finalize for MXM/yalla
2 parents f0e283d + 5c5f1e7 commit f16374b

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

ompi/mca/coll/hcoll/coll_hcoll_ops.c

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,21 @@
1818
int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm,
1919
mca_coll_base_module_t *module){
2020
int rc;
21-
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
2221
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
22+
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
23+
24+
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
25+
HCOL_VERBOSE(5, "In finalize, reverting to previous barrier");
26+
goto orig_barrier;
27+
}
2328
rc = hcoll_collectives.coll_barrier(hcoll_module->hcoll_context);
2429
if (HCOLL_SUCCESS != rc){
2530
HCOL_VERBOSE(20,"RUNNING FALLBACK BARRIER");
2631
rc = hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module);
2732
}
2833
return rc;
34+
orig_barrier:
35+
return hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module);
2936
}
3037

3138
int mca_coll_hcoll_bcast(void *buff, int count,

0 commit comments

Comments
 (0)