Skip to content

Commit 8933171

Browse files
authored
Merge pull request #6918 from hoopoepg/topic/fixed-hand-on-shmem-finalize
SPML/UCX: fixed hang in SHMEM_FINALIZE
2 parents 69bd945 + 01dacaa commit 8933171

File tree

2 files changed

+11
-8
lines changed

2 files changed

+11
-8
lines changed

opal/mca/common/ucx/common_ucx.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -187,8 +187,11 @@ static void opal_common_ucx_wait_all_requests(void **reqs, int count, ucp_worker
187 187   }
188 188   }
189 189
190     - OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count,
191     - size_t my_rank, size_t max_disconnect, ucp_worker_h worker) {
    190 + OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs,
    191 + size_t count, size_t my_rank,
    192 + size_t max_disconnect,
    193 + ucp_worker_h worker)
    194 + {
192 195   size_t num_reqs;
193 196   size_t max_reqs;
194 197   void *dreq, **dreqs;

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -124,18 +124,16 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
124 124   mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn = NULL;
125 125   }
126 126
127     - ret = opal_common_ucx_del_procs(del_procs, nprocs, oshmem_my_proc_id(),
128     - mca_spml_ucx.num_disconnect,
129     - mca_spml_ucx_ctx_default.ucp_worker);
130     -
    127 + ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(),
    128 + mca_spml_ucx.num_disconnect,
    129 + mca_spml_ucx_ctx_default.ucp_worker);
    130 + /* No need to barrier here - barrier is called in _shmem_finalize */
131 131   free(del_procs);
132 132   free(mca_spml_ucx.remote_addrs_tbl);
133 133   free(mca_spml_ucx_ctx_default.ucp_peers);
134 134
135 135   mca_spml_ucx_ctx_default.ucp_peers = NULL;
136 136
137     - opal_common_ucx_mca_proc_added();
138     -
139 137   return ret;
140 138   }
141 139
141139

@@ -323,6 +321,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
323 321   free(wk_roffs);
324 322
325 323   SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***");
    324 +
    325 + opal_common_ucx_mca_proc_added();
326 326   return OSHMEM_SUCCESS;
327 327
328 328   error2:

0 commit comments

Comments
 (0)