1 file changed: +10 -10 lines changed
Original file line number | Diff line number | Diff line change
@@ -381,14 +381,19 @@ static void mca_pml_ucx_waitall(void **reqs, size_t *count_p)
381381 * count_p = 0 ;
382382}
383383
/*
 * Completion callback handed to opal_pmix.fence_nb() by
 * mca_pml_ucx_del_procs().
 *
 * status - completion status passed in by the invoker of the callback;
 *          it is not examined here.
 * fenced - pointer to an int flag; the flag is set to 1 so that the
 *          caller's "while (!fenced)" progress loop can terminate.
 *
 * NOTE(review): the flag is a plain int that the caller polls in a loop.
 * If this callback can run on a different thread than the poller, the flag
 * presumably needs to be volatile/atomic -- confirm against the PMIx
 * callback threading model.
 */
384+ static void mca_pml_fence_complete_cb (int status , void * fenced )
385+ {
386+ * (int * )fenced = 1 ; /* raise the caller's completion flag */
387+ }
388+
384389int mca_pml_ucx_del_procs (struct ompi_proc_t * * procs , size_t nprocs )
385390{
391+ int fenced = 0 ;
386392 ompi_proc_t * proc ;
387393 size_t num_reqs , max_reqs ;
388394 void * dreq , * * dreqs ;
389395 ucp_ep_h ep ;
390396 size_t i ;
391- ucs_status_t ret ;
392397
393398 max_reqs = ompi_pml_ucx .num_disconnect ;
394399 if (max_reqs > nprocs ) {
@@ -430,16 +435,11 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
430435
431436 mca_pml_ucx_waitall (dreqs , & num_reqs );
432437 free (dreqs );
433- /* flush worker to allow all pending operations to complete.
434- * ignore error (we can do nothing here), just try to
435- * finalize gracefully */
436- ret = ucp_worker_flush (ompi_pml_ucx .ucp_worker );
437- if (UCS_OK != ret ) {
438- PML_UCX_ERROR ("ucp_worker_flush failed: %s" ,
439- ucs_status_string (ret ));
440- }
441438
442- opal_pmix .fence (NULL , 0 );
439+ opal_pmix .fence_nb (NULL , 0 , mca_pml_fence_complete_cb , & fenced );
440+ while (!fenced ) {
441+ ucp_worker_progress (ompi_pml_ucx .ucp_worker );
442+ }
443443
444444 return OMPI_SUCCESS ;
445445}
You can’t perform that action at this time.
0 commit comments