@@ -97,6 +97,18 @@ int ompi_dpm_init(void)
9797 return OMPI_SUCCESS ;
9898}
9999
100+ static int compare_pmix_proc (const void * a , const void * b )
101+ {
102+ const pmix_proc_t * proc_a = (pmix_proc_t * )a ;
103+ const pmix_proc_t * proc_b = (pmix_proc_t * )b ;
104+
105+ int nspace_dif = strncmp (proc_a -> nspace , proc_b -> nspace , PMIX_MAX_NSLEN );
106+ if (nspace_dif != 0 )
107+ return nspace_dif ;
108+
109+ return proc_a -> rank - proc_b -> rank ;
110+ }
111+
100112int ompi_dpm_connect_accept (ompi_communicator_t * comm , int root ,
101113 const char * port_string , bool send_first ,
102114 ompi_communicator_t * * newcomm )
@@ -378,6 +390,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
378390 * so that add_procs will not result in a slew of lookups */
379391 PMIX_INFO_CONSTRUCT (& tinfo );
380392 PMIX_INFO_LOAD (& tinfo , PMIX_TIMEOUT , & ompi_pmix_connect_timeout , PMIX_UINT32 );
393+
394+ /*
395+ * sort procs so that all ranks call PMIx_Connect() with the processes in the same order
396+ */
397+ qsort (procs , nprocs , sizeof (pmix_proc_t ), compare_pmix_proc );
381398 pret = PMIx_Connect (procs , nprocs , & tinfo , 1 );
382399 PMIX_INFO_DESTRUCT (& tinfo );
383400 PMIX_PROC_FREE (procs , nprocs );
0 commit comments