@@ -97,6 +97,18 @@ int ompi_dpm_init(void)
97
97
return OMPI_SUCCESS ;
98
98
}
99
99
100
+ static int compare_pmix_proc (const void * a , const void * b )
101
+ {
102
+ const pmix_proc_t * proc_a = (pmix_proc_t * )a ;
103
+ const pmix_proc_t * proc_b = (pmix_proc_t * )b ;
104
+
105
+ int nspace_dif = strncmp (proc_a -> nspace , proc_b -> nspace , PMIX_MAX_NSLEN );
106
+ if (nspace_dif != 0 )
107
+ return nspace_dif ;
108
+
109
+ return proc_a -> rank - proc_b -> rank ;
110
+ }
111
+
100
112
int ompi_dpm_connect_accept (ompi_communicator_t * comm , int root ,
101
113
const char * port_string , bool send_first ,
102
114
ompi_communicator_t * * newcomm )
@@ -378,6 +390,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
378
390
* so that add_procs will not result in a slew of lookups */
379
391
PMIX_INFO_CONSTRUCT (& tinfo );
380
392
PMIX_INFO_LOAD (& tinfo , PMIX_TIMEOUT , & ompi_pmix_connect_timeout , PMIX_UINT32 );
393
+
394
+ /*
395
+ * sort procs so that all ranks call PMIx_Connect() with the processes in the same order
396
+ */
397
+ qsort (procs , nprocs , sizeof (pmix_proc_t ), compare_pmix_proc );
381
398
pret = PMIx_Connect (procs , nprocs , & tinfo , 1 );
382
399
PMIX_INFO_DESTRUCT (& tinfo );
383
400
PMIX_PROC_FREE (procs , nprocs );
0 commit comments