@@ -171,8 +171,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
171171 OPAL_MODEX_RECV_VALUE (err , OPAL_PMIX_NODEID , & (proc -> super .proc_name ), & pval , OPAL_UINT32 );
172172 if ( OPAL_SUCCESS != err ) {
173173 opal_output (0 , "Unable to extract peer %s nodeid from the modex.\n" ,
174- OMPI_NAME_PRINT (& (proc -> super . proc_name )));
175- vpids [ i ] = colors [i ] = -1 ;
174+ OMPI_NAME_PRINT (& (proc -> super )));
175+ colors [i ] = -1 ;
176176 continue ;
177177 }
178178 vpids [i ] = colors [i ] = (int )val ;
@@ -396,38 +396,37 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
396396 * If weights have been provided take them into account. Otherwise rely
397397 * solely on HWLOC information.
398398 */
399- if (0 == rank ) {
399+ if ( 0 == rank ) {
400400
401+ #ifdef __DEBUG__
401402 fprintf (stderr ,"========== Centralized Reordering ========= \n" );
402-
403+ #endif
403404 local_pattern = (double * )calloc (size * size ,sizeof (double ));
404- if ( true == topo -> weighted ) {
405- for (i = 0 ; i < topo -> indegree ; i ++ )
406- local_pattern [topo -> in [i ]] += topo -> inw [i ];
407- for (i = 0 ; i < topo -> outdegree ; i ++ )
408- local_pattern [topo -> out [i ]] += topo -> outw [i ];
409- if (OMPI_SUCCESS != (err = comm_old -> c_coll -> coll_gather (MPI_IN_PLACE , size , MPI_DOUBLE ,
410- local_pattern , size , MPI_DOUBLE ,
411- 0 , comm_old ,
412- comm_old -> c_coll -> coll_gather_module )))
413- return err ;
414- }
415405 } else {
416406 local_pattern = (double * )calloc (size ,sizeof (double ));
417- if ( true == topo -> weighted ) {
418- for (i = 0 ; i < topo -> indegree ; i ++ )
419- local_pattern [topo -> in [i ]] += topo -> inw [i ];
420- for (i = 0 ; i < topo -> outdegree ; i ++ )
421- local_pattern [topo -> out [i ]] += topo -> outw [i ];
422- if (OMPI_SUCCESS != (err = comm_old -> c_coll -> coll_gather (local_pattern , size , MPI_DOUBLE ,
423- NULL ,0 ,0 ,
424- 0 , comm_old ,
425- comm_old -> c_coll -> coll_gather_module )))
426- return err ;
427- }
407+ }
408+ if ( true == topo -> weighted ) {
409+ for (i = 0 ; i < topo -> indegree ; i ++ )
410+ local_pattern [topo -> in [i ]] += topo -> inw [i ];
411+ for (i = 0 ; i < topo -> outdegree ; i ++ )
412+ local_pattern [topo -> out [i ]] += topo -> outw [i ];
413+ }
414+ if (0 == rank ) {
415+ err = comm_old -> c_coll -> coll_gather (MPI_IN_PLACE , size , MPI_DOUBLE ,
416+ local_pattern , size , MPI_DOUBLE ,
417+ 0 , comm_old ,
418+ comm_old -> c_coll -> coll_gather_module );
419+ } else {
420+ err = comm_old -> c_coll -> coll_gather (local_pattern , size , MPI_DOUBLE ,
421+ NULL ,0 ,0 ,
422+ 0 , comm_old ,
423+ comm_old -> c_coll -> coll_gather_module );
424+ }
425+ if (OMPI_SUCCESS != err ) {
426+ return err ;
428427 }
429428
430- if ( rank == local_procs [0 ]) {
429+ if ( rank == local_procs [0 ] ) {
431430 tm_topology_t * tm_topology = NULL ;
432431 tm_topology_t * tm_opt_topology = NULL ;
433432 int * obj_to_rank_in_comm = NULL ;
@@ -708,7 +707,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
708707 char set_as_string [64 ];
709708 opal_value_t kv ;
710709
711- if (OMPI_SUCCESS != (err = ompi_comm_split (comm_old ,colors [rank ],ompi_process_info .my_local_rank ,& localcomm , false)))
710+ if (OMPI_SUCCESS != (err = ompi_comm_split (comm_old , colors [rank ],
711+ ompi_process_info .my_local_rank ,
712+ & localcomm , false)))
712713 return err ;
713714
714715 for (i = 0 ; i < num_procs_in_node ; i ++ )
@@ -718,64 +719,64 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
718719 for (i = 0 ; i < size ; i ++ )
719720 grank_to_lrank [i ] = -1 ;
720721
721- if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_allgather (& rank ,1 , MPI_INT ,
722- lrank_to_grank ,1 , MPI_INT ,
723- localcomm ,
724- localcomm -> c_coll -> coll_allgather_module )))
722+ if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_allgather (& rank , 1 , MPI_INT ,
723+ lrank_to_grank , 1 , MPI_INT ,
724+ localcomm ,
725+ localcomm -> c_coll -> coll_allgather_module )))
725726 return err ;
726727
727728 for (i = 0 ; i < num_procs_in_node ; i ++ )
728729 grank_to_lrank [lrank_to_grank [i ]] = i ;
729730
730- if (rank == local_procs [0 ]){
731- tm_topology_t * tm_topology = NULL ;
732- tm_topology_t * tm_opt_topology = NULL ;
733- tree_t * comm_tree = NULL ;
734- double * * comm_pattern = NULL ;
735-
731+ /* Discover the local patterns */
732+ if (rank == local_procs [0 ]) {
736733#ifdef __DEBUG__
737734 fprintf (stderr ,"========== Partially Distributed Reordering ========= \n" );
738735#endif
736+ local_pattern = (double * )calloc (num_procs_in_node * num_procs_in_node ,sizeof (double ));
737+ } else {
738+ local_pattern = (double * )calloc (num_procs_in_node ,sizeof (double ));
739+ }
740+ for (i = 0 ; i < topo -> indegree ; i ++ )
741+ if (grank_to_lrank [topo -> in [i ]] != -1 )
742+ local_pattern [grank_to_lrank [topo -> in [i ]]] += topo -> inw [i ];
743+ for (i = 0 ; i < topo -> outdegree ; i ++ )
744+ if (grank_to_lrank [topo -> out [i ]] != -1 )
745+ local_pattern [grank_to_lrank [topo -> out [i ]]] += topo -> outw [i ];
746+ if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_gather ((rank == local_procs [0 ] ? MPI_IN_PLACE : local_pattern ),
747+ num_procs_in_node , MPI_DOUBLE ,
748+ local_pattern , num_procs_in_node , MPI_DOUBLE ,
749+ 0 , localcomm ,
750+ localcomm -> c_coll -> coll_gather_module )))
751+ ERR_EXIT (err );
739752
740- local_pattern = (double * )calloc (num_procs_in_node * num_procs_in_node ,sizeof (double ));
741- for (i = 0 ; i < num_procs_in_node * num_procs_in_node ; i ++ )
742- local_pattern [i ] = 0.0 ;
743-
744- if ( true == topo -> weighted ) {
745- for (i = 0 ; i < topo -> indegree ; i ++ )
746- if (grank_to_lrank [topo -> in [i ]] != -1 )
747- local_pattern [grank_to_lrank [topo -> in [i ]]] += topo -> inw [i ];
748- for (i = 0 ; i < topo -> outdegree ; i ++ )
749- if (grank_to_lrank [topo -> out [i ]] != -1 )
750- local_pattern [grank_to_lrank [topo -> out [i ]]] += topo -> outw [i ];
751- if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_gather (MPI_IN_PLACE , num_procs_in_node , MPI_DOUBLE ,
752- local_pattern , num_procs_in_node , MPI_DOUBLE ,
753- 0 ,localcomm ,
754- localcomm -> c_coll -> coll_gather_module )))
755- ERR_EXIT (err );
756- }
753+ /* The root now has the entire information, so let's crunch it */
754+ if (rank == local_procs [0 ]) {
755+ tm_topology_t * tm_topology = NULL ;
756+ tm_topology_t * tm_opt_topology = NULL ;
757+ tree_t * comm_tree = NULL ;
758+ double * * comm_pattern = NULL ;
757759
758760 comm_pattern = (double * * )malloc (num_procs_in_node * sizeof (double * ));
759761 for (i = 0 ; i < num_procs_in_node ; i ++ ){
760- comm_pattern [i ] = (double * )calloc (num_procs_in_node ,sizeof (double ));
761- memcpy ((void * )comm_pattern [i ],(void * )(local_pattern + i * num_procs_in_node ),num_procs_in_node * sizeof (double ));
762+ comm_pattern [i ] = (double * )calloc (num_procs_in_node , sizeof (double ));
763+ memcpy ((void * )comm_pattern [i ],
764+ (void * )(local_pattern + i * num_procs_in_node ),
765+ num_procs_in_node * sizeof (double ));
762766 }
763767 /* Matrix needs to be symmetric */
764768 for ( i = 0 ; i < num_procs_in_node ; i ++ )
765769 for (j = i ; j < num_procs_in_node ; j ++ ){
766- comm_pattern [i ][j ] += comm_pattern [j ][ i ] ;
767- comm_pattern [j ][i ] = comm_pattern [i ][j ];
770+ comm_pattern [i ][j ] = ( comm_pattern [i ][ j ] + comm_pattern [ j ][ i ]) / 2 ;
771+ comm_pattern [j ][i ] = comm_pattern [i ][j ];
768772 }
769- for ( i = 0 ; i < num_procs_in_node ; i ++ )
770- for (j = 0 ; j < num_procs_in_node ; j ++ )
771- comm_pattern [i ][j ] /= 2 ;
772773
773774#ifdef __DEBUG__
774775 fprintf (stdout ,"========== COMM PATTERN ============= \n" );
775776 for (i = 0 ; i < num_procs_in_node ; i ++ ){
776777 fprintf (stdout ," %i : " ,i );
777778 for (j = 0 ; j < num_procs_in_node ; j ++ )
778- fprintf (stdout ," %f " ,comm_pattern [i ][j ]);
779+ fprintf (stdout ," %f " , comm_pattern [i ][j ]);
779780 fprintf (stdout ,"\n" );
780781 }
781782 fprintf (stdout ,"======================= \n" );
@@ -830,24 +831,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
830831 free (tm_topology -> arity );
831832 free (tm_topology );
832833 FREE_topology (tm_opt_topology );
833- } else {
834- local_pattern = (double * )calloc (num_procs_in_node ,sizeof (double ));
835- for (i = 0 ; i < num_procs_in_node ; i ++ )
836- local_pattern [i ] = 0.0 ;
837-
838- if ( true == topo -> weighted ) {
839- for (i = 0 ; i < topo -> indegree ; i ++ )
840- if (grank_to_lrank [topo -> in [i ]] != -1 )
841- local_pattern [grank_to_lrank [topo -> in [i ]]] += topo -> inw [i ];
842- for (i = 0 ; i < topo -> outdegree ; i ++ )
843- if (grank_to_lrank [topo -> out [i ]] != -1 )
844- local_pattern [grank_to_lrank [topo -> out [i ]]] += topo -> outw [i ];
845- if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_gather (local_pattern , num_procs_in_node , MPI_DOUBLE ,
846- NULL ,0 ,0 ,
847- 0 ,localcomm ,
848- localcomm -> c_coll -> coll_gather_module )))
849- ERR_EXIT (err );
850- }
851834 }
852835
853836 if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_bcast (matching , num_procs_in_node ,
0 commit comments