33 * Copyright (c) 2011-2017 The University of Tennessee and The University
44 * of Tennessee Research Foundation. All rights
55 * reserved.
6- * Copyright (c) 2011-2016 INRIA . All rights reserved.
7- * Copyright (c) 2012-2017 Bordeaux Polytechnic Institute
6+ * Copyright (c) 2011-2018 Inria . All rights reserved.
7+ * Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
88 * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
99 * Copyright (c) 2015-2017 Research Organization for Information Science
1010 * and Technology (RIST). All rights reserved.
3636
3737#include "opal/mca/pmix/pmix.h"
3838
39- /* #define __DEBUG__ 1 */
39+ /* #define __DEBUG__ 1 */
4040
4141/**
4242 * This function is an allreduce between all processes to detect oversubscription.
@@ -320,7 +320,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
320320 }
321321
322322 reqs = (MPI_Request * )calloc (num_procs_in_node - 1 , sizeof (MPI_Request ));
323- if ( rank == lindex_to_grank [0 ] ) { /* local leader clean the hierarchy */
323+ if ( rank == lindex_to_grank [0 ] ) { /* local leader cleans the hierarchy */
324324 int array_size = effective_depth + 1 ;
325325 int * myhierarchy = (int * )calloc (array_size , sizeof (int ));
326326
@@ -449,7 +449,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
449449
450450 for (i = 0 ; i < num_nodes ; i ++ )
451451 num_objs_total += objs_per_node [i ];
452- obj_mapping = (int * )calloc (num_objs_total ,sizeof (int ));
452+ obj_mapping = (int * )malloc (num_objs_total * sizeof (int ));
453+ for (i = 0 ; i < num_objs_total ; i ++ )
454+ obj_mapping [i ] = -1 ;
453455
454456 memcpy (obj_mapping , obj_to_rank_in_comm , objs_per_node [0 ]* sizeof (int ));
455457 displ = objs_per_node [0 ];
@@ -508,8 +510,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
508510
509511 for (i = 0 ; i < hierarchies [0 ]; i ++ )
510512 hierarchies [i + 1 ] = tracker [i ]-> arity ;
511- for (; i < (TM_MAX_LEVELS + 1 ); i ++ ) /* fill up everything else with -1 */
512- hierarchies [i ] = -1 ;
513+ for (; i < (TM_MAX_LEVELS + 1 ); i ++ ) /* fill up everything else with 0 */
514+ hierarchies [i ] = 0 ;
513515
514516 /* gather hierarchies iff more than 1 node! */
515517 if ( num_nodes > 1 ) {
@@ -592,32 +594,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
592594 for (i = 1 ; i < tm_topology -> nb_levels ; i ++ )
593595 tm_topology -> nb_nodes [i ] = tm_topology -> nb_nodes [i - 1 ] * tm_topology -> arity [i - 1 ];
594596
597+ #ifdef __DEBUG__
598+ assert (num_objs_total == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
599+ #endif
595600 /* Build process id tab */
596- tm_topology -> node_id = (int * * )calloc (tm_topology -> nb_levels , sizeof (int * ));
597- tm_topology -> node_rank = (int * * )malloc (sizeof (int * ) * tm_topology -> nb_levels );
598- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
599- tm_topology -> node_id [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
600- tm_topology -> node_rank [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
601- /*note : we make the hypothesis that logical indexes in hwloc range from
602- 0 to N, are contiguous and crescent. */
603-
604- for ( j = 0 ; j < (int )tm_topology -> nb_nodes [i ] ; j ++ ) {
605- tm_topology -> node_id [i ][j ] = j ;
606- tm_topology -> node_rank [i ][j ] = j ;
607-
608- /* Should use object->logical_index */
609- /* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
610- id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
611- /*
612- int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
613- topology->node_id[i][j] = id;
614- topology->node_rank[i][id] = j;
615- */
616- }
601+ tm_topology -> node_id = (int * )malloc (num_objs_total * sizeof (int ));
602+ tm_topology -> node_rank = (int * )malloc (num_objs_total * sizeof (int ));
603+ for ( i = 0 ; i < num_objs_total ; i ++ )
604+ tm_topology -> node_id [i ] = tm_topology -> node_rank [i ] = -1 ;
605+ /* note: we assume that logical indexes in hwloc range from
606+ 0 to N, are contiguous and increasing. */
607+ for ( i = 0 ; i < num_objs_total ; i ++ ) {
608+ tm_topology -> node_id [i ] = obj_mapping [i ]; /* use process ranks instead of core numbers */
609+ if (obj_mapping [i ] != -1 ) /* so that k[i] is the new rank of process i */
610+ tm_topology -> node_rank [obj_mapping [i ]] = i ; /* after computation by TreeMatch */
617611 }
612+
618613 /* unused for now*/
619614 tm_topology -> cost = (double * )calloc (tm_topology -> nb_levels ,sizeof (double ));
620-
621615 tm_topology -> nb_proc_units = num_objs_total ;
622616
623617 tm_topology -> nb_constraints = 0 ;
@@ -627,22 +621,23 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
627621 tm_topology -> constraints = (int * )calloc (tm_topology -> nb_constraints ,sizeof (int ));
628622 for (idx = 0 , i = 0 ; i < tm_topology -> nb_proc_units ; i ++ )
629623 if (obj_mapping [i ] != -1 )
630- tm_topology -> constraints [idx ++ ] = obj_mapping [i ];
631-
624+ tm_topology -> constraints [idx ++ ] = obj_mapping [i ]; /* use process ranks instead of core numbers */
625+ #ifdef __DEBUG__
626+ assert (idx == tm_topology -> nb_constraints );
627+ #endif
632628 tm_topology -> oversub_fact = 1 ;
633629
634630#ifdef __DEBUG__
635- assert (num_objs_total == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
636-
631+ /*
637632 for(i = 0; i < tm_topology->nb_levels ; i++) {
638633 opal_output_verbose(10, ompi_topo_base_framework.framework_output,
639634 "tm topo node_id for level [%i] : ",i);
640635 dump_int_array(10, ompi_topo_base_framework.framework_output,
641636 "", "", obj_mapping, tm_topology->nb_nodes[i]);
642637 }
638+ */
643639 tm_display_topology (tm_topology );
644640#endif
645-
646641 comm_pattern = (double * * )malloc (size * sizeof (double * ));
647642 for (i = 0 ; i < size ; i ++ )
648643 comm_pattern [i ] = local_pattern + i * size ;
@@ -660,15 +655,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
660655 "" , "" , comm_pattern [i ], size );
661656 }
662657#endif
663- tm_optimize_topology (& tm_topology );
658+ // tm_optimize_topology(&tm_topology);
664659 aff_mat = tm_build_affinity_mat (comm_pattern ,size );
665660 comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
666661 sol = tm_compute_mapping (tm_topology , comm_tree );
667662
668663 k = (int * )calloc (sol -> k_length , sizeof (int ));
669664 for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
670665 k [idx ] = sol -> k [idx ][0 ];
671-
672666#ifdef __DEBUG__
673667 opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
674668 "====> nb levels : %i\n" ,tm_topology -> nb_levels );
@@ -690,6 +684,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
690684
691685 /* Todo : Bcast + group creation */
692686 /* scatter the ranks */
687+ /* don't need to convert k from local rank to global rank */
693688 if (OMPI_SUCCESS != (err = comm_old -> c_coll -> coll_scatter (k , 1 , MPI_INT ,
694689 & newrank , 1 , MPI_INT ,
695690 0 , comm_old ,
@@ -770,6 +765,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
770765 tm_solution_t * sol = NULL ;
771766 tm_affinity_mat_t * aff_mat = NULL ;
772767 double * * comm_pattern = NULL ;
768+ int * obj_to_rank_in_lcomm = NULL ;
773769
774770 comm_pattern = (double * * )malloc (num_procs_in_node * sizeof (double * ));
775771 for ( i = 0 ; i < num_procs_in_node ; i ++ ) {
@@ -800,35 +796,57 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
800796 tm_topology -> nb_levels = numlevels ;
801797 tm_topology -> arity = (int * )calloc (tm_topology -> nb_levels , sizeof (int ));
802798 tm_topology -> nb_nodes = (size_t * )calloc (tm_topology -> nb_levels , sizeof (size_t ));
803- tm_topology -> node_id = (int * * )malloc (tm_topology -> nb_levels * sizeof (int * ));
804- tm_topology -> node_rank = (int * * )malloc (tm_topology -> nb_levels * sizeof (int * ));
805-
799+
806800 for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ){
807801 int nb_objs = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , tracker [i ]-> depth );
808802 tm_topology -> nb_nodes [i ] = nb_objs ;
809803 tm_topology -> arity [i ] = tracker [i ]-> arity ;
810- tm_topology -> node_id [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
811- tm_topology -> node_rank [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
812- for (j = 0 ; j < (int )tm_topology -> nb_nodes [i ] ; j ++ ){
813- tm_topology -> node_id [i ][j ] = j ;
814- tm_topology -> node_rank [i ][j ] = j ;
815- }
816804 }
817805
806+
807+ #ifdef __DEBUG__
808+ assert (num_objs_in_node == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
809+ #endif
810+ /* create a table that derives the rank in local (node) comm from the object number */
811+ obj_to_rank_in_lcomm = (int * )malloc (num_objs_in_node * sizeof (int ));
812+ for (i = 0 ; i < num_objs_in_node ; i ++ ) {
813+ obj_to_rank_in_lcomm [i ] = -1 ;
814+ object = hwloc_get_obj_by_depth (opal_hwloc_topology , effective_depth , i );
815+ for ( j = 0 ; j < num_procs_in_node ; j ++ )
816+ if (localrank_to_objnum [j ] == (int )(object -> logical_index )) {
817+ obj_to_rank_in_lcomm [i ] = j ;
818+ break ;
819+ }
820+ }
821+
822+ /* Build process id tab */
823+ tm_topology -> node_id = (int * )malloc (num_objs_in_node * sizeof (int ));
824+ tm_topology -> node_rank = (int * )malloc (num_objs_in_node * sizeof (int ));
825+ for (i = 1 ; i < num_objs_in_node ; i ++ )
826+ tm_topology -> node_id [i ] = tm_topology -> node_rank [i ] = -1 ;
827+
828+ for ( i = 0 ; i < num_objs_in_node ; i ++ ) {
829+ /* note: we assume that logical indexes in hwloc range from
830+ 0 to N, are contiguous and increasing. */
831+ tm_topology -> node_id [i ] = obj_to_rank_in_lcomm [i ];
832+ if ( obj_to_rank_in_lcomm [i ] != -1 )
833+ tm_topology -> node_rank [obj_to_rank_in_lcomm [i ]] = i ;
834+ }
835+
818836 /* unused for now*/
819837 tm_topology -> cost = (double * )calloc (tm_topology -> nb_levels ,sizeof (double ));
820838
821839 tm_topology -> nb_proc_units = num_objs_in_node ;
822- //tm_topology->nb_proc_units = num_procs_in_node;
823840 tm_topology -> nb_constraints = 0 ;
824- for (i = 0 ; i < num_procs_in_node ; i ++ )
825- if (localrank_to_objnum [i ] != -1 )
841+
842+ for (i = 0 ; i < num_objs_in_node ; i ++ )
843+ if (obj_to_rank_in_lcomm [i ] != -1 )
826844 tm_topology -> nb_constraints ++ ;
827-
845+
828846 tm_topology -> constraints = (int * )calloc (tm_topology -> nb_constraints ,sizeof (int ));
829- for (idx = 0 ,i = 0 ; i < num_procs_in_node ; i ++ )
830- if (localrank_to_objnum [i ] != -1 )
831- tm_topology -> constraints [idx ++ ] = localrank_to_objnum [i ];
847+ for (idx = 0 ,i = 0 ; i < num_objs_in_node ; i ++ )
848+ if (obj_to_rank_in_lcomm [i ] != -1 )
849+ tm_topology -> constraints [idx ++ ] = obj_to_rank_in_lcomm [i ];
832850
833851 tm_topology -> oversub_fact = 1 ;
834852
@@ -841,12 +859,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
841859 OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
842860 "Nb objs for level %i : %lu | arity %i\n " ,
843861 i , tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]));
844- dump_int_array (10 , ompi_topo_base_framework .framework_output ,
845- "" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
846862 }
863+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
864+ "" , "Obj id " , tm_topology -> node_id , tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
847865 tm_display_topology (tm_topology );
848866#endif
849- tm_optimize_topology (& tm_topology );
867+ // tm_optimize_topology(&tm_topology);
850868 aff_mat = tm_build_affinity_mat (comm_pattern ,num_procs_in_node );
851869 comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
852870 sol = tm_compute_mapping (tm_topology , comm_tree );
@@ -866,15 +884,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
866884 dump_int_array (10 , ompi_topo_base_framework .framework_output ,
867885 "Matching : " , "" , sol -> sigma , sol -> sigma_length );
868886#endif
869-
887+ free ( obj_to_rank_in_lcomm );
870888 free (aff_mat -> sum_row );
871889 free (aff_mat );
872890 free (comm_pattern );
873891 tm_free_solution (sol );
874892 tm_free_tree (comm_tree );
875893 tm_free_topology (tm_topology );
876894 }
877-
895+
878896 /* Todo : Bcast + group creation */
879897 /* scatter the ranks */
880898 if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_scatter (k , 1 , MPI_INT ,
0 commit comments