3636
3737#include "opal/mca/pmix/pmix.h"
3838
39- /* #define __DEBUG__ 1 */
39+ /* #define __DEBUG__ 1 */
4040
4141/**
4242 * This function is an allreduce between all processes to detect oversubscription.
@@ -72,23 +72,25 @@ static int check_oversubscribing(int rank,
7272}
7373
7474#ifdef __DEBUG__
/**
 * Debug helper: print an array of ints through the OPAL output framework.
 *
 * The text has the form "<prolog> : <line_prolog> [<index>:<value>] ..." and
 * is handed to opal_output_verbose() in a single call. Emitting each fragment
 * with its own call (as a direct fprintf -> opal_output_verbose translation
 * would do) turns every element into a separate output message instead of
 * one readable line.
 *
 * @param level        verbosity level forwarded to opal_output_verbose()
 * @param output_id    output stream id; -1 disables the dump entirely
 * @param prolog       text printed once before the elements (NULL is treated as "")
 * @param line_prolog  text printed before every element (NULL is treated as "")
 * @param array        the values to print; must hold at least @p length entries
 * @param length       number of entries of @p array to print
 */
static void dump_int_array( int level, int output_id, const char* prolog, const char* line_prolog, const int* array, size_t length )
{
    size_t i, capacity, used;
    char *line;
    int written;

    if( -1 == output_id ) return;
    if( NULL == prolog ) prolog = "";
    if( NULL == line_prolog ) line_prolog = "";

    /* First pass: measure the exact size of the final line. The index is cast
     * to unsigned long so that the argument really matches the %lu conversion
     * (size_t is not guaranteed to be unsigned long). */
    written = snprintf( NULL, 0, "%s : ", prolog );
    if( written < 0 ) return;
    capacity = (size_t)written + 1;  /* +1 for the terminating NUL */
    for( i = 0; i < length; i++ ) {
        written = snprintf( NULL, 0, "%s [%lu:%i] ", line_prolog, (unsigned long)i, array[i] );
        if( written < 0 ) return;
        capacity += (size_t)written;
    }

    line = (char*)malloc( capacity );
    if( NULL == line ) return;  /* best effort: this is debug output only */

    /* Second pass: format into the buffer. */
    written = snprintf( line, capacity, "%s : ", prolog );
    used = (written < 0) ? 0 : (size_t)written;
    for( i = 0; (i < length) && (used < capacity); i++ ) {
        written = snprintf( line + used, capacity - used, "%s [%lu:%i] ",
                            line_prolog, (unsigned long)i, array[i] );
        if( written < 0 ) break;
        used += (size_t)written;
    }

    opal_output_verbose( level, output_id, "%s\n", line );
    free( line );
}
/**
 * Debug helper: print an array of doubles through the OPAL output framework.
 *
 * The text has the form "<prolog> : <line_prolog> [<index>:<value>] ..." and
 * is handed to opal_output_verbose() in a single call. Emitting each fragment
 * with its own call (as a direct fprintf -> opal_output_verbose translation
 * would do) turns every element into a separate output message instead of
 * one readable line.
 *
 * @param level        verbosity level forwarded to opal_output_verbose()
 * @param output_id    output stream id; -1 disables the dump entirely
 * @param prolog       text printed once before the elements (NULL is treated as "")
 * @param line_prolog  text printed before every element (NULL is treated as "")
 * @param array        the values to print; must hold at least @p length entries
 * @param length       number of entries of @p array to print
 */
static void dump_double_array( int level, int output_id, const char* prolog, const char* line_prolog, const double* array, size_t length )
{
    size_t i, capacity, used;
    char *line;
    int written;

    if( -1 == output_id ) return;
    if( NULL == prolog ) prolog = "";
    if( NULL == line_prolog ) line_prolog = "";

    /* First pass: measure the exact size of the final line. The index is cast
     * to unsigned long so that the argument really matches the %lu conversion
     * (size_t is not guaranteed to be unsigned long). */
    written = snprintf( NULL, 0, "%s : ", prolog );
    if( written < 0 ) return;
    capacity = (size_t)written + 1;  /* +1 for the terminating NUL */
    for( i = 0; i < length; i++ ) {
        written = snprintf( NULL, 0, "%s [%lu:%lf] ", line_prolog, (unsigned long)i, array[i] );
        if( written < 0 ) return;
        capacity += (size_t)written;
    }

    line = (char*)malloc( capacity );
    if( NULL == line ) return;  /* best effort: this is debug output only */

    /* Second pass: format into the buffer. */
    written = snprintf( line, capacity, "%s : ", prolog );
    used = (written < 0) ? 0 : (size_t)written;
    for( i = 0; (i < length) && (used < capacity); i++ ) {
        written = snprintf( line + used, capacity - used, "%s [%lu:%lf] ",
                            line_prolog, (unsigned long)i, array[i] );
        if( written < 0 ) break;
        used += (size_t)written;
    }

    opal_output_verbose( level, output_id, "%s\n", line );
    free( line );
}
9395#endif
9496
@@ -152,9 +154,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
152154 rank = ompi_comm_rank (comm_old );
153155 size = ompi_comm_size (comm_old );
154156
155- #ifdef __DEBUG__
156- fprintf (stdout ,"Process rank is : %i\n" ,rank );
157- #endif
157+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
158+ "Process rank is : %i\n" ,rank ));
158159 /**
159160 * In order to decrease the number of loops let's use a trick:
160161 * build the lindex_to_grank in the vpids array, and only allocate
@@ -184,8 +185,10 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
184185
185186#ifdef __DEBUG__
186187 if ( 0 == rank ) {
187- dump_int_array ("lindex_to_grank : " , "" , lindex_to_grank , num_procs_in_node );
188- dump_int_array ("Vpids : " , "" , colors , size );
188+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
189+ "lindex_to_grank : " , "" , lindex_to_grank , num_procs_in_node );
190+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
191+ "Vpids : " , "" , colors , size );
189192 }
190193#endif
191194 /* clean-up dupes in the array */
@@ -210,9 +213,11 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
210213 for (i = idx = 0 ; i < size ; i ++ )
211214 if ( vpids [i ] != -1 )
212215 nodes_roots [idx ++ ] = i ;
216+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
217+ "num nodes is %i\n" , num_nodes ));
213218#ifdef __DEBUG__
214- fprintf ( stdout , "num nodes is %i\n" , num_nodes );
215- dump_int_array ( "Root nodes are :\n" , "root " , nodes_roots , num_nodes );
219+ dump_int_array ( 10 , ompi_topo_base_framework . framework_output ,
220+ "Root nodes are :\n" , "root " , nodes_roots , num_nodes );
216221#endif
217222 }
218223 free (vpids );
@@ -229,10 +234,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
229234 */
230235
231236 if (hwloc_bitmap_isincluded (root_obj -> cpuset ,set )) { /* processes are not bound on the machine */
232- #ifdef __DEBUG__
233237 if (0 == rank )
234- fprintf ( stdout , ">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n" );
235- #endif /* __DEBUG__ */
238+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_topo_base_framework . framework_output ,
239+ ">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n" ));
236240
237241 /* we try to bind to cores or above objects if enough are present */
238242 /* Not sure that cores are present in ALL nodes */
@@ -255,9 +259,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
255259
256260 if (oversubscribing_objs ) {
257261 if (hwloc_bitmap_isincluded (root_obj -> cpuset , set )) { /* processes are not bound on the machine */
258- #ifdef __DEBUG__
259- fprintf ( stdout , "Oversubscribing OBJ/CORES resources => Trying to use PUs \n" );
260- #endif
262+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_topo_base_framework . framework_output ,
263+ "Oversubscribing OBJ/CORES resources => Trying to use PUs \n" ) );
264+
261265 oversubscribed_pus = check_oversubscribing (rank , num_nodes ,
262266 num_pus_in_node , num_procs_in_node ,
263267 nodes_roots , lindex_to_grank , comm_old );
@@ -266,9 +270,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
266270 obj_rank = ompi_process_info .my_local_rank %num_pus_in_node ;
267271 effective_depth = hwloc_topology_get_depth (opal_hwloc_topology ) - 1 ;
268272 num_objs_in_node = num_pus_in_node ;
269- #ifdef __DEBUG__
270- fprintf (stdout , "Process not bound : binding on PU#%i \n" , obj_rank );
271- #endif
273+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
274+ "Process %i not bound : binding on PU#%i \n" , rank , obj_rank ));
272275 }
273276 } else {
274277 /* Bound processes will participate with the same data as before */
@@ -293,23 +296,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
293296 hwloc_bitmap_singlify (set ); /* we don't want the process to move */
294297 hwloc_err = hwloc_set_cpubind (opal_hwloc_topology , set , 0 );
295298 if ( -1 == hwloc_err ) {
296- free (colors );
297- hwloc_bitmap_free (set );
298- goto fallback ; /* return with success */
299- }
300- #ifdef __DEBUG__
301- fprintf (stdout ,"Process not bound : binding on OBJ#%i \n" ,obj_rank );
302- #endif
299+ /* This is a local issue. Either we agree with the rest of the processes to stop the
300+ * reordering or we have to complete the entire process. Let's complete.
301+ */
302+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
303+ "Process %i failed to bind on OBJ#%i \n" , rank , obj_rank ));
304+ } else
305+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
306+ "Process %i not bound : binding on OBJ#%i \n" ,rank , obj_rank ));
303307 } else {
304- #ifdef __DEBUG__
305- fprintf (stdout , "Process %i bound on OBJ #%i \n" , rank , obj_rank );
306- fprintf (stdout , "=====> Num obj in node : %i | num pus in node : %i\n" , num_objs_in_node , num_pus_in_node );
307- #endif
308+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
309+ "Process %i bound on OBJ #%i \n"
310+ "=====> Num obj in node : %i | num pus in node : %i\n" ,
311+ rank , obj_rank ,
312+ num_objs_in_node , num_pus_in_node ));
308313 }
309314 } else {
310- #ifdef __DEBUG__
311- fprintf (stdout , "Oversubscribing PUs resources => Rank Reordering Impossible \n" );
312- #endif
315+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
316+ "Oversubscribing PUs resources => Rank Reordering Impossible \n" ));
313317 free (colors );
314318 hwloc_bitmap_free (set );
315319 goto fallback ; /* return with success */
@@ -324,9 +328,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
324328 myhierarchy [0 ] = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , 0 );
325329 for (i = 1 ; i < array_size ; i ++ ) {
326330 myhierarchy [i ] = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , i );
327- #ifdef __DEBUG__
328- fprintf (stdout ,"hierarchy[%i] = %i\n" , i , myhierarchy [i ]);
329- #endif
331+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
332+ "hierarchy[%i] = %i\n" , i , myhierarchy [i ]));
330333 if ((myhierarchy [i ] != 0 ) && (myhierarchy [i ] != myhierarchy [i - 1 ]))
331334 numlevels ++ ;
332335 }
@@ -339,12 +342,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
339342 tracker [idx ] = hwloc_get_obj_by_depth (opal_hwloc_topology , effective_depth , 0 );
340343 free (myhierarchy );
341344
342- #ifdef __DEBUG__
343- fprintf (stdout , ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n" ,
344- effective_depth , hwloc_topology_get_depth (opal_hwloc_topology ), numlevels );
345- for (i = 0 ; i < numlevels ; i ++ )
346- fprintf (stdout , "tracker[%i] : arity %i | depth %i\n" , i , tracker [i ]-> arity , tracker [i ]-> depth );
347- #endif
345+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
346+ ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n" ,
347+ effective_depth , hwloc_topology_get_depth (opal_hwloc_topology ), numlevels ));
348+ for (i = 0 ; i < numlevels ; i ++ ) {
349+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
350+ "tracker[%i] : arity %i | depth %i\n" ,
351+ i , tracker [i ]-> arity , tracker [i ]-> depth ));
352+ }
348353 /* get the obj number */
349354 localrank_to_objnum = (int * )calloc (num_procs_in_node , sizeof (int ));
350355 localrank_to_objnum [0 ] = obj_rank ;
@@ -383,9 +388,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
383388 */
384389 if ( 0 == rank ) {
385390
386- #ifdef __DEBUG__
387- fprintf (stderr ,"========== Centralized Reordering ========= \n" );
388- #endif
391+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
392+ "========== Centralized Reordering ========= \n" ));
389393 local_pattern = (double * )calloc (size * size ,sizeof (double ));
390394 } else {
391395 local_pattern = (double * )calloc (size ,sizeof (double ));
@@ -474,7 +478,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
474478 memcpy (obj_mapping , obj_to_rank_in_comm , num_objs_total * sizeof (int ));
475479 }
476480#ifdef __DEBUG__
477- dump_int_array ( "Obj mapping : " , "" , obj_mapping , num_objs_total );
481+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
482+ "Obj mapping : " , "" , obj_mapping , num_objs_total );
478483#endif
479484 } else {
480485 if ( num_nodes > 1 ) {
@@ -539,7 +544,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
539544 double * * comm_pattern = NULL ;
540545
541546#ifdef __DEBUG__
542- dump_int_array ("hierarchies : " , "" , hierarchies , num_nodes * (TM_MAX_LEVELS + 1 ));
547+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
548+ "hierarchies : " , "" , hierarchies , num_nodes * (TM_MAX_LEVELS + 1 ));
543549#endif
544550 tm_topology = (tm_topology_t * )malloc (sizeof (tm_topology_t ));
545551 tm_topology -> nb_levels = hierarchies [0 ];
@@ -574,10 +580,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
574580 tm_topology -> arity [i ] = hierarchies [i + 1 ];
575581 }
576582 free (hierarchies );
577- #ifdef __DEBUG__
578- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ )
579- fprintf (stdout ,"topo_arity[%i] = %i\n" , i , tm_topology -> arity [i ]);
580- #endif
583+
584+ for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
585+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
586+ "topo_arity[%i] = %i\n" , i , tm_topology -> arity [i ]));
587+ }
588+
581589 /* compute the number of processing elements */
582590 tm_topology -> nb_nodes = (size_t * )calloc (tm_topology -> nb_levels , sizeof (size_t ));
583591 tm_topology -> nb_nodes [0 ] = 1 ;
@@ -624,11 +632,13 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
624632 tm_topology -> oversub_fact = 1 ;
625633
626634#ifdef __DEBUG__
627- assert (num_objs_total == tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
635+ assert (num_objs_total == ( int ) tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
628636
629637 for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
630- fprintf (stdout ,"tm topo node_id for level [%i] : " ,i );
631- dump_int_array ("" , "" , obj_mapping , tm_topology -> nb_nodes [i ]);
638+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
639+ "tm topo node_id for level [%i] : " ,i );
640+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
641+ "" , "" , obj_mapping , tm_topology -> nb_nodes [i ]);
632642 }
633643 tm_display_topology (tm_topology );
634644#endif
@@ -643,27 +653,30 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
643653 comm_pattern [j ][i ] = comm_pattern [i ][j ];
644654 }
645655#ifdef __DEBUG__
646- fprintf (stdout ,"==== COMM PATTERN ====\n" );
656+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
657+ "==== COMM PATTERN ====\n" );
647658 for ( i = 0 ; i < size ; i ++ ) {
648- dump_double_array ("" , "" , comm_pattern [i ], size );
659+ dump_double_array (10 , ompi_topo_base_framework .framework_output ,
660+ "" , "" , comm_pattern [i ], size );
649661 }
650662#endif
651663 tm_optimize_topology (& tm_topology );
652664 aff_mat = tm_build_affinity_mat (comm_pattern ,size );
653665 comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
654666 sol = tm_compute_mapping (tm_topology , comm_tree );
655667
656- assert ((int )sol -> k_length == size );
657-
658668 k = (int * )calloc (sol -> k_length , sizeof (int ));
659669 for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
660670 k [idx ] = sol -> k [idx ][0 ];
661671
662672#ifdef __DEBUG__
663- fprintf (stdout ,"====> nb levels : %i\n" ,tm_topology -> nb_levels );
664- dump_int_array ("Rank permutation sigma/k : " , "" , k , num_objs_total );
665- assert (size == sol -> sigma_length );
666- dump_int_array ("Matching : " , "" ,sol -> sigma , sol -> sigma_length );
673+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
674+ "====> nb levels : %i\n" ,tm_topology -> nb_levels );
675+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
676+ "Rank permutation sigma/k : " , "" , k , num_objs_total );
677+ assert (size == (int )sol -> sigma_length );
678+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
679+ "Matching : " , "" ,sol -> sigma , sol -> sigma_length );
667680#endif
668681 free (obj_mapping );
669682 free (comm_pattern );
@@ -725,9 +738,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
725738
726739 /* Discover the local patterns */
727740 if (rank == lindex_to_grank [0 ]) {
728- #ifdef __DEBUG__
729- fprintf (stderr ,"========== Partially Distributed Reordering ========= \n" );
730- #endif
741+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
742+ "========== Partially Distributed Reordering ========= \n" ));
731743 local_pattern = (double * )calloc (num_procs_in_node * num_procs_in_node , sizeof (double ));
732744 } else {
733745 local_pattern = (double * )calloc (num_procs_in_node , sizeof (double ));
@@ -773,12 +785,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
773785 }
774786
775787#ifdef __DEBUG__
776- fprintf (stdout ,"========== COMM PATTERN ============= \n" );
788+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
789+ "========== COMM PATTERN ============= \n" ));
777790 for (i = 0 ; i < num_procs_in_node ; i ++ ){
778- fprintf (stdout ," %i : " ,i );
779- dump_double_array ("" , "" , comm_pattern [i ], num_procs_in_node );
791+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ," %i : " ,i );
792+ dump_double_array (10 , ompi_topo_base_framework .framework_output ,
793+ "" , "" , comm_pattern [i ], num_procs_in_node );
780794 }
781- fprintf (stdout ,"======================= \n" );
795+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
796+ "======================= \n" );
782797#endif
783798
784799 tm_topology = (tm_topology_t * )malloc (sizeof (tm_topology_t ));
@@ -818,11 +833,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
818833 tm_topology -> oversub_fact = 1 ;
819834
820835#ifdef __DEBUG__
821- assert (num_objs_in_node == tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
822- fprintf (stdout ,"Levels in topo : %i | num procs in node : %i\n" ,tm_topology -> nb_levels ,num_procs_in_node );
823- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ){
824- fprintf (stdout ,"Nb objs for level %i : %i | arity %i\n " ,i ,tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]);
825- dump_int_array ("" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
836+ assert (num_objs_in_node == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
837+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
838+ "Levels in topo : %i | num procs in node : %i\n" ,
839+ tm_topology -> nb_levels ,num_procs_in_node ));
840+ for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
841+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
842+ "Nb objs for level %i : %lu | arity %i\n " ,
843+ i , tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]));
844+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
845+ "" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
826846 }
827847 tm_display_topology (tm_topology );
828848#endif
@@ -831,17 +851,20 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
831851 comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
832852 sol = tm_compute_mapping (tm_topology , comm_tree );
833853
834- assert ((int )sol -> k_length == num_procs_in_node );
854+ assert ((int )sol -> k_length == num_objs_in_node );
835855
836856 k = (int * )calloc (sol -> k_length , sizeof (int ));
837857 for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
838858 k [idx ] = sol -> k [idx ][0 ];
839859
840860#ifdef __DEBUG__
841- fprintf (stdout ,"====> nb levels : %i\n" ,tm_topology -> nb_levels );
842- dump_int_array ("Rank permutation sigma/k : " , "" , k , num_procs_in_node );
843- assert (num_procs_in_node == sol -> sigma_length );
844- dump_int_array ("Matching : " , "" , sol -> sigma , sol -> sigma_length );
861+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
862+ "====> nb levels : %i\n" ,tm_topology -> nb_levels ));
863+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
864+ "Rank permutation sigma/k : " , "" , k , num_procs_in_node );
865+ assert (num_procs_in_node == (int )sol -> sigma_length );
866+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
867+ "Matching : " , "" , sol -> sigma , sol -> sigma_length );
845868#endif
846869
847870 free (aff_mat -> sum_row );
0 commit comments