@@ -2363,6 +2363,13 @@ static float get_ib_dev_distance(struct ibv_device *dev)
23632363 goto out ;
23642364 }
23652365
2366+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2367+ "hwloc_distances->nbobjs=%d" , hwloc_distances -> nbobjs );
2368+ for (i = 0 ; i < (int )(2 * hwloc_distances -> nbobjs ); i ++ ) {
2369+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2370+ "hwloc_distances->latency[%d]=%f" , i , hwloc_distances -> latency [i ]);
2371+ }
2372+
23662373 /* If ibv_obj is a NUMA node or below, we're good. */
23672374 switch (ibv_obj -> type ) {
23682375 case HWLOC_OBJ_NODE :
@@ -2378,6 +2385,7 @@ static float get_ib_dev_distance(struct ibv_device *dev)
23782385 default :
23792386 /* If it's above a NUMA node, then I don't know how to compute
23802387 the distance... */
2388+ opal_output_verbose (5 , opal_btl_base_framework .framework_output , "ibv_obj->type set to NULL" );
23812389 ibv_obj = NULL ;
23822390 break ;
23832391 }
@@ -2387,6 +2395,8 @@ static float get_ib_dev_distance(struct ibv_device *dev)
23872395 goto out ;
23882396 }
23892397
2398+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2399+ "ibv_obj->logical_index=%d" , ibv_obj -> logical_index );
23902400 /* This function is only called if the process is bound, so let's
23912401 find out where we are bound to. For the moment, we only care
23922402 about the NUMA node to which we are bound. */
@@ -2413,6 +2423,8 @@ static float get_ib_dev_distance(struct ibv_device *dev)
24132423 my_obj = my_obj -> parent ;
24142424 }
24152425 if (NULL != my_obj ) {
2426+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2427+ "my_obj->logical_index=%d" , my_obj -> logical_index );
24162428 /* Distance may be asymmetrical, so calculate both of them
24172429 and take the max */
24182430 a = hwloc_distances -> latency [my_obj -> logical_index +
@@ -2472,6 +2484,8 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count)
24722484
24732485 for (i = 0 ; i < count ; i ++ ) {
24742486 devs [i ].ib_dev = ib_devs [i ];
2487+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2488+ "Checking distance from this process to device=%s" , ibv_get_device_name (ib_devs [i ]));
24752489 /* If we're not bound, just assume that the device is close. */
24762490 devs [i ].distance = 0 ;
24772491#if OPAL_HAVE_HWLOC
@@ -2481,6 +2495,9 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count)
24812495 devs [i ].distance = get_ib_dev_distance (ib_devs [i ]);
24822496 }
24832497#endif
2498+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
2499+ "Process is %s: distance to device is %f" ,
2500+ (opal_process_info .cpuset ? "bound" : "not bound" ), devs [i ].distance );
24842501 }
24852502
24862503 qsort (devs , count , sizeof (struct dev_distance ), compare_distance );
0 commit comments