5
5
* Copyright (c) 2020-2022 Triad National Security, LLC. All rights
6
6
* reserved.
7
7
* Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved.
8
- * Copyright (c) 2021 Nanook Consulting. All rights reserved.
8
+ * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
9
9
* Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
10
10
* reserved.
11
11
* Copyright (c) 2023 UT-Battelle, LLC. All rights reserved.
@@ -469,35 +469,42 @@ static int check_provider_attr(struct fi_info *provider_info, struct fi_info *pr
469
469
/* Compute the distances between the calling thread's CPU binding and the
 * local fabric devices (PMIX_DEVTYPE_OPENFABRICS | PMIX_DEVTYPE_NETWORK).
 *
 * @param (OUT) distances  array of device distances, filled in by
 *                         PMIx_Compute_distances
 * @param (OUT) ndist      number of entries stored in distances
 *
 * @return OPAL_SUCCESS on success. OPAL_ERR_NOT_BOUND if the cpuset cannot
 *         be retrieved or its bitmap is empty or full.
 *         OPAL_ERR_OUT_OF_RESOURCE if the info array cannot be allocated.
 *         Otherwise the status returned by the failing PMIx call.
 */
static int compute_dev_distances(pmix_device_distance_t **distances,
                                 size_t *ndist)
{
    int ret = OPAL_SUCCESS;
    size_t ninfo;
    pmix_info_t *info;
    pmix_cpuset_t cpuset;
    pmix_topology_t pmix_topo = PMIX_TOPOLOGY_STATIC_INIT;
    pmix_device_type_t type = PMIX_DEVTYPE_OPENFABRICS |
                              PMIX_DEVTYPE_NETWORK;

    PMIX_CPUSET_CONSTRUCT(&cpuset);
    ret = PMIx_Get_cpuset(&cpuset, PMIX_CPUBIND_THREAD);
    if (PMIX_SUCCESS != ret) {
        /* we are not bound */
        ret = OPAL_ERR_NOT_BOUND;
        goto out;
    }
    /* if we are not bound, then we cannot compute distances */
    if (hwloc_bitmap_iszero(cpuset.bitmap) ||
        hwloc_bitmap_isfull(cpuset.bitmap)) {
        /* leave through the common exit so the cpuset is released */
        ret = OPAL_ERR_NOT_BOUND;
        goto out;
    }

    /* load the PMIX topology - this just loads a pointer to
     * the local topology held in PMIx, so you must not
     * free it */
    ret = PMIx_Load_topology(&pmix_topo);
    if (PMIX_SUCCESS != ret) {
        goto out;
    }

    ninfo = 1;
    info = PMIx_Info_create(ninfo);
    if (NULL == info) {
        /* info[0] is dereferenced below, so bail out on allocation failure */
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto out;
    }
    PMIx_Info_load(&info[0], PMIX_DEVICE_TYPE, &type, PMIX_DEVTYPE);
    ret = PMIx_Compute_distances(&pmix_topo, &cpuset, info, ninfo, distances,
                                 ndist);
    PMIx_Info_free(info, ninfo);

out:
    /* every path reaches here after PMIX_CPUSET_CONSTRUCT, so the cpuset
     * can always be released */
    PMIX_CPUSET_DESTRUCT(&cpuset);
    return ret;
}
@@ -533,8 +540,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
533
540
PMIx_Info_destruct (& directive );
534
541
if (ret != PMIX_SUCCESS || !val ) {
535
542
ret = compute_dev_distances (& distances , & ndist );
536
- if (ret )
543
+ if (ret ) {
537
544
goto out ;
545
+ }
538
546
goto find_nearest ;
539
547
}
540
548
@@ -554,8 +562,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
554
562
555
563
find_nearest :
556
564
nearest = calloc (sizeof (* distances ), ndist );
557
- if (!nearest )
565
+ if (!nearest ) {
558
566
goto out ;
567
+ }
559
568
560
569
for (i = 0 ; i < ndist ; i ++ ) {
561
570
if (distances [i ].type != PMIX_DEVTYPE_NETWORK &&
@@ -596,6 +605,15 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
596
605
* distances array is not provided. False otherwise.
597
606
*
598
607
*/
608
+ #if HWLOC_API_VERSION < 0x00020000
609
+ static bool is_near (pmix_device_distance_t * distances ,
610
+ int num_distances ,
611
+ hwloc_topology_t topology ,
612
+ struct fi_pci_attr pci )
613
+ {
614
+ return true;
615
+ }
616
+ #else
599
617
static bool is_near (pmix_device_distance_t * distances ,
600
618
int num_distances ,
601
619
hwloc_topology_t topology ,
@@ -658,6 +676,7 @@ static bool is_near(pmix_device_distance_t *distances,
658
676
return false;
659
677
}
660
678
#endif
679
+ #endif // OPAL_OFI_PCI_DATA_AVAILABLE
661
680
662
681
/* Count providers returns the number of providers present in an fi_info list
663
682
* @param (IN) provider_list struct fi_info* list of providers available
@@ -772,8 +791,8 @@ struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
772
791
pmix_value_t * pmix_val ;
773
792
struct fi_pci_attr pci ;
774
793
int num_distances = 0 ;
775
- bool near ;
776
794
#endif
795
+ bool near ;
777
796
int ret ;
778
797
unsigned int num_provider = 0 , provider_limit = 0 ;
779
798
bool provider_found = false;
0 commit comments