@@ -341,21 +341,12 @@ is_in_list(char **list, char *item)
341
341
}
342
342
343
343
static struct fi_info *
344
- select_ofi_provider (struct fi_info * providers )
344
+ select_ofi_provider (struct fi_info * providers ,
345
+ char * * include_list , char * * exclude_list )
345
346
{
346
- char * * include_list = NULL ;
347
- char * * exclude_list = NULL ;
348
347
struct fi_info * prov = providers ;
349
348
350
- opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
351
- "%s:%d: mtl:ofi:provider_include = \"%s\"\n" ,
352
- __FILE__ , __LINE__ , prov_include );
353
- opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
354
- "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n" ,
355
- __FILE__ , __LINE__ , prov_exclude );
356
-
357
- if (NULL != prov_include ) {
358
- include_list = opal_argv_split (prov_include , ',' );
349
+ if (NULL != include_list ) {
359
350
while ((NULL != prov ) &&
360
351
(!is_in_list (include_list , prov -> fabric_attr -> prov_name ))) {
361
352
opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
@@ -364,8 +355,7 @@ select_ofi_provider(struct fi_info *providers)
364
355
prov -> fabric_attr -> prov_name );
365
356
prov = prov -> next ;
366
357
}
367
- } else if (NULL != prov_exclude ) {
368
- exclude_list = opal_argv_split (prov_exclude , ',' );
358
+ } else if (NULL != exclude_list ) {
369
359
while ((NULL != prov ) &&
370
360
(is_in_list (exclude_list , prov -> fabric_attr -> prov_name ))) {
371
361
opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
@@ -376,9 +366,6 @@ select_ofi_provider(struct fi_info *providers)
376
366
}
377
367
}
378
368
379
- opal_argv_free (include_list );
380
- opal_argv_free (exclude_list );
381
-
382
369
opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
383
370
"%s:%d: mtl:ofi:prov: %s\n" ,
384
371
__FILE__ , __LINE__ ,
@@ -621,7 +608,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
621
608
int ret , fi_version ;
622
609
int num_local_ranks , sep_support_in_provider , max_ofi_ctxts ;
623
610
int ofi_tag_leading_zeros , ofi_tag_bits_for_cid ;
624
- struct fi_info * hints ;
611
+ char * * include_list = NULL ;
612
+ char * * exclude_list = NULL ;
613
+ struct fi_info * hints , * hints_dup = NULL ;
625
614
struct fi_info * providers = NULL ;
626
615
struct fi_info * prov = NULL ;
627
616
struct fi_info * prov_cq_data = NULL ;
@@ -630,6 +619,19 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
630
619
int universe_size ;
631
620
char * univ_size_str ;
632
621
622
+ opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
623
+ "%s:%d: mtl:ofi:provider_include = \"%s\"\n" ,
624
+ __FILE__ , __LINE__ , prov_include );
625
+ opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
626
+ "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n" ,
627
+ __FILE__ , __LINE__ , prov_exclude );
628
+
629
+ if (NULL != prov_include ) {
630
+ include_list = opal_argv_split (prov_include , ',' );
631
+ } else if (NULL != prov_exclude ) {
632
+ exclude_list = opal_argv_split (prov_exclude , ',' );
633
+ }
634
+
633
635
/**
634
636
* Hints to filter providers
635
637
* See man fi_getinfo for a list of all filters
@@ -647,9 +649,11 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
647
649
__FILE__ , __LINE__ );
648
650
goto error ;
649
651
}
652
+ /* Make sure to get an RDM provider that can do the tagged matching
653
+ interface and local communication and remote communication. */
650
654
hints -> mode = FI_CONTEXT ;
651
- hints -> ep_attr -> type = FI_EP_RDM ; /* Reliable datagram */
652
- hints -> caps = FI_TAGGED ; /* Tag matching interface */
655
+ hints -> ep_attr -> type = FI_EP_RDM ;
656
+ hints -> caps = FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM ;
653
657
hints -> tx_attr -> msg_order = FI_ORDER_SAS ;
654
658
hints -> rx_attr -> msg_order = FI_ORDER_SAS ;
655
659
hints -> rx_attr -> op_flags = FI_COMPLETION ;
@@ -697,8 +701,59 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
697
701
* FI_VERSION provides binary backward and forward compatibility support
698
702
* Specify the version of OFI this code is written to; the provider will select struct
699
703
* layouts that are compatible with this version.
704
+ *
705
+ * Note: API version 1.5 is the first version that supports
706
+ * FI_LOCAL_COMM / FI_REMOTE_COMM checking (and we definitely need
707
+ * that checking -- e.g., some providers are suitable for RXD or
708
+ * RXM, but can't provide local communication).
700
709
*/
701
- fi_version = FI_VERSION (1 , 0 );
710
+ fi_version = FI_VERSION (1 , 5 );
711
+
712
+ /**
713
+ * The EFA provider in Libfabric versions prior to 1.10 contains a bug
714
+ * where the FI_LOCAL_COMM and FI_REMOTE_COMM capabilities are not
715
+ * advertised. However, we know that this provider supports both local and
716
+ * remote communication. We must exclude these capability bits in order to
717
+ * select EFA when we are using a version of Libfabric with this bug.
718
+ *
719
+ * Call fi_getinfo() without those capabilities and specifically ask for
720
+ * the EFA provider. This is safe to do as EFA is only supported on Amazon
721
+ * EC2 and EC2 only supports EFA and TCP-based networks. We'll also skip
722
+ * this logic if the user specifies an include list without EFA or adds EFA
723
+ * to the exclude list. Note that it is also skipped when neither list is set.
724
+ */
725
+ if ((include_list && is_in_list (include_list , "efa" )) ||
726
+ (exclude_list && !is_in_list (exclude_list , "efa" ))) {
727
+ hints_dup = fi_dupinfo (hints );
728
+ hints_dup -> caps &= ~(FI_LOCAL_COMM | FI_REMOTE_COMM );
729
+ hints_dup -> fabric_attr -> prov_name = strdup ("efa" );
730
+
731
+ ret = fi_getinfo (fi_version , NULL , NULL , 0ULL , hints_dup , & providers );
732
+
733
+ opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
734
+ "%s:%d: EFA specific fi_getinfo(): %s\n" ,
735
+ __FILE__ , __LINE__ , fi_strerror (- ret ));
736
+
737
+ if (FI_ENODATA == - ret ) {
738
+ /**
739
+ * EFA is not available so fall through to call fi_getinfo() again
740
+ * with the local/remote capabilities set.
741
+ */
742
+ fi_freeinfo (hints_dup );
743
+ hints_dup = NULL ;
744
+ } else if (0 != ret ) {
745
+ opal_show_help ("help-mtl-ofi.txt" , "OFI call fail" , true,
746
+ "fi_getinfo" ,
747
+ ompi_process_info .nodename , __FILE__ , __LINE__ ,
748
+ fi_strerror (- ret ), - ret );
749
+ goto error ;
750
+ } else {
751
+ fi_freeinfo (hints );
752
+ hints = hints_dup ;
753
+ hints_dup = NULL ;
754
+ goto select_prov ;
755
+ }
756
+ }
702
757
703
758
/**
704
759
* fi_getinfo: returns information about fabric services for reaching a
@@ -711,6 +766,11 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
711
766
0ULL , /* Optional flag */
712
767
hints , /* In: Hints to filter providers */
713
768
& providers ); /* Out: List of matching providers */
769
+
770
+ opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
771
+ "%s:%d: fi_getinfo(): %s\n" ,
772
+ __FILE__ , __LINE__ , fi_strerror (- ret ));
773
+
714
774
if (FI_ENODATA == - ret ) {
715
775
// It is not an error if no information is returned.
716
776
goto error ;
@@ -722,17 +782,23 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
722
782
goto error ;
723
783
}
724
784
785
+ select_prov :
725
786
/**
726
787
* Select a provider from the list returned by fi_getinfo().
727
788
*/
728
- prov = select_ofi_provider (providers );
789
+ prov = select_ofi_provider (providers , include_list , exclude_list );
729
790
if (!prov ) {
730
791
opal_output_verbose (1 , ompi_mtl_base_framework .framework_output ,
731
792
"%s:%d: select_ofi_provider: no provider found\n" ,
732
793
__FILE__ , __LINE__ );
733
794
goto error ;
734
795
}
735
796
797
+ opal_argv_free (include_list );
798
+ include_list = NULL ;
799
+ opal_argv_free (exclude_list );
800
+ exclude_list = NULL ;
801
+
736
802
/**
737
803
* Select the format of the OFI tag
738
804
*/
@@ -1006,6 +1072,12 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
1006
1072
return & ompi_mtl_ofi .base ;
1007
1073
1008
1074
error :
1075
+ if (include_list ) {
1076
+ opal_argv_free (include_list );
1077
+ }
1078
+ if (exclude_list ) {
1079
+ opal_argv_free (exclude_list );
1080
+ }
1009
1081
if (providers ) {
1010
1082
(void ) fi_freeinfo (providers );
1011
1083
}
@@ -1015,6 +1087,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
1015
1087
if (hints ) {
1016
1088
(void ) fi_freeinfo (hints );
1017
1089
}
1090
+ if (hints_dup ) {
1091
+ (void ) fi_freeinfo (hints_dup );
1092
+ }
1018
1093
if (ompi_mtl_ofi .sep ) {
1019
1094
(void ) fi_close ((fid_t )ompi_mtl_ofi .sep );
1020
1095
}
0 commit comments