@@ -560,7 +560,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
560560 bool enable_mpi_threads ,
561561 bool * accelerator_support )
562562{
563- int ret , fi_version ;
563+ int ret , fi_primary_version , fi_alternate_version ;
564564 int num_local_ranks , sep_support_in_provider , max_ofi_ctxts ;
565565 int ofi_tag_leading_zeros , ofi_tag_bits_for_cid ;
566566 char * * include_list = NULL ;
@@ -595,8 +595,17 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
595595 * (FI_REMOTE_COMM), which is insufficient for MTL selection.
596596 *
597597 * Note: API version 1.9 is the first version that supports FI_HMEM
598+ *
599+ * Note: API version 1.18 is the first version that clearly defines
600+ * providers' behavior regarding CUDA API calls: by default, every provider
601+ * is permitted to make CUDA calls if the application uses API >= 1.18.
602+ *
603+ * If the application uses API < 1.18, some providers will not claim support
604+ * for FI_HMEM (even if they are capable of it) because they do not know
605+ * whether the application permits them to make CUDA calls.
598606 */
599- fi_version = FI_VERSION (1 , 9 );
607+ fi_primary_version = FI_VERSION (1 , 18 );
608+ fi_alternate_version = FI_VERSION (1 , 9 );
600609
601610 /**
602611 * Hints to filter providers
@@ -695,7 +704,11 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
695704 hints_dup -> caps &= ~(FI_LOCAL_COMM | FI_REMOTE_COMM );
696705 hints_dup -> fabric_attr -> prov_name = strdup ("efa" );
697706
698- ret = fi_getinfo (fi_version , NULL , NULL , 0ULL , hints_dup , & providers );
707+ ret = fi_getinfo (fi_primary_version , NULL , NULL , 0ULL , hints_dup , & providers );
708+ if (FI_ENOSYS == - ret ) {
709+ /* libfabric is not new enough; fall back to the older API version */
710+ ret = fi_getinfo (fi_alternate_version , NULL , NULL , 0ULL , hints_dup , & providers );
711+ }
699712
700713 opal_output_verbose (1 , opal_common_ofi .output ,
701714 "%s:%d: EFA specific fi_getinfo(): %s\n" ,
@@ -727,12 +740,15 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
727740 * remote node or service. this does not necessarily allocate resources.
728741 * Pass NULL for name/service because we want a list of providers supported.
729742 */
730- ret = fi_getinfo (fi_version , /* OFI version requested */
743+ ret = fi_getinfo (fi_primary_version , /* OFI version requested */
731744 NULL , /* Optional name or fabric to resolve */
732745 NULL , /* Optional service name or port to request */
733746 0ULL , /* Optional flag */
734747 hints , /* In: Hints to filter providers */
735748 & providers ); /* Out: List of matching providers */
749+ if (FI_ENOSYS == - ret ) {
750+ ret = fi_getinfo (fi_alternate_version , NULL , NULL , 0ULL , hints , & providers );
751+ }
736752
737753 opal_output_verbose (1 , opal_common_ofi .output ,
738754 "%s:%d: fi_getinfo(): %s\n" ,
0 commit comments