Skip to content

Commit 49128a7

Browse files
rwespetal authored and jsquyres committed
mtl/ofi: Add workaround for EFA local/remote capabilities bug
Some versions of Libfabric contain a bug in the EFA provider where FI_REMOTE_COMM and FI_LOCAL_COMM are not advertised. To work around this, we need to call fi_getinfo() without those capability bits first to see if EFA is available. Also move around some of the provider include/exclude list logic so we can skip this workaround when it is not applicable. Signed-off-by: Robert Wespetal <[email protected]>
1 parent 21bc904 commit 49128a7

File tree

1 file changed

+87
-19
lines changed

1 file changed

+87
-19
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 87 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -341,21 +341,12 @@ is_in_list(char **list, char *item)
341341
}
342342

343343
static struct fi_info*
344-
select_ofi_provider(struct fi_info *providers)
344+
select_ofi_provider(struct fi_info *providers,
345+
char **include_list, char **exclude_list)
345346
{
346-
char **include_list = NULL;
347-
char **exclude_list = NULL;
348347
struct fi_info *prov = providers;
349348

350-
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
351-
"%s:%d: mtl:ofi:provider_include = \"%s\"\n",
352-
__FILE__, __LINE__, prov_include);
353-
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
354-
"%s:%d: mtl:ofi:provider_exclude = \"%s\"\n",
355-
__FILE__, __LINE__, prov_exclude);
356-
357-
if (NULL != prov_include) {
358-
include_list = opal_argv_split(prov_include, ',');
349+
if (NULL != include_list) {
359350
while ((NULL != prov) &&
360351
(!is_in_list(include_list, prov->fabric_attr->prov_name))) {
361352
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -364,8 +355,7 @@ select_ofi_provider(struct fi_info *providers)
364355
prov->fabric_attr->prov_name);
365356
prov = prov->next;
366357
}
367-
} else if (NULL != prov_exclude) {
368-
exclude_list = opal_argv_split(prov_exclude, ',');
358+
} else if (NULL != exclude_list) {
369359
while ((NULL != prov) &&
370360
(is_in_list(exclude_list, prov->fabric_attr->prov_name))) {
371361
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -376,9 +366,6 @@ select_ofi_provider(struct fi_info *providers)
376366
}
377367
}
378368

379-
opal_argv_free(include_list);
380-
opal_argv_free(exclude_list);
381-
382369
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
383370
"%s:%d: mtl:ofi:prov: %s\n",
384371
__FILE__, __LINE__,
@@ -621,7 +608,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
621608
int ret, fi_version;
622609
int num_local_ranks, sep_support_in_provider, max_ofi_ctxts;
623610
int ofi_tag_leading_zeros, ofi_tag_bits_for_cid;
624-
struct fi_info *hints;
611+
char **include_list = NULL;
612+
char **exclude_list = NULL;
613+
struct fi_info *hints, *hints_dup = NULL;
625614
struct fi_info *providers = NULL;
626615
struct fi_info *prov = NULL;
627616
struct fi_info *prov_cq_data = NULL;
@@ -630,6 +619,19 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
630619
int universe_size;
631620
char *univ_size_str;
632621

622+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
623+
"%s:%d: mtl:ofi:provider_include = \"%s\"\n",
624+
__FILE__, __LINE__, prov_include);
625+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
626+
"%s:%d: mtl:ofi:provider_exclude = \"%s\"\n",
627+
__FILE__, __LINE__, prov_exclude);
628+
629+
if (NULL != prov_include) {
630+
include_list = opal_argv_split(prov_include, ',');
631+
} else if (NULL != prov_exclude) {
632+
exclude_list = opal_argv_split(prov_exclude, ',');
633+
}
634+
633635
/**
634636
* Hints to filter providers
635637
* See man fi_getinfo for a list of all filters
@@ -707,6 +709,52 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
707709
*/
708710
fi_version = FI_VERSION(1, 5);
709711

712+
/**
713+
* The EFA provider in Libfabric versions prior to 1.10 contains a bug
714+
* where the FI_LOCAL_COMM and FI_REMOTE_COMM capabilities are not
715+
* advertised. However, we know that this provider supports both local and
716+
* remote communication. We must exclude these capability bits in order to
717+
* select EFA when we are using a version of Libfabric with this bug.
718+
*
719+
* Call fi_getinfo() without those capabilities and specifically ask for
720+
* the EFA provider. This is safe to do as EFA is only supported on Amazon
721+
* EC2 and EC2 only supports EFA and TCP-based networks. We'll also skip
722+
* this logic if the user specifies an include list without EFA or adds EFA
723+
* to the exclude list.
724+
*/
725+
if ((include_list && is_in_list(include_list, "efa")) ||
726+
(exclude_list && !is_in_list(exclude_list, "efa"))) {
727+
hints_dup = fi_dupinfo(hints);
728+
hints_dup->caps &= ~(FI_LOCAL_COMM | FI_REMOTE_COMM);
729+
hints_dup->fabric_attr->prov_name = strdup("efa");
730+
731+
ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints_dup, &providers);
732+
733+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
734+
"%s:%d: EFA specific fi_getinfo(): %s\n",
735+
__FILE__, __LINE__, fi_strerror(-ret));
736+
737+
if (FI_ENODATA == -ret) {
738+
/**
739+
* EFA is not available so fall through to call fi_getinfo() again
740+
* with the local/remote capabilities set.
741+
*/
742+
fi_freeinfo(hints_dup);
743+
hints_dup = NULL;
744+
} else if (0 != ret) {
745+
opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
746+
"fi_getinfo",
747+
ompi_process_info.nodename, __FILE__, __LINE__,
748+
fi_strerror(-ret), -ret);
749+
goto error;
750+
} else {
751+
fi_freeinfo(hints);
752+
hints = hints_dup;
753+
hints_dup = NULL;
754+
goto select_prov;
755+
}
756+
}
757+
710758
/**
711759
* fi_getinfo: returns information about fabric services for reaching a
712760
* remote node or service. this does not necessarily allocate resources.
@@ -718,6 +766,11 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
718766
0ULL, /* Optional flag */
719767
hints, /* In: Hints to filter providers */
720768
&providers); /* Out: List of matching providers */
769+
770+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
771+
"%s:%d: fi_getinfo(): %s\n",
772+
__FILE__, __LINE__, fi_strerror(-ret));
773+
721774
if (FI_ENODATA == -ret) {
722775
// It is not an error if no information is returned.
723776
goto error;
@@ -729,17 +782,23 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
729782
goto error;
730783
}
731784

785+
select_prov:
732786
/**
733787
* Select a provider from the list returned by fi_getinfo().
734788
*/
735-
prov = select_ofi_provider(providers);
789+
prov = select_ofi_provider(providers, include_list, exclude_list);
736790
if (!prov) {
737791
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
738792
"%s:%d: select_ofi_provider: no provider found\n",
739793
__FILE__, __LINE__);
740794
goto error;
741795
}
742796

797+
opal_argv_free(include_list);
798+
include_list = NULL;
799+
opal_argv_free(exclude_list);
800+
exclude_list = NULL;
801+
743802
/**
744803
* Select the format of the OFI tag
745804
*/
@@ -1013,6 +1072,12 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
10131072
return &ompi_mtl_ofi.base;
10141073

10151074
error:
1075+
if (include_list) {
1076+
opal_argv_free(include_list);
1077+
}
1078+
if (exclude_list) {
1079+
opal_argv_free(exclude_list);
1080+
}
10161081
if (providers) {
10171082
(void) fi_freeinfo(providers);
10181083
}
@@ -1022,6 +1087,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
10221087
if (hints) {
10231088
(void) fi_freeinfo(hints);
10241089
}
1090+
if (hints_dup) {
1091+
(void) fi_freeinfo(hints_dup);
1092+
}
10251093
if (ompi_mtl_ofi.sep) {
10261094
(void) fi_close((fid_t)ompi_mtl_ofi.sep);
10271095
}

0 commit comments

Comments
 (0)