Skip to content

Commit 4f404a8

Browse files
committed
ofi: support RHEL7 and libfabric pre 1.9
Fix to allow Open MPI to build on RHEL7 systems with the default distro libfabric. Related to #10954. Signed-off-by: Howard Pritchard <[email protected]>
1 parent d61d384 commit 4f404a8

File tree

5 files changed

+42
-5
lines changed

5 files changed

+42
-5
lines changed

config/opal_check_ofi.m4

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,16 @@ AC_DEFUN([OPAL_CHECK_OFI],[
137137
AC_CHECK_DECLS([PMIX_PACKAGE_RANK],
138138
[],
139139
[],
140-
[#include <pmix.h>])])
140+
[#include <pmix.h>])
141+
142+
AC_CHECK_MEMBER([struct fi_mr_attr.iface],
143+
[opal_check_fi_mr_attr_iface=1],
144+
[opal_check_fi_mr_attr_iface=0],
145+
[[#include <rdma/fi_domain.h>]])
146+
147+
AC_DEFINE_UNQUOTED([OPAL_OFI_HAVE_FI_MR_IFACE],
148+
[${opal_check_fi_mr_attr_iface}],
149+
[check if iface available in fi_mr_attr])])
141150
142151
CPPFLAGS=${opal_check_ofi_save_CPPFLAGS}
143152
LDFLAGS=${opal_check_ofi_save_LDFLAGS}

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ int ompi_mtl_ofi_register_buffer(struct opal_convertor_t *convertor,
306306
return OMPI_SUCCESS;
307307
}
308308

309+
#if OPAL_OFI_HAVE_FI_MR_IFACE
310+
309311
if ((convertor->flags & CONVERTOR_ACCELERATOR) && ompi_mtl_ofi.hmem_needs_reg) {
310312
/* Register buffer */
311313
int ret;
@@ -343,6 +345,8 @@ int ompi_mtl_ofi_register_buffer(struct opal_convertor_t *convertor,
343345
}
344346
}
345347

348+
#endif
349+
346350
return OMPI_SUCCESS;
347351
}
348352

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
77
* reserved.
88
* Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
9-
* Copyright (c) 2020-2021 Triad National Security, LLC. All rights
9+
* Copyright (c) 2020-2023 Triad National Security, LLC. All rights
1010
* reserved.
1111
* $COPYRIGHT$
1212
*
@@ -617,8 +617,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
617617
}
618618

619619
/* Request device transfer capabilities */
620+
#if defined(FI_HMEM)
620621
hints->caps |= FI_HMEM;
621622
hints->domain_attr->mr_mode |= FI_MR_HMEM | FI_MR_ALLOCATED;
623+
#endif
622624

623625
no_hmem:
624626

@@ -737,12 +739,14 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
737739
__FILE__, __LINE__, fi_strerror(-ret));
738740

739741
if (FI_ENODATA == -ret) {
742+
#if defined(FI_HMEM)
740743
/* Attempt selecting a provider without FI_HMEM hints */
741744
if (hints->caps & FI_HMEM) {
742745
hints->caps &= ~FI_HMEM;
743746
hints->domain_attr->mr_mode &= ~FI_MR_HMEM;
744747
goto no_hmem;
745748
}
749+
#endif
746750
/* It is not an error if no information is returned. */
747751
goto error;
748752
} else if (0 != ret) {
@@ -770,11 +774,12 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
770774
opal_argv_free(exclude_list);
771775
exclude_list = NULL;
772776

777+
*accelerator_support = false;
778+
#if defined(FI_HMEM)
773779
if (!(prov->caps & FI_HMEM)) {
774780
opal_output_verbose(50, opal_common_ofi.output,
775781
"%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n",
776782
__FILE__, __LINE__);
777-
*accelerator_support = false;
778783
} else {
779784
*accelerator_support = true;
780785
ompi_mtl_ofi.hmem_needs_reg = true;
@@ -791,6 +796,11 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
791796
}
792797

793798
}
799+
#else
800+
opal_output_verbose(50, opal_common_ofi.output,
801+
"%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n",
802+
__FILE__, __LINE__);
803+
#endif
794804

795805
/**
796806
* Select the format of the OFI tag

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2018-2019 Intel, Inc. All rights reserved.
1616
*
1717
* Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved.
18-
* Copyright (c) 2020-2022 Triad National Security, LLC. All rights
18+
* Copyright (c) 2020-2023 Triad National Security, LLC. All rights
1919
* reserved.
2020
* $COPYRIGHT$
2121
*
@@ -106,7 +106,11 @@ static int validate_info(struct fi_info *info, uint64_t required_caps, char **in
106106
mr_mode = info->domain_attr->mr_mode;
107107

108108
if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE
109+
#if defined(FI_MR_HMEM)
109110
|| (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT | FI_MR_HMEM)) == 0)) {
111+
#else
112+
|| (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT)) == 0)) {
113+
#endif
110114
BTL_VERBOSE(("unsupported MR mode"));
111115
return OPAL_ERROR;
112116
}
@@ -339,27 +343,32 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
339343

340344
mca_btl_ofi_component.module_count = 0;
341345

346+
#if defined(FI_HMEM)
342347
/* Request device transfer capabilities, separate from required_caps */
343348
hints.caps |= FI_HMEM;
344349
hints.domain_attr->mr_mode |= FI_MR_HMEM;
345350
no_hmem:
351+
#endif
346352

347353
/* Do the query. The earliest version that supports FI_HMEM hints is 1.9 */
348354
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
349355
if (0 != rc) {
356+
#if defined(FI_HMEM)
350357
if (hints.caps & FI_HMEM) {
351358
/* Try again without FI_HMEM hints */
352359
hints.caps &= ~FI_HMEM;
353360
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
354361
goto no_hmem;
355362
}
363+
#endif
356364
BTL_VERBOSE(("fi_getinfo failed with code %d: %s", rc, fi_strerror(-rc)));
357365
if (NULL != include_list) {
358366
opal_argv_free(include_list);
359367
}
360368
return NULL;
361369
}
362370

371+
#if defined(FI_HMEM)
363372
/* If we get to this point with FI_HMEM hint set, we want it to be a
364373
* required capability
365374
*/
@@ -375,6 +384,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
375384
}
376385
required_caps |= FI_HMEM;
377386
}
387+
#endif
378388

379389
/* count the number of resources */
380390
info = info_list;
@@ -604,9 +614,11 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
604614
module->use_fi_mr_bind = false;
605615
module->bypass_cache = false;
606616

617+
#if defined(FI_HMEM)
607618
if (ofi_info->caps & FI_HMEM) {
608619
module->super.btl_flags |= MCA_BTL_FLAGS_ACCELERATOR_RDMA;
609620
}
621+
#endif
610622

611623
if (ofi_info->domain_attr->mr_mode == FI_MR_BASIC
612624
|| ofi_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) {

opal/mca/btl/ofi/btl_ofi_module.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*
1717
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
1818
* Copyright (c) 2020 Google, LLC. All rights reserved.
19-
* Copyright (c) 2022 Triad National Security, LLC. All rights
19+
* Copyright (c) 2022-2023 Triad National Security, LLC. All rights
2020
* reserved.
2121
* $COPYRIGHT$
2222
*
@@ -254,6 +254,7 @@ int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
254254
attr.context = NULL;
255255
attr.requested_key = (uint64_t) reg;
256256

257+
#if OPAL_OFI_HAVE_FI_MR_IFACE
257258
if (OPAL_LIKELY(NULL != base)) {
258259
rc = opal_accelerator.check_addr(base, &dev_id, &flags);
259260
if (rc < 0) {
@@ -270,6 +271,7 @@ int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
270271
}
271272
}
272273
}
274+
#endif
273275

274276
rc = fi_mr_regattr(btl->domain, &attr, 0, &ur->ur_mr);
275277
if (0 != rc) {

0 commit comments

Comments
 (0)