diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index e575cec02b2..e4ac687edd5 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -251,6 +251,15 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_ofi.num_ofi_contexts); + ompi_mtl_ofi.disable_hmem = false; + mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, + "disable_hmem", + "Disable HMEM usage", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_ofi.disable_hmem); + return opal_common_ofi_mca_register(&mca_mtl_ofi_component.super.mtl_version); } @@ -626,8 +635,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, /* Request device transfer capabilities */ #if defined(FI_HMEM) - hints->caps |= FI_HMEM; - hints->domain_attr->mr_mode |= FI_MR_HMEM | FI_MR_ALLOCATED; + if (false == ompi_mtl_ofi.disable_hmem) { + hints->caps |= FI_HMEM; + hints->domain_attr->mr_mode |= FI_MR_HMEM | FI_MR_ALLOCATED; + } #endif no_hmem: @@ -791,10 +802,17 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, *accelerator_support = false; #if defined(FI_HMEM) - if (!(prov->caps & FI_HMEM)) { - opal_output_verbose(50, opal_common_ofi.output, - "%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n", - __FILE__, __LINE__); + if (!(prov->caps & FI_HMEM) || (true == ompi_mtl_ofi.disable_hmem)) { + if (!(prov->caps & FI_HMEM) && (false == ompi_mtl_ofi.disable_hmem)) { + opal_output_verbose(50, opal_common_ofi.output, + "%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n", + __FILE__, __LINE__); + } + if (true == ompi_mtl_ofi.disable_hmem) { + opal_output_verbose(50, opal_common_ofi.output, + "%s:%d: Support for device buffers disabled by MCA parameter. Continuing with device to host copies.\n", + __FILE__, __LINE__); + } } else { *accelerator_support = true; ompi_mtl_ofi.hmem_needs_reg = true; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index 836870f8ca7..a925f0ec28e 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -2,8 +2,8 @@ * Copyright (c) 2013-2018 Intel, Inc. All rights reserved * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2022 Triad National Security, LLC. All rights - * reserved. + * Copyright (c) 2022-2023 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -59,6 +59,7 @@ typedef struct mca_mtl_ofi_module_t { int enable_sep; /* MCA to enable/disable SEP feature */ int thread_grouping; /* MCA for thread grouping feature */ int num_ofi_contexts; /* MCA for number of contexts to use */ + bool disable_hmem; /* MCA to enable/disable request for FI_HMEM support from provider */ /** Endpoint name length */ size_t epnamelen; diff --git a/opal/mca/btl/ofi/btl_ofi.h b/opal/mca/btl/ofi/btl_ofi.h index 590e9b34c2b..0019065ecfe 100644 --- a/opal/mca/btl/ofi/btl_ofi.h +++ b/opal/mca/btl/ofi/btl_ofi.h @@ -169,6 +169,8 @@ struct mca_btl_ofi_component_t { size_t max_inject_size; bool disable_inject; + bool disable_hmem; + /** All BTL OFI modules (1 per tl) */ mca_btl_ofi_module_t *modules[MCA_BTL_OFI_MAX_MODULES]; }; diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c index fd52e56d848..a9de2620ac4 100644 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ b/opal/mca/btl/ofi/btl_ofi_component.c @@ -200,6 +200,16 @@ static int mca_btl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_ofi_component.disable_inject); + mca_btl_ofi_component.disable_hmem = false; + mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, + "disable_hmem", + "Disable HMEM usage", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_btl_ofi_component.disable_hmem); + + /* for now we want this component to lose to the MTL. */ module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50; @@ -345,8 +355,10 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules, #if defined(FI_HMEM) /* Request device transfer capabilities, separate from required_caps */ - hints.caps |= FI_HMEM; - hints.domain_attr->mr_mode |= FI_MR_HMEM; + if (false == mca_btl_ofi_component.disable_hmem) { + hints.caps |= FI_HMEM; + hints.domain_attr->mr_mode |= FI_MR_HMEM; + } no_hmem: #endif