From c3e22c6903e62152e126a43b66e9a899ef2c2997 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 3 Jul 2025 22:26:54 +0000 Subject: [PATCH 1/4] btl/ofi: Remove cache hack for EFA This was an optimization around a bug in the EFA provider. The EFA provider shouldn't be caching explicit registrations anyway, so avoiding the double cache is silly (and breaks when EFA fixes the explicit registration cache bug). Signed-off-by: Brian Barrett --- opal/mca/btl/ofi/btl_ofi.h | 6 +----- opal/mca/btl/ofi/btl_ofi_component.c | 10 +--------- opal/mca/btl/ofi/btl_ofi_module.c | 5 +---- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/opal/mca/btl/ofi/btl_ofi.h b/opal/mca/btl/ofi/btl_ofi.h index 0019065ecfe..e12d490b390 100644 --- a/opal/mca/btl/ofi/btl_ofi.h +++ b/opal/mca/btl/ofi/btl_ofi.h @@ -13,8 +13,7 @@ * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. - * All Rights reserved. + * Copyright (c) 2020-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2022 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -140,9 +139,6 @@ struct mca_btl_ofi_module_t { /** registration cache */ mca_rcache_base_module_t *rcache; - /* If the underlying OFI provider has its own cache, we want to bypass - * rcache registration */ - bool bypass_cache; }; typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t; diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c index 6785dcc74a0..bafa29f6c9d 100644 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ b/opal/mca/btl/ofi/btl_ofi_component.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * - * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. 
* Copyright (c) 2020-2023 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -657,7 +657,6 @@ static int mca_btl_ofi_init_device(struct fi_info *info) module->outstanding_rdma = 0; module->use_virt_addr = false; module->use_fi_mr_bind = false; - module->bypass_cache = false; #if defined(FI_HMEM) if (ofi_info->caps & FI_HMEM) { @@ -674,13 +673,6 @@ static int mca_btl_ofi_init_device(struct fi_info *info) module->use_fi_mr_bind = true; } - /* Currently there is no API to query whether the libfabric provider - * uses an underlying registration cache. For now, just check for known - * providers that use registration caching. */ - if (!strncasecmp(info->fabric_attr->prov_name, "efa", 3)) { - module->bypass_cache = true; - } - /* create endpoint list */ OBJ_CONSTRUCT(&module->endpoints, opal_list_t); OBJ_CONSTRUCT(&module->module_lock, opal_mutex_t); diff --git a/opal/mca/btl/ofi/btl_ofi_module.c b/opal/mca/btl/ofi/btl_ofi_module.c index e213d5b1865..96bd0cbdce0 100644 --- a/opal/mca/btl/ofi/btl_ofi_module.c +++ b/opal/mca/btl/ofi/btl_ofi_module.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2018 Intel, Inc, All rights reserved * - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. * Copyright (c) 2022-2024 Triad National Security, LLC. All rights * reserved. 
@@ -198,9 +198,6 @@ mca_btl_ofi_register_mem(struct mca_btl_base_module_t *btl, int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int rc; uint32_t cache_flags = 0; - if (ofi_module->bypass_cache) { - cache_flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS; - } rc = ofi_module->rcache->rcache_register(ofi_module->rcache, base, size, cache_flags, access_flags, (mca_rcache_base_registration_t **) &reg); From 937429fd83e3d3977525730bf769a7d1b5786a8b Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Tue, 8 Jul 2025 21:47:30 +0000 Subject: [PATCH 2/4] btl/ofi: Export memory monitor The OFI MTL exports a memory monitor to Libfabric (so that OMPI's patcher wins), but in cases where OB1 is directly selected, that code won't run. So make sure to also configure Libfabric so that it won't try to use a suboptimal memory monitor in the case that only the OFI BTL is used. Signed-off-by: Brian Barrett --- opal/mca/btl/ofi/btl_ofi_module.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/opal/mca/btl/ofi/btl_ofi_module.c b/opal/mca/btl/ofi/btl_ofi_module.c index 96bd0cbdce0..58844bee018 100644 --- a/opal/mca/btl/ofi/btl_ofi_module.c +++ b/opal/mca/btl/ofi/btl_ofi_module.c @@ -148,6 +148,20 @@ void mca_btl_ofi_rcache_init(mca_btl_ofi_module_t *module) if (!module->initialized) { mca_rcache_base_resources_t rcache_resources; char *tmp; + int ret; + + /* this must be called during single threaded part of the code and + * before Libfabric configures its memory monitors. Easiest to do + * that before domain open. Silently ignore not-supported errors, + * as they are not critical to program correctness, but only + * indicate that Libfabric will have to pick a different, possibly + * less optimal, monitor. 
*/ + ret = opal_common_ofi_export_memory_monitor(); + if (0 != ret && -FI_ENOSYS != ret) { + opal_output_verbose(1, opal_common_ofi.output, + "Failed to inject Libfabric memory monitor: %s", + fi_strerror(-ret)); + } (void) opal_asprintf(&tmp, "ofi.%s", module->linux_device_name); From edf3634c3ad89b42e1898305f05f64b9979cbe6f Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Tue, 8 Jul 2025 21:29:19 +0000 Subject: [PATCH 3/4] mtl/ofi: Use FI_MR_HMEM for explicit reg check Rather than use the CXI provider name to disable explicit hmem registration, use the FI_MR_HMEM flag. Signed-off-by: Brian Barrett --- ompi/mca/mtl/ofi/mtl_ofi_component.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 256dd483fc0..0ac9044576b 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -5,7 +5,7 @@ * Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020-2023 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -823,20 +823,17 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, } } else { *accelerator_support = true; - ompi_mtl_ofi.hmem_needs_reg = true; - /* - * Workaround for the fact that the CXI provider actually doesn't need for accelerator memory to be registered - * for local buffers, but if one does do so using fi_mr_regattr, one actually needs to manage the - * requested_key field in the fi_mr_attr attr argument, and the OFI MTL doesn't track which requested_keys - * have already been registered. So just set a flag to disable local registration. 
Note the OFI BTL doesn't - * have a problem here since it uses fi_mr_regattr only within the context of an rcache, and manages the - * requested_key field in this way. - */ - if ((NULL != strstr(prov->fabric_attr->prov_name, "cxi")) || - (NULL != strstr(prov->fabric_attr->prov_name, "CXI")) ) { - ompi_mtl_ofi.hmem_needs_reg = false; - } + /* Only explicitly register domain buffers if the provider requires it. + For example, CXI does not require it but EFA does require it. */ + if ((prov->domain_attr->mr_mode & FI_MR_HMEM) != 0) { + ompi_mtl_ofi.hmem_needs_reg = true; + opal_output_verbose(50, opal_common_ofi.output, + "Support for device buffers enabled with explicit registration"); + } else { + opal_output_verbose(50, opal_common_ofi.output, + "Support for device buffers enabled with implicit registration"); + } } #else opal_output_verbose(50, opal_common_ofi.output, From 72a7e0ec88bd505afe4ed900abc055a94a605258 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Sun, 6 Jul 2025 21:39:49 +0000 Subject: [PATCH 4/4] mtl/ofi: Add rcache for hmem The OFI MTL was creating a registration for every operation that used HMEM when FI_MR_HMEM is required. This is really performance inefficient, since creating registrations is expensive. So stick a rcache in front of the registrations. Signed-off-by: Brian Barrett --- ompi/mca/mtl/ofi/Makefile.am | 3 +- ompi/mca/mtl/ofi/mtl_ofi.h | 80 +++++------------ ompi/mca/mtl/ofi/mtl_ofi_component.c | 9 ++ ompi/mca/mtl/ofi/mtl_ofi_mr.c | 124 +++++++++++++++++++++++++++ ompi/mca/mtl/ofi/mtl_ofi_request.h | 7 +- ompi/mca/mtl/ofi/mtl_ofi_types.h | 14 +++ 6 files changed, 174 insertions(+), 63 deletions(-) create mode 100644 ompi/mca/mtl/ofi/mtl_ofi_mr.c diff --git a/ompi/mca/mtl/ofi/Makefile.am b/ompi/mca/mtl/ofi/Makefile.am index 5842abd3018..7baad1b211f 100644 --- a/ompi/mca/mtl/ofi/Makefile.am +++ b/ompi/mca/mtl/ofi/Makefile.am @@ -9,7 +9,7 @@ # and Technology (RIST). All rights reserved. 
# Copyright (c) 2020 Triad National Security, LLC. All rights # reserved. -# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. +# Copyright (c) 2022-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. # Copyright (c) 2025 Jeffrey M. Squyres. All rights reserved. # $COPYRIGHT$ # @@ -48,6 +48,7 @@ mtl_ofi_sources = \ mtl_ofi_component.c \ mtl_ofi_endpoint.h \ mtl_ofi_endpoint.c \ + mtl_ofi_mr.c \ mtl_ofi_request.h \ mtl_ofi_types.h \ mtl_ofi_opt.h \ diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index aae756b0518..323477d74f4 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -4,8 +4,7 @@ * reserved. * Copyright (c) 2019-2024 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2018-2023 Amazon.com, Inc. or its affiliates. All Rights reserved. - * reserved. + * Copyright (c) 2018-2025 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -73,6 +72,8 @@ extern int ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, int ompi_mtl_ofi_progress_no_inline(void); +int ompi_mtl_ofi_rcache_init(void); + #if OPAL_HAVE_THREAD_LOCAL extern opal_thread_local int ompi_mtl_ofi_per_thread_ctx; #endif @@ -291,78 +292,37 @@ ompi_mtl_ofi_set_mr_null(ompi_mtl_ofi_request_t *ofi_req) { static int ompi_mtl_ofi_register_buffer(struct opal_convertor_t *convertor, ompi_mtl_ofi_request_t *ofi_req, - void* buffer) { + void* buffer) +{ + int ret; + uint32_t cache_flags = 0; + ofi_req->mr = NULL; if (ofi_req->length <= 0 || NULL == buffer) { return OMPI_SUCCESS; } -#if OPAL_OFI_HAVE_FI_MR_IFACE - - if ((convertor->flags & CONVERTOR_ACCELERATOR) && ompi_mtl_ofi.hmem_needs_reg) { - /* Register buffer */ - int ret; - struct fi_mr_attr attr = {0}; - struct iovec iov = {0}; - - iov.iov_base = buffer; - iov.iov_len = ofi_req->length; - attr.mr_iov = &iov; - attr.iov_count = 1; - attr.access = FI_SEND | FI_RECV; - attr.offset = 0; - attr.context = NULL; - if (false == ompi_mtl_base_selected_component->accelerator_support) { - goto reg; - } else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "cuda")) { - attr.iface = FI_HMEM_CUDA; - opal_accelerator.get_device(&attr.device.cuda); -#if OPAL_OFI_HAVE_FI_HMEM_ROCR - } else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) { - attr.iface = FI_HMEM_ROCR; - opal_accelerator.get_device(&attr.device.cuda); -#endif -#if OPAL_OFI_HAVE_FI_HMEM_ZE - } else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "ze")) { - attr.iface = FI_HMEM_ZE; - opal_accelerator.get_device(&attr.device.ze); -#endif - } else { - return OPAL_ERROR; - } -reg: - ret = fi_mr_regattr(ompi_mtl_ofi.domain, &attr, 0, &ofi_req->mr); - - if (ret) { - opal_show_help("help-mtl-ofi.txt", "Buffer Memory Registration Failed", true, - 
opal_accelerator_base_selected_component.base_version.mca_component_name, - buffer, ofi_req->length, - fi_strerror(-ret), ret); - ofi_req->mr = NULL; - return OMPI_ERROR; - } + if (! ((convertor->flags & CONVERTOR_ACCELERATOR) && ompi_mtl_ofi.hmem_needs_reg)) { + return OMPI_SUCCESS; } -#endif - - return OMPI_SUCCESS; + /* note - the cache access flags are a little broken, because rcache doesn't + * understand send/recv requirements. Since this rcache is only used in the + * MTL, that isn't a problem and we fix it in the underlying register call. + */ + ret = ompi_mtl_ofi.rcache->rcache_register(ompi_mtl_ofi.rcache, buffer, ofi_req->length, + cache_flags, MCA_RCACHE_ACCESS_ANY, + (mca_rcache_base_registration_t **) &ofi_req->mr); + return ret; } /** Deregister buffer */ __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_deregister_buffer(ompi_mtl_ofi_request_t *ofi_req) { if (ofi_req->mr) { - int ret; - ret = fi_close(&ofi_req->mr->fid); - if (ret) { - opal_show_help("help-mtl-ofi.txt", "OFI call fail", true, - "fi_close", - ompi_process_info.nodename, __FILE__, __LINE__, - fi_strerror(-ret), ofi_req->mr->fid); - return OMPI_ERROR; - } - ofi_req->mr = NULL; + (void)ompi_mtl_ofi.rcache->rcache_deregister(ompi_mtl_ofi.rcache, &ofi_req->mr->base); } + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 0ac9044576b..049ff4cf8c8 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -841,6 +841,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, __FILE__, __LINE__); #endif + if (ompi_mtl_ofi.hmem_needs_reg) { + ompi_mtl_ofi_rcache_init(); + } + /** * Select the format of the OFI tag */ @@ -1174,6 +1178,11 @@ ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl) { ssize_t ret; + if (NULL != ompi_mtl_ofi.rcache) { + mca_rcache_base_module_destroy(ompi_mtl_ofi.rcache); + ompi_mtl_ofi.rcache = NULL; + } + 
opal_progress_unregister(ompi_mtl_ofi_progress_no_inline); /* Close all the OFI objects */ diff --git a/ompi/mca/mtl/ofi/mtl_ofi_mr.c b/ompi/mca/mtl/ofi/mtl_ofi_mr.c new file mode 100644 index 00000000000..2f39a98ba23 --- /dev/null +++ b/ompi/mca/mtl/ofi/mtl_ofi_mr.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "mtl_ofi.h" + +static int +ompi_mtl_ofi_reg_mem(void *reg_data, void *base, size_t size, + mca_rcache_base_registration_t *reg) +{ + int ret; + struct fi_mr_attr attr = {0}; + struct iovec iov = {0}; + ompi_mtl_ofi_reg_t *mtl_reg = (ompi_mtl_ofi_reg_t *)reg; + int dev_id; + uint64_t flags; + + iov.iov_base = base; + iov.iov_len = size; + attr.mr_iov = &iov; + attr.iov_count = 1; + attr.access = FI_SEND | FI_RECV; + attr.offset = 0; + attr.context = NULL; + +#if OPAL_OFI_HAVE_FI_MR_IFACE + if (OPAL_LIKELY(NULL != base)) { + ret = opal_accelerator.check_addr(base, &dev_id, &flags); + if (ret < 0) { + return ret; + } else if (ret > 0 ) { + if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "cuda")) { + attr.iface = FI_HMEM_CUDA; + opal_accelerator.get_device(&attr.device.cuda); +#if OPAL_OFI_HAVE_FI_HMEM_ROCR + } else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) { + attr.iface = FI_HMEM_ROCR; + opal_accelerator.get_device(&attr.device.cuda); +#endif +#if OPAL_OFI_HAVE_FI_HMEM_ZE + } else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "ze")) { + attr.iface = FI_HMEM_ZE; + opal_accelerator.get_device(&attr.device.ze); +#endif + } else { + return OPAL_ERROR; + } + } + } +#endif + + ret = fi_mr_regattr(ompi_mtl_ofi.domain, &attr, 0, &mtl_reg->ofi_mr); + if (0 != ret) { + opal_show_help("help-mtl-ofi.txt", "Buffer Memory Registration Failed", true, + 
opal_accelerator_base_selected_component.base_version.mca_component_name, + base, size, fi_strerror(-ret), ret); + mtl_reg->ofi_mr = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + + mtl_reg->mem_desc = fi_mr_desc(mtl_reg->ofi_mr); + + return OPAL_SUCCESS; +} + + +static int +ompi_mtl_ofi_dereg_mem(void *reg_data, mca_rcache_base_registration_t *reg) +{ + ompi_mtl_ofi_reg_t *mtl_reg = (ompi_mtl_ofi_reg_t *)reg; + int ret; + + if (mtl_reg->ofi_mr != NULL) { + ret = fi_close(&mtl_reg->ofi_mr->fid); + if (0 != ret) { + opal_output_verbose(1, opal_common_ofi.output, + "%s: error unpinning memory mr=%p: %s", + __func__, (void *)mtl_reg->ofi_mr, + fi_strerror(-ret)); + return OPAL_ERROR; + } + } + + return OPAL_SUCCESS; +} + + +int +ompi_mtl_ofi_rcache_init(void) +{ + mca_rcache_base_resources_t rcache_resources; + char *tmp; + + if (NULL != ompi_mtl_ofi.rcache) { + return OMPI_SUCCESS; + } + + tmp = strdup("mtl-ofi"); + rcache_resources.cache_name = tmp; + rcache_resources.reg_data = NULL; + rcache_resources.sizeof_reg = sizeof(ompi_mtl_ofi_reg_t); + rcache_resources.register_mem = ompi_mtl_ofi_reg_mem; + rcache_resources.deregister_mem = ompi_mtl_ofi_dereg_mem; + + ompi_mtl_ofi.rcache = mca_rcache_base_module_create("grdma", &ompi_mtl_ofi, &rcache_resources); + free(tmp); + + if (NULL == ompi_mtl_ofi.rcache) { + /* something went horribly wrong */ + opal_output_verbose(1, opal_common_ofi.output, + "creating rcache failed"); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/mtl/ofi/mtl_ofi_request.h b/ompi/mca/mtl/ofi/mtl_ofi_request.h index cb746f341db..74355f7bca0 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_request.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_request.h @@ -2,6 +2,7 @@ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights reserved. 
* * $COPYRIGHT$ * @@ -25,6 +26,7 @@ typedef enum { OMPI_MTL_OFI_PROBE } ompi_mtl_ofi_request_type_t; +struct ompi_mtl_ofi_reg_t; struct ompi_mtl_ofi_request_t; struct ompi_mtl_ofi_request_t { @@ -89,8 +91,9 @@ struct ompi_mtl_ofi_request_t { struct mca_mtl_request_t *mrecv_req; /** Stores reference to memory region from registration */ - /* Set to NULL if memory not registered or if non accelerator buffer */ - struct fid_mr *mr; + + /* Set to NULL if memory not registered */ + struct ompi_mtl_ofi_reg_t *mr; }; typedef struct ompi_mtl_ofi_request_t ompi_mtl_ofi_request_t; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index a925f0ec28e..b89cc2c274e 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -4,6 +4,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2022-2023 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,6 +17,9 @@ #include "mtl_ofi.h" +#include "opal/mca/rcache/base/base.h" + + BEGIN_C_DECLS /** @@ -102,6 +106,8 @@ typedef struct mca_mtl_ofi_module_t { bool has_posted_initial_buffer; bool hmem_needs_reg; + /** registration cache */ + mca_rcache_base_module_t *rcache; } mca_mtl_ofi_module_t; extern mca_mtl_ofi_module_t ompi_mtl_ofi; @@ -116,6 +122,14 @@ typedef enum { OFI_SCALABLE_EP, } mca_mtl_ofi_ep_type; +struct ompi_mtl_ofi_reg_t { + mca_rcache_base_registration_t base; + struct fid_mr *ofi_mr; + void *mem_desc; +}; +typedef struct ompi_mtl_ofi_reg_t ompi_mtl_ofi_reg_t; + + /* * Define upper limit for number of events read from a CQ. * Setting this to 100 as this was deemed optimal from empirical data.