From 421532553d5630befa467839b66fbecc1812594d Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Mon, 23 Jun 2025 22:32:28 +0000 Subject: [PATCH] osc: Fix rdma component when not using ob1 When the ob1 PML is not eligible for selection (such as when the user sets --mca pml cm), the BML and BTL frameworks are not initialized, and the rdma osc component later fails because there are no BTLs available. This patch resolves the issue by having the rdma osc component initialize the BML interface itself. Making this change required two additional, related changes. First, since the BTLs use the modex, the rdma initialization must be moved before the modex point, so that putting data in the modex works as expected. Second, BTLs can require loading the entire world during init (such as usnic, or TCP when there are multiple threads and multiple NICs), so we extend the world loading checks to include OSC. Since the other Portals4 components say that they require world loading, we assume that the Portals4 osc component requires it as well. 
Signed-off-by: Brian Barrett --- ompi/instance/instance.c | 14 +++++++------ ompi/mca/osc/base/osc_base_init.c | 2 ++ ompi/mca/osc/osc.h | 8 +++++++ .../mca/osc/portals4/osc_portals4_component.c | 2 ++ ompi/mca/osc/rdma/osc_rdma_component.c | 21 +++++++++++++++++++ 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index 0b83e442b0c..ff140e98891 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -536,6 +536,10 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return ompi_instance_print_error ("mca_pml_base_select() failed", ret); } + if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { + return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret); + } + OMPI_TIMING_IMPORT_OPAL("orte_init"); OMPI_TIMING_NEXT("rte_init-commit"); @@ -617,10 +621,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret); } - if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { - return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret); - } - /* io and topo components are not selected here -- see comment above about the io and topo frameworks being loaded lazily */ @@ -654,7 +654,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return ompi_instance_print_error ("ompi_attr_create_predefined_keyvals() failed", ret); } - if (mca_pml_base_requires_world ()) { + if (mca_pml_base_requires_world() || + mca_osc_base_requires_world()) { /* need to set up comm world for this instance -- XXX -- FIXME -- probably won't always * be the case. 
*/ if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) { @@ -699,7 +700,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) /* some btls/mtls require we call add_procs with all procs in the job. * since the btls/mtls have no visibility here it is up to the pml to * convey this requirement */ - if (mca_pml_base_requires_world ()) { + if (mca_pml_base_requires_world() || + mca_osc_base_requires_world()) { if (NULL == (procs = ompi_proc_world (&nprocs))) { return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret); } diff --git a/ompi/mca/osc/base/osc_base_init.c b/ompi/mca/osc/base/osc_base_init.c index 5dea1fae8fa..9df5e9363fb 100644 --- a/ompi/mca/osc/base/osc_base_init.c +++ b/ompi/mca/osc/base/osc_base_init.c @@ -30,6 +30,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/win/win.h" +bool ompi_osc_base_requires_world = false; + int ompi_osc_base_select(ompi_win_t *win, void **base, diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 82c1879ce93..39063ef0914 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -53,6 +53,9 @@ struct ompi_datatype_t; struct ompi_op_t; struct ompi_request_t; + +extern bool ompi_osc_base_requires_world; + /* ******************************************************************** */ @@ -419,6 +422,11 @@ typedef ompi_osc_base_module_4_0_0_t ompi_osc_base_module_t; /* ******************************************************************** */ +static inline bool mca_osc_base_requires_world (void) +{ + return ompi_osc_base_requires_world; +} + END_C_DECLS diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 75bdffaf278..c6302541e9b 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -349,6 +349,8 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) return ret; } + ompi_osc_base_requires_world = true; + return OMPI_SUCCESS; } diff --git 
a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 41b5f9fe553..a5d06cb7916 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -345,6 +345,27 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads, __FILE__, __LINE__, ret); } + ret = mca_bml_base_init(enable_progress_threads, enable_mpi_threads); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: bml_base_init() failed: %d", + __FILE__, __LINE__, ret); + return ret; + } + + /* check if any btls do not support dynamic add_procs */ + mca_btl_base_selected_module_t* selected_btl; + OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized, + mca_btl_base_selected_module_t) { + mca_btl_base_module_t *btl = selected_btl->btl_module; + + if (btl->btl_flags & MCA_BTL_FLAGS_SINGLE_ADD_PROCS) { + ompi_osc_base_requires_world = true; + break; + } + + } + return ret; }