From b821b625bcb72ce8064e03746a6e75ea0c769906 Mon Sep 17 00:00:00 2001
From: Matthew Whitlock
Date: Fri, 5 Sep 2025 11:48:12 -0500
Subject: [PATCH] Prevent subcomms from being freed before user's comm

Signed-off-by: Matthew Whitlock
(cherry picked from commit 5fadf87978e96fe7ec501f9496828f23d17764c9)
---
Reviewer note: the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Line counts were re-checked against every hunk
header and the diffstat, but the exact indentation and the column alignment of
the macro continuation backslashes are a best guess. Apply with
`git apply --ignore-whitespace` and compare against the upstream commit
referenced above.

 ompi/mca/coll/han/coll_han_subcomms.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/ompi/mca/coll/han/coll_han_subcomms.c b/ompi/mca/coll/han/coll_han_subcomms.c
index 90bc0d1d972..11e5e65103e 100644
--- a/ompi/mca/coll/han/coll_han_subcomms.c
+++ b/ompi/mca/coll/han/coll_han_subcomms.c
@@ -40,6 +40,15 @@
         (COMM)->c_coll->coll_ ## COLL ## _module = (FALLBACKS).COLL.module; \
     } while(0)
 
+#define HAN_SUBCOM_EXTRA_RETAIN(COMM, PARENT_COMM) \
+    do \
+    { \
+        if (OMPI_COMM_CID_IS_LOWER(COMM, PARENT_COMM)) { \
+            OMPI_COMM_SET_EXTRA_RETAIN(COMM); \
+            OBJ_RETAIN(COMM); \
+        } \
+    } while (0)
+
 /*
  * Routine that creates the local hierarchical sub-communicators
  * Called each time a collective is called.
@@ -184,6 +193,11 @@ int mca_coll_han_comm_create_new(struct ompi_communicator_t *comm,
     HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, scatter);
 
     OBJ_DESTRUCT(&comm_info);
+
+    /* Ensure these communicators aren't released before the parent comm */
+    HAN_SUBCOM_EXTRA_RETAIN(*low_comm, comm);
+    HAN_SUBCOM_EXTRA_RETAIN(*up_comm, comm);
+
     return OMPI_SUCCESS;
 
 return_with_error:
@@ -338,6 +352,14 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
     han_module->cached_up_comms = up_comms;
     han_module->cached_vranks = vranks;
 
+    /* Ensure these communicators aren't released before the parent comm */
+    for(int i = 0; i < COLL_HAN_LOW_MODULES; i++) {
+        HAN_SUBCOM_EXTRA_RETAIN(low_comms[i], comm);
+    }
+    for(int i = 0; i < COLL_HAN_UP_MODULES; i++) {
+        HAN_SUBCOM_EXTRA_RETAIN(up_comms[i], comm);
+    }
+
     /* Reset the saved collectives to point back to HAN */
     HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, allgatherv);
     HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, allgather);