From 4b1b9a9c947b7cfad8389ebec52e74788747abe8 Mon Sep 17 00:00:00 2001 From: Kento Hasegawa Date: Wed, 15 Oct 2025 16:49:23 +0900 Subject: [PATCH] COLL/UCC: Fix initialization in non-blocking and persistent Signed-off-by: Kento Hasegawa --- .mailmap | 2 ++ ompi/mca/coll/ucc/coll_ucc_allgather.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_allgatherv.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_allreduce.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_alltoall.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_alltoallv.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_barrier.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_bcast.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_common.h | 6 ++++-- ompi/mca/coll/ucc/coll_ucc_gather.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_gatherv.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_reduce.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_scatter.c | 4 ++-- ompi/mca/coll/ucc/coll_ucc_scatterv.c | 4 ++-- 16 files changed, 34 insertions(+), 30 deletions(-) diff --git a/.mailmap b/.mailmap index b463497a038..0b2e1684f6d 100644 --- a/.mailmap +++ b/.mailmap @@ -137,3 +137,5 @@ George Katevenis Brian Barrett Andrii Bilokur B-a-S + +Kento Hasegawa hasegawa.kento diff --git a/ompi/mca/coll/ucc/coll_ucc_allgather.c b/ompi/mca/coll/ucc/coll_ucc_allgather.c index 2362cc038a1..a0e63daf1d3 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allgather.c +++ b/ompi/mca/coll/ucc/coll_ucc_allgather.c @@ -99,7 +99,7 @@ int mca_coll_ucc_iallgather(const void *sbuf, size_t scount, struct ompi_datatyp mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallgather"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allgather_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, false, ucc_module, &req, coll_req)); @@ -124,7 +124,7 @@ int mca_coll_ucc_allgather_init(const void *sbuf, size_t scount, struct ompi_dat ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "allgather_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_allgather_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, diff --git a/ompi/mca/coll/ucc/coll_ucc_allgatherv.c b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c index a2958496c70..aeec8577e9f 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allgatherv.c +++ b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c @@ -104,7 +104,7 @@ int mca_coll_ucc_iallgatherv(const void *sbuf, size_t scount, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallgatherv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allgatherv_init_common(sbuf, scount, sdtype, rbuf, rcounts, rdisps, rdtype, false, ucc_module, &req, coll_req)); @@ -131,7 +131,7 @@ int mca_coll_ucc_allgatherv_init(const void *sbuf, size_t scount, struct ompi_da ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "allgatherv_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_allgatherv_init_common(sbuf, scount, sdtype, rbuf, rcounts, rdisps, rdtype, diff --git a/ompi/mca/coll/ucc/coll_ucc_allreduce.c b/ompi/mca/coll/ucc/coll_ucc_allreduce.c index ac8c990a939..9bd2fdb6cde 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allreduce.c +++ b/ompi/mca/coll/ucc/coll_ucc_allreduce.c @@ -92,7 +92,7 @@ int mca_coll_ucc_iallreduce(const void *sbuf, void *rbuf, size_t count, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallreduce"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allreduce_init_common(sbuf, rbuf, count, dtype, op, false, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); @@ -116,7 +116,7 @@ int mca_coll_ucc_allreduce_init(const void *sbuf, void *rbuf, size_t count, ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "allreduce_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_allreduce_init_common(sbuf, rbuf, count, dtype, op, true, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_alltoall.c b/ompi/mca/coll/ucc/coll_ucc_alltoall.c index f61171576b2..c689dfe11d8 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoall.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoall.c @@ -99,7 +99,7 @@ int mca_coll_ucc_ialltoall(const void *sbuf, size_t scount, struct ompi_datatype mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoall"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_alltoall_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, false, ucc_module, &req, coll_req)); @@ -124,7 +124,7 @@ int mca_coll_ucc_alltoall_init(const void *sbuf, size_t scount, struct ompi_data ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "alltoall_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_alltoall_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, diff --git a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c index ce9b7e03fee..7221545358c 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c @@ -106,7 +106,7 @@ int mca_coll_ucc_ialltoallv(const void *sbuf, ompi_count_array_t scounts, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoallv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_alltoallv_init_common(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, false, ucc_module, &req, coll_req)); @@ -134,7 +134,7 @@ int mca_coll_ucc_alltoallv_init(const void *sbuf, ompi_count_array_t scounts, ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "alltoallv_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_alltoallv_init_common(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, diff --git a/ompi/mca/coll/ucc/coll_ucc_barrier.c b/ompi/mca/coll/ucc/coll_ucc_barrier.c index da886e56f54..92920816c57 100644 --- a/ompi/mca/coll/ucc/coll_ucc_barrier.c +++ b/ompi/mca/coll/ucc/coll_ucc_barrier.c @@ -54,7 +54,7 @@ int mca_coll_ucc_ibarrier(struct ompi_communicator_t *comm, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibarrier"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_barrier_init_common(false, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; @@ -75,7 +75,7 @@ int mca_coll_ucc_barrier_init(struct ompi_communicator_t *comm, struct ompi_info ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "barrier_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_barrier_init_common(true, ucc_module, &req, coll_req)); *request = &coll_req->super; diff --git a/ompi/mca/coll/ucc/coll_ucc_bcast.c b/ompi/mca/coll/ucc/coll_ucc_bcast.c index 8da3c839133..e05fa1a2470 100644 --- a/ompi/mca/coll/ucc/coll_ucc_bcast.c +++ b/ompi/mca/coll/ucc/coll_ucc_bcast.c @@ -72,7 +72,7 @@ int mca_coll_ucc_ibcast(void *buf, size_t count, struct ompi_datatype_t *dtype, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibcast"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_bcast_init_common(buf, count, dtype, root, false, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); @@ -95,7 +95,7 @@ int mca_coll_ucc_bcast_init(void *buf, size_t count, struct ompi_datatype_t *dty ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "bcast_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_bcast_init_common(buf, count, dtype, root, true, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_common.h b/ompi/mca/coll/ucc/coll_ucc_common.h index 09bd9359a1e..c3c1ca2ba3c 100644 --- a/ompi/mca/coll/ucc/coll_ucc_common.h +++ b/ompi/mca/coll/ucc/coll_ucc_common.h @@ -26,7 +26,7 @@ } \ } while(0) -#define COLL_UCC_GET_REQ(_coll_req) do { \ +#define COLL_UCC_GET_REQ(_coll_req, _comm) do { \ opal_free_list_item_t *item; \ item = opal_free_list_wait (&mca_coll_ucc_component.requests); \ if (OPAL_UNLIKELY(NULL == item)) { \ @@ -41,9 +41,10 @@ _coll_req->super.req_state = OMPI_REQUEST_ACTIVE; \ _coll_req->super.req_free = mca_coll_ucc_req_free; \ _coll_req->super.req_type = OMPI_REQUEST_COLL; \ + _coll_req->super.req_mpi_object.comm = _comm; \ } while(0) -#define COLL_UCC_GET_REQ_PERSISTENT(_coll_req) \ +#define COLL_UCC_GET_REQ_PERSISTENT(_coll_req, _comm) \ do { \ opal_free_list_item_t *item; \ item = opal_free_list_wait(&mca_coll_ucc_component.requests); \ @@ -59,6 +60,7 @@ _coll_req->super.req_free = mca_coll_ucc_req_free; \ _coll_req->super.req_start = mca_coll_ucc_req_start; \ _coll_req->super.req_type = OMPI_REQUEST_COLL; \ + _coll_req->super.req_mpi_object.comm = _comm; \ _coll_req->ucc_req = NULL; \ } while (0) diff --git a/ompi/mca/coll/ucc/coll_ucc_gather.c b/ompi/mca/coll/ucc/coll_ucc_gather.c index ad03d654b4c..9e2595b611d 100644 --- a/ompi/mca/coll/ucc/coll_ucc_gather.c +++ b/ompi/mca/coll/ucc/coll_ucc_gather.c @@ -116,7 +116,7 @@ int mca_coll_ucc_igather(const void *sbuf, size_t scount, struct ompi_datatype_t mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc igather"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_gather_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, false, ucc_module, &req, coll_req)); @@ -142,7 +142,7 @@ int mca_coll_ucc_gather_init(const void *sbuf, size_t scount, struct ompi_dataty ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "gather_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_gather_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, true, ucc_module, diff --git a/ompi/mca/coll/ucc/coll_ucc_gatherv.c b/ompi/mca/coll/ucc/coll_ucc_gatherv.c index abbdde5a77b..42c195ea51f 100644 --- a/ompi/mca/coll/ucc/coll_ucc_gatherv.c +++ b/ompi/mca/coll/ucc/coll_ucc_gatherv.c @@ -111,7 +111,7 @@ int mca_coll_ucc_igatherv(const void *sbuf, size_t scount, struct ompi_datatype_ mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc igatherv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_gatherv_init_common(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, false, ucc_module, &req, coll_req)); @@ -138,7 +138,7 @@ int mca_coll_ucc_gatherv_init(const void *sbuf, size_t scount, struct ompi_datat ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "gatherv_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_gatherv_init_common(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, true, ucc_module, diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce.c b/ompi/mca/coll/ucc/coll_ucc_reduce.c index c76b16c8881..fc4a698d9bd 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce.c @@ -95,7 +95,7 @@ int mca_coll_ucc_ireduce(const void *sbuf, void* rbuf, size_t count, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_init_common(sbuf, rbuf, count, dtype, op, root, false, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); @@ -119,7 +119,7 @@ int mca_coll_ucc_reduce_init(const void *sbuf, void *rbuf, size_t count, ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "reduce_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_reduce_init_common(sbuf, rbuf, count, dtype, op, root, true, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c index 7ba6effb774..c52d9f73478 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c @@ -109,7 +109,7 @@ int mca_coll_ucc_ireduce_scatter(const void *sbuf, void *rbuf, ompi_count_array_ mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce_scatter"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_init_common(sbuf, rbuf, rcounts, dtype, op, false, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); @@ -134,7 +134,7 @@ int mca_coll_ucc_reduce_scatter_init(const void *sbuf, void *rbuf, ompi_count_ar ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "reduce_scatter_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_init_common(sbuf, rbuf, rcounts, dtype, op, true, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c index 49deba9393e..e8082a645c1 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c @@ -105,7 +105,7 @@ int mca_coll_ucc_ireduce_scatter_block(const void *sbuf, void *rbuf, size_t rcou mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce_scatter_block"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_block_init_common(sbuf, rbuf, rcount, dtype, op, false, ucc_module, &req, coll_req)); @@ -132,7 +132,7 @@ int mca_coll_ucc_reduce_scatter_block_init(const void *sbuf, void *rbuf, size_t ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "reduce_scatter_block_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_block_init_common(sbuf, rbuf, rcount, dtype, op, true, ucc_module, diff --git a/ompi/mca/coll/ucc/coll_ucc_scatter.c b/ompi/mca/coll/ucc/coll_ucc_scatter.c index 4f4e60eaec3..216b88f1cd9 100644 --- a/ompi/mca/coll/ucc/coll_ucc_scatter.c +++ b/ompi/mca/coll/ucc/coll_ucc_scatter.c @@ -120,7 +120,7 @@ int mca_coll_ucc_iscatter(const void *sbuf, size_t scount, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iscatter"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_scatter_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, false, ucc_module, &req, coll_req)); @@ -146,7 +146,7 @@ int mca_coll_ucc_scatter_init(const void *sbuf, size_t scount, struct ompi_datat ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "scatter_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_scatter_init_common(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, true, ucc_module, &req, diff --git a/ompi/mca/coll/ucc/coll_ucc_scatterv.c b/ompi/mca/coll/ucc/coll_ucc_scatterv.c index c1a611afd53..c97e69e942e 100644 --- a/ompi/mca/coll/ucc/coll_ucc_scatterv.c +++ b/ompi/mca/coll/ucc/coll_ucc_scatterv.c @@ -113,7 +113,7 @@ int mca_coll_ucc_iscatterv(const void *sbuf, ompi_count_array_t scounts, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iscatterv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_scatterv_init_common(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, false, ucc_module, &req, coll_req)); @@ -140,7 +140,7 @@ int mca_coll_ucc_scatterv_init(const void *sbuf, ompi_count_array_t scounts, ucc_coll_req_h req; mca_coll_ucc_req_t *coll_req = NULL; - COLL_UCC_GET_REQ_PERSISTENT(coll_req); + COLL_UCC_GET_REQ_PERSISTENT(coll_req, comm); UCC_VERBOSE(3, "scatterv_init init %p", coll_req); COLL_UCC_CHECK(mca_coll_ucc_scatterv_init_common(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root,