From 8d88926356a3c9820f4a67c52a9b7b05cfa0efaf Mon Sep 17 00:00:00 2001 From: Kento Hasegawa Date: Wed, 15 Oct 2025 16:49:23 +0900 Subject: [PATCH] COLL/UCC: Fix initialization in non-blocking and persistent Signed-off-by: Kento Hasegawa (cherry picked from commit 4b1b9a9c947b7cfad8389ebec52e74788747abe8) --- .mailmap | 2 ++ ompi/mca/coll/ucc/coll_ucc_allgather.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_allgatherv.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_allreduce.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_alltoall.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_alltoallv.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_barrier.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_bcast.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_common.h | 4 +++- ompi/mca/coll/ucc/coll_ucc_gather.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_gatherv.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_reduce.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_scatter.c | 3 ++- ompi/mca/coll/ucc/coll_ucc_scatterv.c | 3 ++- 16 files changed, 33 insertions(+), 15 deletions(-) diff --git a/.mailmap b/.mailmap index 42895b1ddd6..17c977feab8 100644 --- a/.mailmap +++ b/.mailmap @@ -137,3 +137,5 @@ George Katevenis Brian Barrett Andrii Bilokur B-a-S + +Kento Hasegawa hasegawa.kento diff --git a/ompi/mca/coll/ucc/coll_ucc_allgather.c b/ompi/mca/coll/ucc/coll_ucc_allgather.c index c80aebb2a2c..08eb6eeb5e8 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allgather.c +++ b/ompi/mca/coll/ucc/coll_ucc_allgather.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,7 +99,7 @@ int mca_coll_ucc_iallgather(const void *sbuf, int scount, struct ompi_datatype_t mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallgather"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allgather_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_allgatherv.c b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c index 1a3ba27f053..64ff9856c58 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allgatherv.c +++ b/ompi/mca/coll/ucc/coll_ucc_allgatherv.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -99,7 +100,7 @@ int mca_coll_ucc_iallgatherv(const void *sbuf, int scount, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallgatherv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allgatherv_init(sbuf, scount, sdtype, rbuf, rcounts, rdisps, rdtype, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_allreduce.c b/ompi/mca/coll/ucc/coll_ucc_allreduce.c index 3ed8e8cc372..5eb2793985a 100644 --- a/ompi/mca/coll/ucc/coll_ucc_allreduce.c +++ b/ompi/mca/coll/ucc/coll_ucc_allreduce.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -89,7 +90,7 @@ int mca_coll_ucc_iallreduce(const void *sbuf, void *rbuf, int count, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iallreduce"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_allreduce_init(sbuf, rbuf, count, dtype, op, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); diff --git a/ompi/mca/coll/ucc/coll_ucc_alltoall.c b/ompi/mca/coll/ucc/coll_ucc_alltoall.c index 1fce7b1f733..26700932bf2 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoall.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoall.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -97,7 +98,7 @@ int mca_coll_ucc_ialltoall(const void *sbuf, int scount, struct ompi_datatype_t mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoall"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_alltoall_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c index 53fd0cfa4d7..82ee126309e 100644 --- a/ompi/mca/coll/ucc/coll_ucc_alltoallv.c +++ b/ompi/mca/coll/ucc/coll_ucc_alltoallv.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,7 +101,7 @@ int mca_coll_ucc_ialltoallv(const void *sbuf, const int *scounts, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ialltoallv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_alltoallv_init(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_barrier.c b/ompi/mca/coll/ucc/coll_ucc_barrier.c index 9790fffc2f9..a8cc72eaf95 100644 --- a/ompi/mca/coll/ucc/coll_ucc_barrier.c +++ b/ompi/mca/coll/ucc/coll_ucc_barrier.c @@ -1,5 +1,6 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +49,7 @@ int mca_coll_ucc_ibarrier(struct ompi_communicator_t *comm, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibarrier"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_barrier_init(ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); *request = &coll_req->super; diff --git a/ompi/mca/coll/ucc/coll_ucc_bcast.c b/ompi/mca/coll/ucc/coll_ucc_bcast.c index fb80fb03f8a..8b3f5593de9 100644 --- a/ompi/mca/coll/ucc/coll_ucc_bcast.c +++ b/ompi/mca/coll/ucc/coll_ucc_bcast.c @@ -1,5 +1,6 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -66,7 +67,7 @@ int mca_coll_ucc_ibcast(void *buf, int count, struct ompi_datatype_t *dtype, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ibcast"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_bcast_init(buf, count, dtype, root, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); diff --git a/ompi/mca/coll/ucc/coll_ucc_common.h b/ompi/mca/coll/ucc/coll_ucc_common.h index 9d9163aa46d..7f9b3c84c62 100644 --- a/ompi/mca/coll/ucc/coll_ucc_common.h +++ b/ompi/mca/coll/ucc/coll_ucc_common.h @@ -1,5 +1,6 @@ /** Copyright (c) 2021 Mellanox Technologies. All rights reserved. + Copyright (c) 2025 Fujitsu Limited. All rights reserved. $COPYRIGHT$ Additional copyrights may follow $HEADER$ @@ -25,7 +26,7 @@ } \ } while(0) -#define COLL_UCC_GET_REQ(_coll_req) do { \ +#define COLL_UCC_GET_REQ(_coll_req, _comm) do { \ opal_free_list_item_t *item; \ item = opal_free_list_wait (&mca_coll_ucc_component.requests); \ if (OPAL_UNLIKELY(NULL == item)) { \ @@ -40,6 +41,7 @@ _coll_req->super.req_state = OMPI_REQUEST_ACTIVE; \ _coll_req->super.req_free = mca_coll_ucc_req_free; \ _coll_req->super.req_type = OMPI_REQUEST_COLL; \ + _coll_req->super.req_mpi_object.comm = _comm; \ } while(0) #define COLL_UCC_REQ_INIT(_coll_req, _req, _coll, _module) do{ \ diff --git a/ompi/mca/coll/ucc/coll_ucc_gather.c b/ompi/mca/coll/ucc/coll_ucc_gather.c index 8ede6a58e58..c8cf40908a3 100644 --- a/ompi/mca/coll/ucc/coll_ucc_gather.c +++ b/ompi/mca/coll/ucc/coll_ucc_gather.c @@ -2,6 +2,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,7 +115,7 @@ int mca_coll_ucc_igather(const void *sbuf, int scount, struct ompi_datatype_t *s mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc igather"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_gather_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_gatherv.c b/ompi/mca/coll/ucc/coll_ucc_gatherv.c index 13049a76e0f..4cd9b651d4e 100644 --- a/ompi/mca/coll/ucc/coll_ucc_gatherv.c +++ b/ompi/mca/coll/ucc/coll_ucc_gatherv.c @@ -2,6 +2,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -106,7 +107,7 @@ int mca_coll_ucc_igatherv(const void *sbuf, int scount, struct ompi_datatype_t * mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc igatherv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_gatherv_init(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce.c b/ompi/mca/coll/ucc/coll_ucc_reduce.c index 0de4b2ff421..3595a448d35 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce.c @@ -1,5 +1,6 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -92,7 +93,7 @@ int mca_coll_ucc_ireduce(const void *sbuf, void* rbuf, int count, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_init(sbuf, rbuf, count, dtype, op, root, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c index 93a9b295ac0..a0a33f3c1ac 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -103,7 +104,7 @@ int mca_coll_ucc_ireduce_scatter(const void *sbuf, void *rbuf, const int *rcount mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce_scatter"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_init(sbuf, rbuf, rcounts, dtype, op, ucc_module, &req, coll_req)); COLL_UCC_POST_AND_CHECK(req); diff --git a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c index e9352d669e2..ebc22e5a409 100644 --- a/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c +++ b/ompi/mca/coll/ucc/coll_ucc_reduce_scatter_block.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,7 +99,7 @@ int mca_coll_ucc_ireduce_scatter_block(const void *sbuf, void *rbuf, int rcount, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc ireduce_scatter_block"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_reduce_scatter_block_init(sbuf, rbuf, rcount, dtype, op, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_scatter.c b/ompi/mca/coll/ucc/coll_ucc_scatter.c index 548ce290bdf..c31fcb04ec4 100644 --- a/ompi/mca/coll/ucc/coll_ucc_scatter.c +++ b/ompi/mca/coll/ucc/coll_ucc_scatter.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -117,7 +118,7 @@ int mca_coll_ucc_iscatter(const void *sbuf, int scount, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iscatter"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_scatter_init(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, ucc_module, &req, coll_req)); diff --git a/ompi/mca/coll/ucc/coll_ucc_scatterv.c b/ompi/mca/coll/ucc/coll_ucc_scatterv.c index 738aa14a953..121c894179e 100644 --- a/ompi/mca/coll/ucc/coll_ucc_scatterv.c +++ b/ompi/mca/coll/ucc/coll_ucc_scatterv.c @@ -1,6 +1,7 @@ /** * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2025 Fujitsu Limited. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,7 +111,7 @@ int mca_coll_ucc_iscatterv(const void *sbuf, const int *scounts, mca_coll_ucc_req_t *coll_req = NULL; UCC_VERBOSE(3, "running ucc iscatterv"); - COLL_UCC_GET_REQ(coll_req); + COLL_UCC_GET_REQ(coll_req, comm); COLL_UCC_CHECK(mca_coll_ucc_scatterv_init(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, ucc_module, &req, coll_req));