Skip to content

Commit 6f8ae38

Browse files
committed
Fix the intercom scatter.
The original implementation overwrote the requests array, leading to segfaults. While fixing this, I also fixed a few other typos and removed all basic functions that were just an indirection to the base functions. Fixes #12482. Signed-off-by: George Bosilca <[email protected]>
1 parent 0261a03 commit 6f8ae38

File tree

9 files changed

+13
-194
lines changed

9 files changed

+13
-194
lines changed

ompi/mca/coll/basic/Makefile.am

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1515
# reserved.
1616
# Copyright (c) 2017 IBM Corporation. All rights reserved.
17+
# Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1718
# $COPYRIGHT$
1819
#
1920
# Additional copyrights may follow
@@ -43,8 +44,6 @@ sources = \
4344
coll_basic_reduce.c \
4445
coll_basic_reduce_scatter.c \
4546
coll_basic_reduce_scatter_block.c \
46-
coll_basic_scan.c \
47-
coll_basic_exscan.c \
4847
coll_basic_scatter.c \
4948
coll_basic_scatterv.c
5049

ompi/mca/coll/basic/coll_basic.h

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* reserved.
1717
* Copyright (c) 2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -132,24 +133,6 @@ BEGIN_C_DECLS
132133
struct ompi_communicator_t *comm,
133134
mca_coll_base_module_t *module);
134135

135-
int mca_coll_basic_bcast_log_inter(void *buff, int count,
136-
struct ompi_datatype_t *datatype,
137-
int root,
138-
struct ompi_communicator_t *comm,
139-
mca_coll_base_module_t *module);
140-
141-
int mca_coll_basic_exscan_intra(const void *sbuf, void *rbuf, int count,
142-
struct ompi_datatype_t *dtype,
143-
struct ompi_op_t *op,
144-
struct ompi_communicator_t *comm,
145-
mca_coll_base_module_t *module);
146-
147-
int mca_coll_basic_exscan_inter(const void *sbuf, void *rbuf, int count,
148-
struct ompi_datatype_t *dtype,
149-
struct ompi_op_t *op,
150-
struct ompi_communicator_t *comm,
151-
mca_coll_base_module_t *module);
152-
153136
int mca_coll_basic_gather_inter(const void *sbuf, int scount,
154137
struct ompi_datatype_t *sdtype,
155138
void *rbuf, int rcount,
@@ -187,12 +170,6 @@ BEGIN_C_DECLS
187170
int root,
188171
struct ompi_communicator_t *comm,
189172
mca_coll_base_module_t *module);
190-
int mca_coll_basic_reduce_log_inter(const void *sbuf, void *rbuf, int count,
191-
struct ompi_datatype_t *dtype,
192-
struct ompi_op_t *op,
193-
int root,
194-
struct ompi_communicator_t *comm,
195-
mca_coll_base_module_t *module);
196173

197174
int mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf,
198175
int rcount,
@@ -222,11 +199,6 @@ BEGIN_C_DECLS
222199
struct ompi_communicator_t *comm,
223200
mca_coll_base_module_t *module);
224201

225-
int mca_coll_basic_scan_intra(const void *sbuf, void *rbuf, int count,
226-
struct ompi_datatype_t *dtype,
227-
struct ompi_op_t *op,
228-
struct ompi_communicator_t *comm,
229-
mca_coll_base_module_t *module);
230202
int mca_coll_basic_scan_inter(const void *sbuf, void *rbuf, int count,
231203
struct ompi_datatype_t *dtype,
232204
struct ompi_op_t *op,

ompi/mca/coll/basic/coll_basic_bcast.c

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -181,20 +181,3 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count,
181181
/* All done */
182182
return err;
183183
}
184-
185-
186-
/*
187-
* bcast_log_inter
188-
*
189-
* Function: - broadcast using O(N) algorithm
190-
* Accepts: - same arguments as MPI_Bcast()
191-
* Returns: - MPI_SUCCESS or error code
192-
*/
193-
int
194-
mca_coll_basic_bcast_log_inter(void *buff, int count,
195-
struct ompi_datatype_t *datatype, int root,
196-
struct ompi_communicator_t *comm,
197-
mca_coll_base_module_t *module)
198-
{
199-
return OMPI_ERR_NOT_IMPLEMENTED;
200-
}

ompi/mca/coll/basic/coll_basic_exscan.c

Lines changed: 0 additions & 70 deletions
This file was deleted.

ompi/mca/coll/basic/coll_basic_module.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* Copyright (c) 2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
20+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
2021
* $COPYRIGHT$
2122
*
2223
* Additional copyrights may follow
@@ -97,13 +98,13 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
9798
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
9899
basic_module->super.coll_barrier = ompi_coll_base_barrier_intra_basic_linear;
99100
basic_module->super.coll_bcast = ompi_coll_base_bcast_intra_basic_linear;
100-
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
101+
basic_module->super.coll_exscan = ompi_coll_base_exscan_intra_linear;
101102
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
102103
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
103104
basic_module->super.coll_reduce = ompi_coll_base_reduce_intra_basic_linear;
104105
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
105106
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
106-
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
107+
basic_module->super.coll_scan = ompi_coll_base_scan_intra_linear;
107108
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
108109
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
109110
} else {
@@ -115,13 +116,13 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
115116
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
116117
basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_log;
117118
basic_module->super.coll_bcast = mca_coll_basic_bcast_log_intra;
118-
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
119+
basic_module->super.coll_exscan = ompi_coll_base_exscan_intra_linear;
119120
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
120121
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
121122
basic_module->super.coll_reduce = mca_coll_basic_reduce_log_intra;
122123
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
123124
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
124-
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
125+
basic_module->super.coll_scan = ompi_coll_base_scan_intra_linear;
125126
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
126127
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
127128
}

ompi/mca/coll/basic/coll_basic_reduce.c

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Copyright (c) 2015 Research Organization for Information Science
1313
* and Technology (RIST). All rights reserved.
1414
* Copyright (c) 2022 IBM Corporation. All rights reserved.
15+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -101,9 +102,9 @@ mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count,
101102
char *rcv_buffer = (char*)rbuf;
102103
char *inplace_temp = NULL;
103104

104-
/* JMS Codearound for now -- if the operations is not communative,
105+
/* JMS Code around for now -- if the operations is not commutative,
105106
* just call the linear algorithm. Need to talk to Edgar / George
106-
* about fixing this algorithm here to work with non-communative
107+
* about fixing this algorithm here to work with non-commutative
107108
* operations. */
108109

109110
if (!ompi_op_is_commute(op)) {
@@ -353,21 +354,3 @@ mca_coll_basic_reduce_lin_inter(const void *sbuf, void *rbuf, int count,
353354
/* All done */
354355
return err;
355356
}
356-
357-
358-
/*
359-
* reduce_log_inter
360-
*
361-
* Function: - reduction using O(N) algorithm
362-
* Accepts: - same as MPI_Reduce()
363-
* Returns: - MPI_SUCCESS or error code
364-
*/
365-
int
366-
mca_coll_basic_reduce_log_inter(const void *sbuf, void *rbuf, int count,
367-
struct ompi_datatype_t *dtype,
368-
struct ompi_op_t *op,
369-
int root, struct ompi_communicator_t *comm,
370-
mca_coll_base_module_t *module)
371-
{
372-
return OMPI_ERR_NOT_IMPLEMENTED;
373-
}

ompi/mca/coll/basic/coll_basic_reduce_scatter.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
* Returns: - MPI_SUCCESS or error code
5050
*
5151
* Algorithm:
52-
* Cummutative, reasonable sized messages
52+
* Commutative, reasonable sized messages
5353
* recursive halving algorithm
5454
* Others:
5555
* reduce and scatterv (needs to be cleaned

ompi/mca/coll/basic/coll_basic_scan.c

Lines changed: 0 additions & 50 deletions
This file was deleted.

ompi/mca/coll/basic/coll_basic_scatter.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Copyright (c) 2015 Research Organization for Information Science
1313
* and Technology (RIST). All rights reserved.
1414
* Copyright (c) 2017 IBM Corporation. All rights reserved.
15+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -77,7 +78,7 @@ mca_coll_basic_scatter_inter(const void *sbuf, int scount,
7778
err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
7879
MCA_COLL_BASE_TAG_SCATTER,
7980
MCA_PML_BASE_SEND_STANDARD, comm,
80-
reqs++));
81+
&(reqs[i])));
8182
if (OMPI_SUCCESS != err) {
8283
ompi_coll_base_free_reqs(reqs, i + 1);
8384
return err;

0 commit comments

Comments
 (0)