Skip to content

Commit 65f92cd

Browse files
ggouaillardet authored and bosilca committed
coll/libnbc: correctly handle datatype alignment when allocating two buffers at once
1 parent fddc91b commit 65f92cd

File tree

3 files changed

+25
-14
lines changed

3 files changed

+25
-14
lines changed

ompi/mca/coll/libnbc/nbc_ireduce.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
*
1515
*/
1616

17+
#include "opal/include/opal/align.h"
1718
#include "ompi/op/op.h"
1819

1920
#include "nbc_internal.h"
@@ -104,8 +105,9 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
104105
redbuf = recvbuf;
105106
} else {
106107
/* recvbuf may not be valid on non-root nodes */
107-
handle->tmpbuf = malloc (2*span);
108-
redbuf = (char*) handle->tmpbuf + span - gap;
108+
ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
109+
handle->tmpbuf = malloc (span_align + span);
110+
redbuf = (char*) handle->tmpbuf + span_align - gap;
109111
}
110112
} else {
111113
handle->tmpbuf = malloc (span);

ompi/mca/coll/libnbc/nbc_ireduce_scatter.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
* Author(s): Torsten Hoefler <[email protected]>
1717
*
1818
*/
19+
#include "opal/include/opal/align.h"
20+
1921
#include "nbc_internal.h"
2022

2123
/* a reduce_scatter schedule cannot be cached easily because the contents
@@ -40,7 +42,7 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
4042
struct mca_coll_base_module_2_1_0_t *module) {
4143
int peer, rank, maxr, p, res, count;
4244
MPI_Aint ext;
43-
ptrdiff_t gap, span;
45+
ptrdiff_t gap, span, span_align;
4446
char *sbuf, inplace;
4547
NBC_Schedule *schedule;
4648
NBC_Handle *handle;
@@ -84,14 +86,15 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
8486
maxr = (int) ceil ((log((double) p) / LOG2));
8587

8688
span = opal_datatype_span(&datatype->super, count, &gap);
87-
handle->tmpbuf = malloc (span * 2);
89+
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
90+
handle->tmpbuf = malloc (span_align + span);
8891
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
8992
NBC_Return_handle (handle);
9093
return OMPI_ERR_OUT_OF_RESOURCE;
9194
}
9295

9396
rbuf = (char *)(-gap);
94-
lbuf = (char *)(span - gap);
97+
lbuf = (char *)(span_align - gap);
9598

9699
schedule = OBJ_NEW(NBC_Schedule);
97100
if (OPAL_UNLIKELY(NULL == schedule)) {
@@ -205,7 +208,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
205208
struct mca_coll_base_module_2_1_0_t *module) {
206209
int rank, res, count, lsize, rsize;
207210
MPI_Aint ext;
208-
ptrdiff_t gap, span;
211+
ptrdiff_t gap, span, span_align;
209212
NBC_Schedule *schedule;
210213
NBC_Handle *handle;
211214
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@@ -226,14 +229,15 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
226229
}
227230

228231
span = opal_datatype_span(&datatype->super, count, &gap);
232+
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
229233

230234
res = NBC_Init_handle(comm, &handle, libnbc_module);
231235
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
232236
return res;
233237
}
234238

235239
if (count > 0) {
236-
handle->tmpbuf = malloc (2 * span);
240+
handle->tmpbuf = malloc (span_align + span);
237241
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
238242
NBC_Return_handle (handle);
239243
return OMPI_ERR_OUT_OF_RESOURCE;
@@ -259,7 +263,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
259263
if (0 == rank) {
260264
char *lbuf, *rbuf;
261265
lbuf = (char *)(-gap);
262-
rbuf = (char *)(span-gap);
266+
rbuf = (char *)(span_align-gap);
263267
res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true);
264268
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
265269
NBC_Return_handle (handle);

ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* Author(s): Torsten Hoefler <[email protected]>
1515
*
1616
*/
17+
#include "opal/include/opal/align.h"
18+
1719
#include "nbc_internal.h"
1820

1921
/* a reduce_scatter schedule cannot be cached easily because the contents
@@ -75,18 +77,20 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i
7577

7678
if (0 < count) {
7779
char *rbuf, *lbuf, *buf;
80+
ptrdiff_t span_align;
7881

7982
span = opal_datatype_span(&datatype->super, count, &gap);
80-
handle->tmpbuf = malloc (2*span);
83+
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
84+
handle->tmpbuf = malloc (span_align + span);
8185
if (NULL == handle->tmpbuf) {
8286
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
8387
OBJ_RELEASE(schedule);
8488
return OMPI_ERR_OUT_OF_RESOURCE;
8589
}
8690

8791
rbuf = (void *)(-gap);
88-
lbuf = (char *)(span - gap);
89-
redbuf = (char *) handle->tmpbuf + span - gap;
92+
lbuf = (char *)(span_align - gap);
93+
redbuf = (char *) handle->tmpbuf + span_align - gap;
9094

9195
/* copy data to redbuf if we only have a single node */
9296
if ((p == 1) && !inplace) {
@@ -206,7 +210,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
206210
ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) {
207211
int rank, res, count, lsize, rsize;
208212
MPI_Aint ext;
209-
ptrdiff_t gap, span;
213+
ptrdiff_t gap, span, span_align;
210214
NBC_Schedule *schedule;
211215
NBC_Handle *handle;
212216
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@@ -229,9 +233,10 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
229233
count = rcount * lsize;
230234

231235
span = opal_datatype_span(&dtype->super, count, &gap);
236+
span_align = OPAL_ALIGN(span, dtype->super.align, ptrdiff_t);
232237

233238
if (count > 0) {
234-
handle->tmpbuf = malloc (2 * span);
239+
handle->tmpbuf = malloc (span_align + span);
235240
if (NULL == handle->tmpbuf) {
236241
NBC_Return_handle (handle);
237242
return OMPI_ERR_OUT_OF_RESOURCE;
@@ -257,7 +262,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
257262
if (0 == rank) {
258263
char *lbuf, *rbuf;
259264
lbuf = (char *)(-gap);
260-
rbuf = (char *)(span-gap);
265+
rbuf = (char *)(span_align-gap);
261266
res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true);
262267
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
263268
NBC_Return_handle (handle);

0 commit comments

Comments
 (0)