Skip to content

Commit 757d189

Browse files
ggouaillardet authored and bwbarrett committed
coll/libnbc: fix integer overflow
Use internal pack/unpack subroutines that operate on MPI_Aint instead of int and hence solve some integer overflows.

Thanks Clyde Stanfield for reporting this issue.

Refs #5383

Signed-off-by: Gilles Gouaillardet <[email protected]>

(back-ported from commit 7629295)
1 parent 39f8903 commit 757d189

File tree

2 files changed

+34
-76
lines changed

2 files changed

+34
-76
lines changed

ompi/mca/coll/libnbc/nbc_ialltoall.c

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
99
* reserved.
1010
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
11-
* Copyright (c) 2014-2017 Research Organization for Information Science
12-
* and Technology (RIST). All rights reserved.
11+
* Copyright (c) 2014-2018 Research Organization for Information Science
12+
* and Technology (RIST). All rights reserved.
1313
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1414
* $COPYRIGHT$
1515
*
@@ -57,7 +57,8 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
5757
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
5858
struct mca_coll_base_module_2_2_0_t *module)
5959
{
60-
int rank, p, res, datasize;
60+
int rank, p, res;
61+
MPI_Aint datasize;
6162
size_t a2asize, sndsize;
6263
NBC_Schedule *schedule;
6364
MPI_Aint rcvext, sndext;
@@ -131,9 +132,9 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
131132
if(NBC_Type_intrinsic(sendtype)) {
132133
datasize = sndext * sendcount;
133134
} else {
134-
res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize);
135+
res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
135136
if (MPI_SUCCESS != res) {
136-
NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
137+
NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
137138
return res;
138139
}
139140
}
@@ -162,23 +163,23 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
162163
memcpy ((char *) tmpbuf + datasize * (p - rank), sendbuf, datasize * rank);
163164
}
164165
} else {
165-
int pos=0;
166+
MPI_Aint pos=0;
166167

167168
/* non-contiguous - pack */
168-
res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, tmpbuf,
169-
(p - rank) * datasize, &pos, comm);
169+
res = ompi_datatype_pack_external ("external32", (char *) sendbuf + (intptr_t)rank * (intptr_t)sendcount * sndext, (intptr_t)(p - rank) * (intptr_t)sendcount, sendtype, tmpbuf,
170+
(intptr_t)(p - rank) * datasize, &pos);
170171
if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
171-
NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
172+
NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
172173
free(tmpbuf);
173174
return res;
174175
}
175176

176177
if (rank != 0) {
177178
pos = 0;
178-
res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) tmpbuf + datasize * (p - rank),
179-
rank * datasize, &pos, comm);
179+
res = ompi_datatype_pack_external("external32", sendbuf, (intptr_t)rank * (intptr_t)sendcount, sendtype, (char *) tmpbuf + datasize * (intptr_t)(p - rank),
180+
rank * datasize, &pos);
180181
if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
181-
NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
182+
NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
182183
free(tmpbuf);
183184
return res;
184185
}
@@ -379,13 +380,13 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
379380
continue;
380381
}
381382

382-
char *rbuf = (char *) recvbuf + r * recvcount * rcvext;
383+
char *rbuf = (char *) recvbuf + (intptr_t)r * (intptr_t)recvcount * rcvext;
383384
res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false);
384385
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
385386
return res;
386387
}
387388

388-
char *sbuf = (char *) sendbuf + r * sendcount * sndext;
389+
char *sbuf = (char *) sendbuf + (intptr_t)r * (intptr_t)sendcount * sndext;
389390
res = NBC_Sched_send (sbuf, false, sendcount, sendtype, r, schedule, false);
390391
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
391392
return res;
@@ -398,7 +399,8 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
398399
static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule,
399400
const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
400401
MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf) {
401-
int res, speer, rpeer, datasize, offset, virtp;
402+
int res, speer, rpeer, virtp;
403+
MPI_Aint datasize, offset;
402404
char *rbuf, *rtmpbuf, *stmpbuf;
403405

404406
if (p < 2) {
@@ -408,9 +410,9 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve
408410
if(NBC_Type_intrinsic(sendtype)) {
409411
datasize = sndext*sendcount;
410412
} else {
411-
res = PMPI_Pack_size(sendcount, sendtype, comm, &datasize);
413+
res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
412414
if (MPI_SUCCESS != res) {
413-
NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
415+
NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
414416
return res;
415417
}
416418
}
@@ -495,8 +497,8 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
495497
for (int i = 1 ; i < (p+1)/2 ; i++) {
496498
int speer = (rank + i) % p;
497499
int rpeer = (rank + p - i) % p;
498-
char *sbuf = (char *) buf + speer * count * ext;
499-
char *rbuf = (char *) buf + rpeer * count * ext;
500+
char *sbuf = (char *) buf + (intptr_t)speer * (intptr_t)count * ext;
501+
char *rbuf = (char *) buf + (intptr_t)rpeer * (intptr_t)count * ext;
500502

501503
res = NBC_Sched_copy (rbuf, false, count, type,
502504
(void *)(-gap), true, count, type,
@@ -525,7 +527,7 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
525527
if (0 == (p%2)) {
526528
int peer = (rank + p/2) % p;
527529

528-
char *tbuf = (char *) buf + peer * count * ext;
530+
char *tbuf = (char *) buf + (intptr_t)peer * (intptr_t)count * ext;
529531
res = NBC_Sched_copy (tbuf, false, count, type,
530532
(void *)(-gap), true, count, type,
531533
schedule, true);

ompi/mca/coll/libnbc/nbc_internal.h

Lines changed: 12 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
*
1111
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
1212
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
13-
* Copyright (c) 2015-2017 Research Organization for Information Science
14-
* and Technology (RIST). All rights reserved.
13+
* Copyright (c) 2015-2018 Research Organization for Information Science
14+
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
*
@@ -486,60 +486,20 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) {
486486

487487
/* let's give a try to inline functions */
488488
static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
489-
int size, pos, res;
490-
void *packbuf;
489+
int res;
491490

492-
#if OPAL_CUDA_SUPPORT
493-
if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
494-
#else
495-
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
496-
#endif /* OPAL_CUDA_SUPPORT */
497-
/* if we have the same types and they are contiguous (intrinsic
498-
* types are contiguous), we can just use a single memcpy */
499-
ptrdiff_t gap, span;
500-
span = opal_datatype_span(&srctype->super, srccount, &gap);
501-
502-
memcpy(tgt, src, span);
503-
} else {
504-
/* we have to pack and unpack */
505-
res = PMPI_Pack_size(srccount, srctype, comm, &size);
506-
if (MPI_SUCCESS != res) {
507-
NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size);
508-
return res;
509-
}
510-
511-
if (0 == size) {
512-
return OMPI_SUCCESS;
513-
}
514-
packbuf = malloc(size);
515-
if (NULL == packbuf) {
516-
NBC_Error("Error in malloc()");
517-
return res;
518-
}
519-
520-
pos=0;
521-
res = PMPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm);
522-
523-
if (MPI_SUCCESS != res) {
524-
NBC_Error ("MPI Error in PMPI_Pack() (%i)", res);
525-
free (packbuf);
526-
return res;
527-
}
528-
529-
pos=0;
530-
res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm);
531-
free(packbuf);
532-
if (MPI_SUCCESS != res) {
533-
NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
534-
return res;
535-
}
491+
res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype);
492+
if (OMPI_SUCCESS != res) {
493+
NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res);
494+
return res;
536495
}
537496

538497
return OMPI_SUCCESS;
539498
}
540499

541500
static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
542-
int size, pos, res;
501+
MPI_Aint size, pos;
502+
int res;
543503
ptrdiff_t ext, lb;
544504

545505
#if OPAL_CUDA_SUPPORT
@@ -549,6 +509,7 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
549509
#endif /* OPAL_CUDA_SUPPORT */
550510
/* if we have the same types and they are contiguous (intrinsic
551511
* types are contiguous), we can just use a single memcpy */
512+
res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size);
552513
res = ompi_datatype_get_extent (srctype, &lb, &ext);
553514
if (OMPI_SUCCESS != res) {
554515
NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
@@ -559,15 +520,10 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
559520

560521
} else {
561522
/* we have to unpack */
562-
res = PMPI_Pack_size(srccount, srctype, comm, &size);
563-
if (MPI_SUCCESS != res) {
564-
NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res);
565-
return res;
566-
}
567523
pos = 0;
568-
res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm);
524+
res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype);
569525
if (MPI_SUCCESS != res) {
570-
NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
526+
NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res);
571527
return res;
572528
}
573529
}

0 commit comments

Comments
 (0)