8
8
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
9
9
* reserved.
10
10
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
11
- * Copyright (c) 2014-2015 Research Organization for Information Science
11
+ * Copyright (c) 2014-2016 Research Organization for Information Science
12
12
* and Technology (RIST). All rights reserved.
13
13
*
14
14
* Author(s): Torsten Hoefler <[email protected] >
@@ -25,6 +25,8 @@ static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint
25
25
static inline int a2a_sched_diss (int rank , int p , MPI_Aint sndext , MPI_Aint rcvext , NBC_Schedule * schedule ,
26
26
const void * sendbuf , int sendcount , MPI_Datatype sendtype , void * recvbuf ,
27
27
int recvcount , MPI_Datatype recvtype , MPI_Comm comm , NBC_Handle * handle );
28
+ static inline int a2a_sched_inplace (int rank , int p , NBC_Schedule * schedule , void * buf , int count ,
29
+ MPI_Datatype type , MPI_Aint ext , ptrdiff_t gap , MPI_Comm comm );
28
30
29
31
#ifdef NBC_CACHE_SCHEDULE
30
32
/* tree comparison function for schedule cache */
@@ -59,9 +61,10 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
59
61
NBC_Alltoall_args * args , * found , search ;
60
62
#endif
61
63
char * rbuf , * sbuf , inplace ;
62
- enum {NBC_A2A_LINEAR , NBC_A2A_PAIRWISE , NBC_A2A_DISS } alg ;
64
+ enum {NBC_A2A_LINEAR , NBC_A2A_PAIRWISE , NBC_A2A_DISS , NBC_A2A_INPLACE } alg ;
63
65
NBC_Handle * handle ;
64
66
ompi_coll_libnbc_module_t * libnbc_module = (ompi_coll_libnbc_module_t * ) module ;
67
+ ptrdiff_t span , gap ;
65
68
66
69
NBC_IN_PLACE (sendbuf , recvbuf , inplace );
67
70
@@ -89,7 +92,9 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
89
92
/* algorithm selection */
90
93
a2asize = sndsize * sendcount * p ;
91
94
/* this number is optimized for TCP on odin.cs.indiana.edu */
92
- if ((p <= 8 ) && ((a2asize < 1 <<17 ) || (sndsize * sendcount < 1 <<12 ))) {
95
+ if (inplace ) {
96
+ alg = NBC_A2A_INPLACE ;
97
+ } else if ((p <= 8 ) && ((a2asize < 1 <<17 ) || (sndsize * sendcount < 1 <<12 ))) {
93
98
/* just send as fast as we can if we have less than 8 peers, if the
94
99
* total communicated size is smaller than 1<<17 *and* if we don't
95
100
* have eager messages (msgsize < 1<<13) */
@@ -116,7 +121,14 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
116
121
}
117
122
118
123
/* allocate temp buffer if we need one */
119
- if (alg == NBC_A2A_DISS ) {
124
+ if (alg == NBC_A2A_INPLACE ) {
125
+ span = opal_datatype_span (& recvtype -> super , recvcount , & gap );
126
+ handle -> tmpbuf = malloc (span );
127
+ if (OPAL_UNLIKELY (NULL == handle -> tmpbuf )) {
128
+ NBC_Return_handle (handle );
129
+ return OMPI_ERR_OUT_OF_RESOURCE ;
130
+ }
131
+ } else if (alg == NBC_A2A_DISS ) {
120
132
/* only A2A_DISS needs buffers */
121
133
if (NBC_Type_intrinsic (sendtype )) {
122
134
datasize = sndext * sendcount ;
@@ -200,6 +212,9 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype
200
212
handle -> schedule = schedule ;
201
213
202
214
switch (alg ) {
215
+ case NBC_A2A_INPLACE :
216
+ res = a2a_sched_inplace (rank , p , schedule , recvbuf , recvcount , recvtype , rcvext , gap , comm );
217
+ break ;
203
218
case NBC_A2A_LINEAR :
204
219
res = a2a_sched_linear (rank , p , sndext , rcvext , schedule , sendbuf , sendcount , sendtype , recvbuf , recvcount , recvtype , comm );
205
220
break ;
@@ -359,17 +374,10 @@ static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint
359
374
}
360
375
361
376
char * sbuf = (char * ) sendbuf + sndpeer * sendcount * sndext ;
362
- res = NBC_Sched_send (sbuf , false, sendcount , sendtype , sndpeer , schedule , false );
377
+ res = NBC_Sched_send (sbuf , false, sendcount , sendtype , sndpeer , schedule , true );
363
378
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
364
379
return res ;
365
380
}
366
-
367
- if (r < p ) {
368
- res = NBC_Sched_barrier (schedule );
369
- if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
370
- return res ;
371
- }
372
- }
373
381
}
374
382
375
383
return OMPI_SUCCESS ;
@@ -496,3 +504,59 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve
496
504
return OMPI_SUCCESS ;
497
505
}
498
506
507
+ static inline int a2a_sched_inplace (int rank , int p , NBC_Schedule * schedule , void * buf , int count ,
508
+ MPI_Datatype type , MPI_Aint ext , ptrdiff_t gap , MPI_Comm comm ) {
509
+ int res ;
510
+
511
+ for (int i = 1 ; i < (p + 1 )/2 ; i ++ ) {
512
+ int speer = (rank + i ) % p ;
513
+ int rpeer = (rank + p - i ) % p ;
514
+ char * sbuf = (char * ) buf + speer * count * ext ;
515
+ char * rbuf = (char * ) buf + rpeer * count * ext ;
516
+
517
+ res = NBC_Sched_copy (rbuf , false, count , type ,
518
+ (void * )(- gap ), true, count , type ,
519
+ schedule , true);
520
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
521
+ return res ;
522
+ }
523
+ res = NBC_Sched_send (sbuf , false , count , type , speer , schedule , false);
524
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
525
+ return res ;
526
+ }
527
+ res = NBC_Sched_recv (rbuf , false , count , type , rpeer , schedule , true);
528
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
529
+ return res ;
530
+ }
531
+
532
+ res = NBC_Sched_send ((void * )(- gap ), true, count , type , rpeer , schedule , false);
533
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
534
+ return res ;
535
+ }
536
+ res = NBC_Sched_recv (sbuf , false, count , type , speer , schedule , true);
537
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
538
+ return res ;
539
+ }
540
+ }
541
+ if (0 == (p %2 )) {
542
+ int peer = (rank + p /2 ) % p ;
543
+
544
+ char * tbuf = (char * ) buf + peer * count * ext ;
545
+ res = NBC_Sched_copy (tbuf , false, count , type ,
546
+ (void * )(- gap ), true, count , type ,
547
+ schedule , true);
548
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
549
+ return res ;
550
+ }
551
+ res = NBC_Sched_send ((void * )(- gap ), true , count , type , peer , schedule , false);
552
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
553
+ return res ;
554
+ }
555
+ res = NBC_Sched_recv (tbuf , false , count , type , peer , schedule , true);
556
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
557
+ return res ;
558
+ }
559
+ }
560
+
561
+ return OMPI_SUCCESS ;
562
+ }
0 commit comments