1010 *                         rights reserved. 
1111 * Copyright (c) 2015      Los Alamos National Security, LLC.  All rights 
1212 *                         reserved. 
13-  * Copyright (c) 2015       Research Organization for Information Science 
13+  * Copyright (c) 2015-2016  Research Organization for Information Science 
1414 *                         and Technology (RIST). All rights reserved. 
1515 * 
1616 * Author(s): Torsten Hoefler <[email protected] > @@ -111,7 +111,7 @@ static int nbc_schedule_round_append (NBC_Schedule *schedule, void *data, int da
111111}
112112
113113/* this function puts a send into the schedule */ 
114- int  NBC_Sched_send  (const  void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  dest , NBC_Schedule  * schedule , bool  barrier ) {
114+ static   int  NBC_Sched_send_internal  (const  void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  dest ,  bool   local , NBC_Schedule  * schedule , bool  barrier ) {
115115  NBC_Args_send  send_args ;
116116  int  ret ;
117117
@@ -122,6 +122,7 @@ int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype dataty
122122  send_args .count  =  count ;
123123  send_args .datatype  =  datatype ;
124124  send_args .dest  =  dest ;
125+   send_args .local  =  local ;
125126
126127  /* append to the round-schedule */ 
127128  ret  =  nbc_schedule_round_append  (schedule , & send_args , sizeof  (send_args ), barrier );
@@ -134,8 +135,16 @@ int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype dataty
134135  return  OMPI_SUCCESS ;
135136}
136137
/* Appends a send to the schedule. Thin wrapper: forwards every argument
 * unchanged to NBC_Sched_send_internal and passes false for its `local`
 * parameter, which the helper stores in send_args.local. When the round is
 * started, a send whose local flag is false is posted on handle->comm
 * (not handle->comm->c_local_comm). Returns the helper's return code. */
138+ int  NBC_Sched_send  (const  void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  dest , NBC_Schedule  * schedule , bool  barrier ) {
139+   return  NBC_Sched_send_internal  (buf , tmpbuf , count , datatype , dest , false, schedule , barrier );
140+ }
141+ 
/* Appends a "local" send to the schedule. Same parameters as NBC_Sched_send;
 * the only difference is that true is passed for the `local` parameter of
 * NBC_Sched_send_internal, which the helper stores in send_args.local. When
 * the round is started, a send whose local flag is true is posted on
 * handle->comm->c_local_comm instead of handle->comm. Returns the helper's
 * return code. */
142+ int  NBC_Sched_local_send  (const  void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  dest , NBC_Schedule  * schedule , bool  barrier ) {
143+   return  NBC_Sched_send_internal  (buf , tmpbuf , count , datatype , dest , true, schedule , barrier );
144+ }
145+ 
137146/* this function puts a receive into the schedule */ 
138- int  NBC_Sched_recv  (void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  source , NBC_Schedule  * schedule , bool  barrier ) {
147+ static   int  NBC_Sched_recv_internal  (void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  source ,  bool   local , NBC_Schedule  * schedule , bool  barrier ) {
139148  NBC_Args_recv  recv_args ;
140149  int  ret ;
141150
@@ -146,6 +155,7 @@ int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, in
146155  recv_args .count  =  count ;
147156  recv_args .datatype  =  datatype ;
148157  recv_args .source  =  source ;
158+   recv_args .local  =  local ;
149159
150160  /* append to the round-schedule */ 
151161  ret  =  nbc_schedule_round_append  (schedule , & recv_args , sizeof  (recv_args ), barrier );
@@ -158,8 +168,16 @@ int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, in
158168  return  OMPI_SUCCESS ;
159169}
160170
/* Appends a receive to the schedule. Thin wrapper: forwards every argument
 * unchanged to NBC_Sched_recv_internal and passes false for its `local`
 * parameter, which the helper stores in recv_args.local. When the round is
 * started, a receive whose local flag is false is posted on handle->comm
 * (not handle->comm->c_local_comm). Returns the helper's return code. */
171+ int  NBC_Sched_recv  (void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  source , NBC_Schedule  * schedule , bool  barrier ) {
172+   return  NBC_Sched_recv_internal (buf , tmpbuf , count , datatype , source , false, schedule , barrier );
173+ }
174+ 
/* Appends a "local" receive to the schedule. Same parameters as
 * NBC_Sched_recv; the only difference is that true is passed for the `local`
 * parameter of NBC_Sched_recv_internal, which the helper stores in
 * recv_args.local. When the round is started, a receive whose local flag is
 * true is posted on handle->comm->c_local_comm instead of handle->comm.
 * Returns the helper's return code. */
175+ int  NBC_Sched_local_recv  (void *  buf , char  tmpbuf , int  count , MPI_Datatype  datatype , int  source , NBC_Schedule  * schedule , bool  barrier ) {
176+   return  NBC_Sched_recv_internal (buf , tmpbuf , count , datatype , source , true, schedule , barrier );
177+ }
178+ 
161179/* this function puts an operation into the schedule */ 
162- int  NBC_Sched_op  (void   * buf3 ,  char   tmpbuf3 ,  const  void *  buf1 , char  tmpbuf1 , void *  buf2 , char  tmpbuf2 , int  count , MPI_Datatype  datatype ,
180+ int  NBC_Sched_op  (const  void *  buf1 , char  tmpbuf1 , void *  buf2 , char  tmpbuf2 , int  count , MPI_Datatype  datatype ,
163181                  MPI_Op  op , NBC_Schedule  * schedule , bool  barrier ) {
164182  NBC_Args_op  op_args ;
165183  int  ret ;
@@ -168,10 +186,8 @@ int NBC_Sched_op (void *buf3, char tmpbuf3, const void* buf1, char tmpbuf1, void
168186  op_args .type  =  OP ;
169187  op_args .buf1  =  buf1 ;
170188  op_args .buf2  =  buf2 ;
171-   op_args .buf3  =  buf3 ;
172189  op_args .tmpbuf1  =  tmpbuf1 ;
173190  op_args .tmpbuf2  =  tmpbuf2 ;
174-   op_args .tmpbuf3  =  tmpbuf3 ;
175191  op_args .count  =  count ;
176192  op_args .op  =  op ;
177193  op_args .datatype  =  datatype ;
@@ -182,7 +198,7 @@ int NBC_Sched_op (void *buf3, char tmpbuf3, const void* buf1, char tmpbuf1, void
182198    return  ret ;
183199  }
184200
185-   NBC_DEBUG (10 , "added op  - ends at byte %i\n" , nbc_schedule_get_size  (schedule ));
201+   NBC_DEBUG (10 , "added op2  - ends at byte %i\n" , nbc_schedule_get_size  (schedule ));
186202
187203  return  OMPI_SUCCESS ;
188204}
@@ -373,7 +389,7 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
373389  NBC_Args_op          opargs ;
374390  NBC_Args_copy      copyargs ;
375391  NBC_Args_unpack  unpackargs ;
376-   void  * buf1 ,  * buf2 ,  * buf3 ;
392+   void  * buf1 ,  * buf2 ;
377393
378394  /* get round-schedule address */ 
379395  ptr  =  handle -> schedule -> data  +  handle -> row_offset ;
@@ -410,7 +426,7 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
410426        handle -> req_array  =  tmp ;
411427
412428        res  =  MCA_PML_CALL (isend (buf1 , sendargs .count , sendargs .datatype , sendargs .dest , handle -> tag ,
413-                                  MCA_PML_BASE_SEND_STANDARD , handle -> comm ,
429+                                  MCA_PML_BASE_SEND_STANDARD , sendargs . local ? handle -> comm -> c_local_comm : handle -> comm ,
414430                                 handle -> req_array + handle -> req_count  -  1 ));
415431        if  (OMPI_SUCCESS  !=  res ) {
416432          NBC_Error  ("Error in MPI_Isend(%lu, %i, %p, %i, %i, %lu) (%i)" , (unsigned long )buf1 , sendargs .count ,
@@ -444,7 +460,7 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
444460
445461        handle -> req_array  =  tmp ;
446462
447-         res  =  MCA_PML_CALL (irecv (buf1 , recvargs .count , recvargs .datatype , recvargs .source , handle -> tag , handle -> comm ,
463+         res  =  MCA_PML_CALL (irecv (buf1 , recvargs .count , recvargs .datatype , recvargs .source , handle -> tag , recvargs . local ? handle -> comm -> c_local_comm : handle -> comm ,
448464                                 handle -> req_array + handle -> req_count - 1 ));
449465        if  (OMPI_SUCCESS  !=  res ) {
450466          NBC_Error ("Error in MPI_Irecv(%lu, %i, %p, %i, %i, %lu) (%i)" , (unsigned long )buf1 , recvargs .count ,
@@ -456,10 +472,10 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
456472#endif 
457473        break ;
458474      case  OP :
459-         NBC_DEBUG (5 , "  OP    (offset %li) " , offset );
475+         NBC_DEBUG (5 , "  OP2   (offset %li) " , offset );
460476        NBC_GET_BYTES (ptr ,opargs );
461-         NBC_DEBUG (5 , "*buf1: %p, buf2: %p, buf3: %p,  count: %i, type: %p)\n" , opargs .buf1 , opargs .buf2 ,
462-                   opargs .buf3 ,  opargs . count , opargs .datatype );
477+         NBC_DEBUG (5 , "*buf1: %p, buf2: %p, count: %i, type: %p)\n" , opargs .buf1 , opargs .buf2 ,
478+                   opargs .count , opargs .datatype );
463479        /* get buffers */ 
464480        if (opargs .tmpbuf1 ) {
465481          buf1 = (char * )handle -> tmpbuf + (long )opargs .buf1 ;
@@ -471,12 +487,7 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
471487        } else  {
472488          buf2 = opargs .buf2 ;
473489        }
474-         if (opargs .tmpbuf3 ) {
475-           buf3 = (char * )handle -> tmpbuf + (long )opargs .buf3 ;
476-         } else  {
477-           buf3 = opargs .buf3 ;
478-         }
479-         ompi_3buff_op_reduce (opargs .op , buf1 , buf2 , buf3 , opargs .count , opargs .datatype );
490+         ompi_op_reduce (opargs .op , buf1 , buf2 , opargs .count , opargs .datatype );
480491        break ;
481492      case  COPY :
482493        NBC_DEBUG (5 , "  COPY   (offset %li) " , offset );
0 commit comments