@@ -345,12 +345,40 @@ contains
345345 #:endfor
346346 call nvtxEndRange ! Packbuf
347347
348- call nvtxStartRange("IB-MARKER-SENDRECV")
349- call MPI_SENDRECV( &
350- ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
351- ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
352- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
353- call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
348+ #:for rdma_mpi in [False, True]
349+ if (rdma_mpi .eqv. ${' .true. ' if rdma_mpi else ' .false. ' }$) then
350+ #:if rdma_mpi
351+ #:call GPU_HOST_DATA(use_device=' [ib_buff_send, ib_buff_recv]' )
352+ call nvtxStartRange("IB-MARKER-SENDRECV-RDMA")
353+
354+ call MPI_SENDRECV( &
355+ ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
356+ ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
357+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
358+
359+ call nvtxEndRange ! IB-MARKER-SENDRECV-RDMA
360+
361+ #:endcall GPU_HOST_DATA
362+ $:GPU_WAIT()
363+ #:else
364+ call nvtxStartRange("IB-MARKER-DEV2HOST")
365+ $:GPU_UPDATE(host=' [ib_buff_send]' )
366+ call nvtxEndRange
367+ call nvtxStartRange("IB-MARKER-SENDRECV-NO-RMDA")
368+
369+ call MPI_SENDRECV( &
370+ ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
371+ ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
372+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
373+
374+ call nvtxEndRange ! IB-MARKER-SENDRECV-NO-RMDA
375+
376+ call nvtxStartRange("IB-MARKER-HOST2DEV")
377+ $:GPU_UPDATE(device=' [ib_buff_recv]' )
378+ call nvtxEndRange
379+ #:endif
380+ end if
381+ #:endfor
354382
355383 ! Unpack Received Buffer
356384 call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF")
0 commit comments