@@ -837,73 +837,73 @@ contains
837837 end do
838838 $:END_GPU_PARALLEL_LOOP()
839839
840- $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
841- do i = nVar + 1, nVar + 4
842- do l = 0, p
843- do k = 0, buff_size - 1
844- do j = -buff_size, m + buff_size
845- do q = 1, nb
846- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847- ((j + buff_size) + (m + 2*buff_size + 1)* &
848- (k + buff_size*l))
849- buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
840+ #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
841+ do i = nVar + 1, nVar + 4
842+ do l = 0, p
843+ do k = 0, buff_size - 1
844+ do j = -buff_size, m + buff_size
845+ do q = 1, nb
846+ r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847+ ((j + buff_size) + (m + 2*buff_size + 1)* &
848+ (k + buff_size*l))
849+ buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
850+ end do
850851 end do
851852 end do
852853 end do
853854 end do
854- end do
855- $:END_GPU_PARALLEL_LOOP()
855+ #:endcall GPU_PARALLEL_LOOP_OLD
856856 end if
857857 #:else
858- $:GPU_PARALLEL_LOOP(collapse=4,private=' [r]' )
859- do i = 1, nVar
860- do l = 0, buff_size - 1
861- do k = -buff_size, n + buff_size
862- do j = -buff_size, m + buff_size
863- r = (i - 1) + v_size* &
864- ((j + buff_size) + (m + 2*buff_size + 1)* &
865- ((k + buff_size) + (n + 2*buff_size + 1)*l))
866- buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
858+ #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private=' [r]' )
859+ do i = 1, nVar
860+ do l = 0, buff_size - 1
861+ do k = -buff_size, n + buff_size
862+ do j = -buff_size, m + buff_size
863+ r = (i - 1) + v_size* &
864+ ((j + buff_size) + (m + 2*buff_size + 1)* &
865+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
866+ buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
867+ end do
867868 end do
868869 end do
869870 end do
870- end do
871- $:END_GPU_PARALLEL_LOOP()
871+ #:endcall GPU_PARALLEL_LOOP_OLD
872872
873873 if (qbmm_comm) then
874- $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
875- do i = nVar + 1, nVar + 4
876- do l = 0, buff_size - 1
877- do k = -buff_size, n + buff_size
878- do j = -buff_size, m + buff_size
879- do q = 1, nb
880- r = (i - 1) + (q - 1)*4 + v_size* &
881- ((j + buff_size) + (m + 2*buff_size + 1)* &
882- ((k + buff_size) + (n + 2*buff_size + 1)*l))
883- buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
874+ #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
875+ do i = nVar + 1, nVar + 4
876+ do l = 0, buff_size - 1
877+ do k = -buff_size, n + buff_size
878+ do j = -buff_size, m + buff_size
879+ do q = 1, nb
880+ r = (i - 1) + (q - 1)*4 + v_size* &
881+ ((j + buff_size) + (m + 2*buff_size + 1)* &
882+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
883+ buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
884+ end do
884885 end do
885886 end do
886887 end do
887888 end do
888- end do
889- $:END_GPU_PARALLEL_LOOP()
889+ #:endcall GPU_PARALLEL_LOOP_OLD
890890
891- $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
892- do i = nVar + 1, nVar + 4
893- do l = 0, buff_size - 1
894- do k = -buff_size, n + buff_size
895- do j = -buff_size, m + buff_size
896- do q = 1, nb
897- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898- ((j + buff_size) + (m + 2*buff_size + 1)* &
899- ((k + buff_size) + (n + 2*buff_size + 1)*l))
900- buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
891+ #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
892+ do i = nVar + 1, nVar + 4
893+ do l = 0, buff_size - 1
894+ do k = -buff_size, n + buff_size
895+ do j = -buff_size, m + buff_size
896+ do q = 1, nb
897+ r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898+ ((j + buff_size) + (m + 2*buff_size + 1)* &
899+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
900+ buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
901+ end do
901902 end do
902903 end do
903904 end do
904905 end do
905- end do
906- $:END_GPU_PARALLEL_LOOP()
906+ #:endcall GPU_PARALLEL_LOOP_OLD
907907 end if
908908 #:endif
909909 end if
0 commit comments