@@ -757,153 +757,153 @@ contains
757757 #:for mpi_dir in [1, 2, 3]
758758 if (mpi_dir == ${mpi_dir}$) then
759759 #:if mpi_dir == 1
760- #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private=' [r]' )
761- do l = 0, p
762- do k = 0, n
763- do j = 0, buff_size - 1
764- do i = 1, nVar
765- r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
766- buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
767- end do
760+ $:GPU_PARALLEL_LOOP(collapse=4,private=' [r]' )
761+ do l = 0, p
762+ do k = 0, n
763+ do j = 0, buff_size - 1
764+ do i = 1, nVar
765+ r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
766+ buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
768767 end do
769768 end do
770769 end do
771- #:endcall GPU_PARALLEL_LOOP_OLD
770+ end do
771+ $:END_GPU_PARALLEL_LOOP()
772772
773773 if (qbmm_comm) then
774- #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private=' [r]' )
775- do l = 0, p
776- do k = 0, n
777- do j = 0, buff_size - 1
778- do i = nVar + 1, nVar + 4
779- do q = 1, nb
780- r = (i - 1) + (q - 1)*4 + v_size* &
781- (j + buff_size*(k + (n + 1)*l))
782- buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
783- end do
774+ $:GPU_PARALLEL_LOOP(collapse=4,private=' [r]' )
775+ do l = 0, p
776+ do k = 0, n
777+ do j = 0, buff_size - 1
778+ do i = nVar + 1, nVar + 4
779+ do q = 1, nb
780+ r = (i - 1) + (q - 1)*4 + v_size* &
781+ (j + buff_size*(k + (n + 1)*l))
782+ buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
784783 end do
785784 end do
786785 end do
787786 end do
788- #:endcall GPU_PARALLEL_LOOP_OLD
787+ end do
788+ $:END_GPU_PARALLEL_LOOP()
789789
790- #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
791- do l = 0, p
792- do k = 0, n
793- do j = 0, buff_size - 1
794- do i = nVar + 1, nVar + 4
795- do q = 1, nb
796- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
797- (j + buff_size*(k + (n + 1)*l))
798- buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
799- end do
790+ $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
791+ do l = 0, p
792+ do k = 0, n
793+ do j = 0, buff_size - 1
794+ do i = nVar + 1, nVar + 4
795+ do q = 1, nb
796+ r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
797+ (j + buff_size*(k + (n + 1)*l))
798+ buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
800799 end do
801800 end do
802801 end do
803802 end do
804- #:endcall GPU_PARALLEL_LOOP_OLD
803+ end do
804+ $:END_GPU_PARALLEL_LOOP()
805805 end if
806806 #:elif mpi_dir == 2
807- #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private=' [r]' )
808- do i = 1, nVar
809- do l = 0, p
810- do k = 0, buff_size - 1
811- do j = -buff_size, m + buff_size
812- r = (i - 1) + v_size* &
813- ((j + buff_size) + (m + 2*buff_size + 1)* &
814- (k + buff_size*l))
815- buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
816- end do
807+ $:GPU_PARALLEL_LOOP(collapse=4,private=' [r]' )
808+ do i = 1, nVar
809+ do l = 0, p
810+ do k = 0, buff_size - 1
811+ do j = -buff_size, m + buff_size
812+ r = (i - 1) + v_size* &
813+ ((j + buff_size) + (m + 2*buff_size + 1)* &
814+ (k + buff_size*l))
815+ buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
817816 end do
818817 end do
819818 end do
820- #:endcall GPU_PARALLEL_LOOP_OLD
819+ end do
820+ $:END_GPU_PARALLEL_LOOP()
821821
822822 if (qbmm_comm) then
823- #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
824- do i = nVar + 1, nVar + 4
825- do l = 0, p
826- do k = 0, buff_size - 1
827- do j = -buff_size, m + buff_size
828- do q = 1, nb
829- r = (i - 1) + (q - 1)*4 + v_size* &
830- ((j + buff_size) + (m + 2*buff_size + 1)* &
831- (k + buff_size*l))
832- buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
833- end do
823+ $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
824+ do i = nVar + 1, nVar + 4
825+ do l = 0, p
826+ do k = 0, buff_size - 1
827+ do j = -buff_size, m + buff_size
828+ do q = 1, nb
829+ r = (i - 1) + (q - 1)*4 + v_size* &
830+ ((j + buff_size) + (m + 2*buff_size + 1)* &
831+ (k + buff_size*l))
832+ buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
834833 end do
835834 end do
836835 end do
837836 end do
838- #:endcall GPU_PARALLEL_LOOP_OLD
837+ end do
838+ $:END_GPU_PARALLEL_LOOP()
839839
840- #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
841- do i = nVar + 1, nVar + 4
842- do l = 0, p
843- do k = 0, buff_size - 1
844- do j = -buff_size, m + buff_size
845- do q = 1, nb
846- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847- ((j + buff_size) + (m + 2*buff_size + 1)* &
848- (k + buff_size*l))
849- buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
850- end do
840+ $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
841+ do i = nVar + 1, nVar + 4
842+ do l = 0, p
843+ do k = 0, buff_size - 1
844+ do j = -buff_size, m + buff_size
845+ do q = 1, nb
846+ r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847+ ((j + buff_size) + (m + 2*buff_size + 1)* &
848+ (k + buff_size*l))
849+ buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
851850 end do
852851 end do
853852 end do
854853 end do
855- #:endcall GPU_PARALLEL_LOOP_OLD
854+ end do
855+ $:END_GPU_PARALLEL_LOOP()
856856 end if
857857 #:else
858- #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private=' [r]' )
859- do i = 1, nVar
860- do l = 0, buff_size - 1
861- do k = -buff_size, n + buff_size
862- do j = -buff_size, m + buff_size
863- r = (i - 1) + v_size* &
864- ((j + buff_size) + (m + 2*buff_size + 1)* &
865- ((k + buff_size) + (n + 2*buff_size + 1)*l))
866- buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
867- end do
858+ $:GPU_PARALLEL_LOOP(collapse=4,private=' [r]' )
859+ do i = 1, nVar
860+ do l = 0, buff_size - 1
861+ do k = -buff_size, n + buff_size
862+ do j = -buff_size, m + buff_size
863+ r = (i - 1) + v_size* &
864+ ((j + buff_size) + (m + 2*buff_size + 1)* &
865+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
866+ buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
868867 end do
869868 end do
870869 end do
871- #:endcall GPU_PARALLEL_LOOP_OLD
870+ end do
871+ $:END_GPU_PARALLEL_LOOP()
872872
873873 if (qbmm_comm) then
874- #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
875- do i = nVar + 1, nVar + 4
876- do l = 0, buff_size - 1
877- do k = -buff_size, n + buff_size
878- do j = -buff_size, m + buff_size
879- do q = 1, nb
880- r = (i - 1) + (q - 1)*4 + v_size* &
881- ((j + buff_size) + (m + 2*buff_size + 1)* &
882- ((k + buff_size) + (n + 2*buff_size + 1)*l))
883- buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
884- end do
874+ $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
875+ do i = nVar + 1, nVar + 4
876+ do l = 0, buff_size - 1
877+ do k = -buff_size, n + buff_size
878+ do j = -buff_size, m + buff_size
879+ do q = 1, nb
880+ r = (i - 1) + (q - 1)*4 + v_size* &
881+ ((j + buff_size) + (m + 2*buff_size + 1)* &
882+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
883+ buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
885884 end do
886885 end do
887886 end do
888887 end do
889- #:endcall GPU_PARALLEL_LOOP_OLD
888+ end do
889+ $:END_GPU_PARALLEL_LOOP()
890890
891- #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private=' [r]' )
892- do i = nVar + 1, nVar + 4
893- do l = 0, buff_size - 1
894- do k = -buff_size, n + buff_size
895- do j = -buff_size, m + buff_size
896- do q = 1, nb
897- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898- ((j + buff_size) + (m + 2*buff_size + 1)* &
899- ((k + buff_size) + (n + 2*buff_size + 1)*l))
900- buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
901- end do
891+ $:GPU_PARALLEL_LOOP(collapse=5,private=' [r]' )
892+ do i = nVar + 1, nVar + 4
893+ do l = 0, buff_size - 1
894+ do k = -buff_size, n + buff_size
895+ do j = -buff_size, m + buff_size
896+ do q = 1, nb
897+ r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898+ ((j + buff_size) + (m + 2*buff_size + 1)* &
899+ ((k + buff_size) + (n + 2*buff_size + 1)*l))
900+ buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
902901 end do
903902 end do
904903 end do
905904 end do
906- #:endcall GPU_PARALLEL_LOOP_OLD
905+ end do
906+ $:END_GPU_PARALLEL_LOOP()
907907 end if
908908 #:endif
909909 end if
0 commit comments