Skip to content

Commit d5a3f74

Browse files
author
Daniel Vickers
committed
The previous test passed except for gres resource errors, which are spurious rather than real failures. Now testing with half of the loops in the new configuration (GPU_PARALLEL_LOOP instead of GPU_PARALLEL_LOOP_OLD).
1 parent 2ede9a1 commit d5a3f74

File tree

1 file changed

+101
-101
lines changed

1 file changed

+101
-101
lines changed

src/common/m_mpi_common.fpp

Lines changed: 101 additions & 101 deletions
Original file line number | Diff line number | Diff line change
@@ -757,153 +757,153 @@ contains
757757
#:for mpi_dir in [1, 2, 3]
758758
if (mpi_dir == ${mpi_dir}$) then
759759
#:if mpi_dir == 1
760-
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
761-
do l = 0, p
762-
do k = 0, n
763-
do j = 0, buff_size - 1
764-
do i = 1, nVar
765-
r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
766-
buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
767-
end do
760+
$:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
761+
do l = 0, p
762+
do k = 0, n
763+
do j = 0, buff_size - 1
764+
do i = 1, nVar
765+
r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
766+
buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp)
768767
end do
769768
end do
770769
end do
771-
#:endcall GPU_PARALLEL_LOOP_OLD
770+
end do
771+
$:END_GPU_PARALLEL_LOOP()
772772
773773
if (qbmm_comm) then
774-
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
775-
do l = 0, p
776-
do k = 0, n
777-
do j = 0, buff_size - 1
778-
do i = nVar + 1, nVar + 4
779-
do q = 1, nb
780-
r = (i - 1) + (q - 1)*4 + v_size* &
781-
(j + buff_size*(k + (n + 1)*l))
782-
buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
783-
end do
774+
$:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
775+
do l = 0, p
776+
do k = 0, n
777+
do j = 0, buff_size - 1
778+
do i = nVar + 1, nVar + 4
779+
do q = 1, nb
780+
r = (i - 1) + (q - 1)*4 + v_size* &
781+
(j + buff_size*(k + (n + 1)*l))
782+
buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
784783
end do
785784
end do
786785
end do
787786
end do
788-
#:endcall GPU_PARALLEL_LOOP_OLD
787+
end do
788+
$:END_GPU_PARALLEL_LOOP()
789789
790-
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
791-
do l = 0, p
792-
do k = 0, n
793-
do j = 0, buff_size - 1
794-
do i = nVar + 1, nVar + 4
795-
do q = 1, nb
796-
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
797-
(j + buff_size*(k + (n + 1)*l))
798-
buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
799-
end do
790+
$:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
791+
do l = 0, p
792+
do k = 0, n
793+
do j = 0, buff_size - 1
794+
do i = nVar + 1, nVar + 4
795+
do q = 1, nb
796+
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
797+
(j + buff_size*(k + (n + 1)*l))
798+
buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp)
800799
end do
801800
end do
802801
end do
803802
end do
804-
#:endcall GPU_PARALLEL_LOOP_OLD
803+
end do
804+
$:END_GPU_PARALLEL_LOOP()
805805
end if
806806
#:elif mpi_dir == 2
807-
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
808-
do i = 1, nVar
809-
do l = 0, p
810-
do k = 0, buff_size - 1
811-
do j = -buff_size, m + buff_size
812-
r = (i - 1) + v_size* &
813-
((j + buff_size) + (m + 2*buff_size + 1)* &
814-
(k + buff_size*l))
815-
buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
816-
end do
807+
$:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
808+
do i = 1, nVar
809+
do l = 0, p
810+
do k = 0, buff_size - 1
811+
do j = -buff_size, m + buff_size
812+
r = (i - 1) + v_size* &
813+
((j + buff_size) + (m + 2*buff_size + 1)* &
814+
(k + buff_size*l))
815+
buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp)
817816
end do
818817
end do
819818
end do
820-
#:endcall GPU_PARALLEL_LOOP_OLD
819+
end do
820+
$:END_GPU_PARALLEL_LOOP()
821821
822822
if (qbmm_comm) then
823-
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
824-
do i = nVar + 1, nVar + 4
825-
do l = 0, p
826-
do k = 0, buff_size - 1
827-
do j = -buff_size, m + buff_size
828-
do q = 1, nb
829-
r = (i - 1) + (q - 1)*4 + v_size* &
830-
((j + buff_size) + (m + 2*buff_size + 1)* &
831-
(k + buff_size*l))
832-
buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
833-
end do
823+
$:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
824+
do i = nVar + 1, nVar + 4
825+
do l = 0, p
826+
do k = 0, buff_size - 1
827+
do j = -buff_size, m + buff_size
828+
do q = 1, nb
829+
r = (i - 1) + (q - 1)*4 + v_size* &
830+
((j + buff_size) + (m + 2*buff_size + 1)* &
831+
(k + buff_size*l))
832+
buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
834833
end do
835834
end do
836835
end do
837836
end do
838-
#:endcall GPU_PARALLEL_LOOP_OLD
837+
end do
838+
$:END_GPU_PARALLEL_LOOP()
839839
840-
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
841-
do i = nVar + 1, nVar + 4
842-
do l = 0, p
843-
do k = 0, buff_size - 1
844-
do j = -buff_size, m + buff_size
845-
do q = 1, nb
846-
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847-
((j + buff_size) + (m + 2*buff_size + 1)* &
848-
(k + buff_size*l))
849-
buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
850-
end do
840+
$:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
841+
do i = nVar + 1, nVar + 4
842+
do l = 0, p
843+
do k = 0, buff_size - 1
844+
do j = -buff_size, m + buff_size
845+
do q = 1, nb
846+
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
847+
((j + buff_size) + (m + 2*buff_size + 1)* &
848+
(k + buff_size*l))
849+
buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp)
851850
end do
852851
end do
853852
end do
854853
end do
855-
#:endcall GPU_PARALLEL_LOOP_OLD
854+
end do
855+
$:END_GPU_PARALLEL_LOOP()
856856
end if
857857
#:else
858-
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
859-
do i = 1, nVar
860-
do l = 0, buff_size - 1
861-
do k = -buff_size, n + buff_size
862-
do j = -buff_size, m + buff_size
863-
r = (i - 1) + v_size* &
864-
((j + buff_size) + (m + 2*buff_size + 1)* &
865-
((k + buff_size) + (n + 2*buff_size + 1)*l))
866-
buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
867-
end do
858+
$:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
859+
do i = 1, nVar
860+
do l = 0, buff_size - 1
861+
do k = -buff_size, n + buff_size
862+
do j = -buff_size, m + buff_size
863+
r = (i - 1) + v_size* &
864+
((j + buff_size) + (m + 2*buff_size + 1)* &
865+
((k + buff_size) + (n + 2*buff_size + 1)*l))
866+
buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp)
868867
end do
869868
end do
870869
end do
871-
#:endcall GPU_PARALLEL_LOOP_OLD
870+
end do
871+
$:END_GPU_PARALLEL_LOOP()
872872
873873
if (qbmm_comm) then
874-
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
875-
do i = nVar + 1, nVar + 4
876-
do l = 0, buff_size - 1
877-
do k = -buff_size, n + buff_size
878-
do j = -buff_size, m + buff_size
879-
do q = 1, nb
880-
r = (i - 1) + (q - 1)*4 + v_size* &
881-
((j + buff_size) + (m + 2*buff_size + 1)* &
882-
((k + buff_size) + (n + 2*buff_size + 1)*l))
883-
buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
884-
end do
874+
$:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
875+
do i = nVar + 1, nVar + 4
876+
do l = 0, buff_size - 1
877+
do k = -buff_size, n + buff_size
878+
do j = -buff_size, m + buff_size
879+
do q = 1, nb
880+
r = (i - 1) + (q - 1)*4 + v_size* &
881+
((j + buff_size) + (m + 2*buff_size + 1)* &
882+
((k + buff_size) + (n + 2*buff_size + 1)*l))
883+
buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
885884
end do
886885
end do
887886
end do
888887
end do
889-
#:endcall GPU_PARALLEL_LOOP_OLD
888+
end do
889+
$:END_GPU_PARALLEL_LOOP()
890890
891-
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
892-
do i = nVar + 1, nVar + 4
893-
do l = 0, buff_size - 1
894-
do k = -buff_size, n + buff_size
895-
do j = -buff_size, m + buff_size
896-
do q = 1, nb
897-
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898-
((j + buff_size) + (m + 2*buff_size + 1)* &
899-
((k + buff_size) + (n + 2*buff_size + 1)*l))
900-
buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
901-
end do
891+
$:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
892+
do i = nVar + 1, nVar + 4
893+
do l = 0, buff_size - 1
894+
do k = -buff_size, n + buff_size
895+
do j = -buff_size, m + buff_size
896+
do q = 1, nb
897+
r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
898+
((j + buff_size) + (m + 2*buff_size + 1)* &
899+
((k + buff_size) + (n + 2*buff_size + 1)*l))
900+
buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp)
902901
end do
903902
end do
904903
end do
905904
end do
906-
#:endcall GPU_PARALLEL_LOOP_OLD
905+
end do
906+
$:END_GPU_PARALLEL_LOOP()
907907
end if
908908
#:endif
909909
end if

0 commit comments

Comments
 (0)