Skip to content

Commit 973ea73

Browse files
author
Daniel Vickers
committed
Fixed old macros
1 parent b521697 commit 973ea73

File tree

4 files changed

+132
-36
lines changed

4 files changed

+132
-36
lines changed

src/common/include/acc_macros.fpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,38 @@
129129
$:end_acc_directive
130130
#:enddef
131131

132+
#! ACC_PARALLEL_LOOP_OLD: block-style macro that wraps `code` in an OpenACC
#! parallel-loop region. It emits '!$acc parallel loop <clauses>', then `code`,
#! then '!$acc end parallel loop'.
#!
#! Each keyword argument is turned into clause text by the matching GEN_*_STR
#! helper (defined outside this macro). The results are concatenated in a fixed
#! order, with leading/trailing newlines stripped from every piece.
#!   - private and firstprivate both go through GEN_PRIVATE_STR (second
#!     argument False / True respectively).
#!   - copyin and copyinReadOnly both go through GEN_COPYIN_STR (second
#!     argument False / True respectively).
#!   - extraAccArgs is appended after all generated clauses.
#:def ACC_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', &
133+
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
134+
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
135+
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
136+
#:set collapse_val = GEN_COLLAPSE_STR(collapse)
137+
#:set parallelism_val = GEN_PARALLELISM_STR(parallelism)
138+
#:set default_val = GEN_DEFAULT_STR(default)
139+
#:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n')
140+
#:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp)
141+
#:set copy_val = GEN_COPY_STR(copy)
142+
#:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
143+
#:set copyout_val = GEN_COPYOUT_STR(copyout)
144+
#:set create_val = GEN_CREATE_STR(create)
145+
#:set no_create_val = GEN_NOCREATE_STR(no_create)
146+
#:set present_val = GEN_PRESENT_STR(present)
147+
#:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr)
148+
#:set attach_val = GEN_ATTACH_STR(attach)
149+
#:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
#! Join all clause fragments into a single string, in a fixed order.
150+
#:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + &
151+
& default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
152+
& copy_val.strip('\n') + copyin_val.strip('\n') + &
153+
& copyout_val.strip('\n') + create_val.strip('\n') + &
154+
& no_create_val.strip('\n') + present_val.strip('\n') + &
155+
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#! Opening directive = fixed prefix + generated clauses + caller-supplied extras.
156+
#:set acc_directive = '!$acc parallel loop ' + &
157+
& clause_val + extraAccArgs_val.strip('\n')
158+
#:set acc_end_directive = '!$acc end parallel loop'
#! Output: opening directive, the wrapped code, closing directive.
159+
$:acc_directive
160+
$:code
161+
$:acc_end_directive
162+
#:enddef
163+
132164
#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
133165
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
134166
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &

src/common/include/omp_macros.fpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,53 @@
149149
$:omp_end_directive
150150
#:enddef
151151

152+
#! OMP_PARALLEL_LOOP_OLD: block-style macro that wraps `code` in an OpenMP
#! target-offload loop region. It emits a compiler-specific '!$omp target teams ...'
#! directive followed by the generated clauses, then `code`, then the matching
#! '!$omp end ...' directive.
#!
#! The parameter list mirrors ACC_PARALLEL_LOOP_OLD, with extraOmpArgs in place
#! of extraAccArgs. Clause text comes from OMP_*_STR / GEN_*_STR helpers defined
#! outside this macro; `attach` is rendered via OMP_MAP_STR('always,tofrom', ...).
#:def OMP_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', &
153+
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
154+
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
155+
& no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None)
156+
157+
#:set collapse_val = GEN_COLLAPSE_STR(collapse)
158+
#:set parallelism_val = OMP_PARALLELISM_STR(parallelism)
159+
#:set default_val = OMP_DEFAULT_STR(default)
160+
#:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n')
161+
#:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp)
162+
#:set copy_val = OMP_COPY_STR(copy)
#! NOTE(review): copyin and copyinReadOnly use the same OMP_COPYIN_STR call, so
#! no read-only distinction is passed to the helper here - confirm this is intended.
163+
#:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n')
164+
#:set copyout_val = OMP_COPYOUT_STR(copyout)
165+
#:set create_val = OMP_CREATE_STR(create)
166+
#:set no_create_val = OMP_NOCREATE_STR(no_create)
167+
#:set present_val = OMP_PRESENT_STR(present)
168+
#:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr)
169+
#:set attach_val = OMP_MAP_STR('always,tofrom', attach)
170+
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
#! Join all clause fragments into a single string, in a fixed order.
171+
#:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + &
172+
& default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
173+
& copy_val.strip('\n') + copyin_val.strip('\n') + &
174+
& copyout_val.strip('\n') + create_val.strip('\n') + &
175+
& no_create_val.strip('\n') + present_val.strip('\n') + &
176+
& deviceptr_val.strip('\n') + attach_val.strip('\n')
177+
#! Hardcoding the parallelism for now
#! The start/end directive text is selected by MFC_COMPILER. The #:else fallback
#! uses the same strings as the NVIDIA/PGI branch.
#! NOTE(review): parallelism_val is still part of clause_val above; whether it
#! contributes any text depends on OMP_PARALLELISM_STR - verify against that helper.
178+
179+
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
180+
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
181+
#:set omp_end_directive = '!$omp end target teams loop'
182+
#:elif MFC_COMPILER == CCE_COMPILER_ID
183+
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
184+
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
185+
#:elif MFC_COMPILER == AMD_COMPILER_ID
186+
#:set omp_start_directive = '!$omp target teams distribute parallel do '
187+
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
188+
#:else
189+
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
190+
#:set omp_end_directive = '!$omp end target teams loop'
191+
#:endif
192+
193+
#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
#! Output: opening directive, the wrapped code, closing directive.
194+
$:omp_directive
195+
$:code
196+
$:omp_end_directive
197+
#:enddef
198+
152199
#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
153200
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
154201
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &

src/common/include/parallel_macros.fpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,23 @@
1919

2020
#:enddef
2121

22+
#! GPU_PARALLEL_LOOP_OLD: backend-neutral block-style wrapper for a loop nest,
#! used as '#:call GPU_PARALLEL_LOOP_OLD(...)' ... '#:endcall GPU_PARALLEL_LOOP_OLD'.
#!
#! It expands `code` through both ACC_PARALLEL_LOOP_OLD and OMP_PARALLEL_LOOP_OLD
#! and emits three alternatives behind #if / #elif / #else guards:
#!   - MFC_OpenACC defined -> the OpenACC-wrapped code
#!   - MFC_OpenMP defined  -> the OpenMP-wrapped code
#!   - otherwise           -> `code` unchanged
#! extraAccArgs is passed only to the OpenACC macro and extraOmpArgs only to
#! the OpenMP macro; every other argument goes to both.
#:def GPU_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', &
23+
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
24+
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
25+
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None)
26+
#! Arguments are forwarded positionally, so the order below must stay in sync
#! with the parameter order of ACC_PARALLEL_LOOP_OLD and OMP_PARALLEL_LOOP_OLD.
27+
#:set acc_code = ACC_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
28+
#:set omp_code = OMP_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)
29+
#! NOTE(review): the guards below use '#if', not '#:if', so they are presumably
#! written to the output for a later C-preprocessor pass rather than being
#! evaluated during macro expansion - confirm against the build pipeline.
30+
#if defined(MFC_OpenACC)
31+
$:acc_code
32+
#elif defined(MFC_OpenMP)
33+
$:omp_code
34+
#else
35+
$:code
36+
#endif
37+
#:enddef
38+
2239
#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
2340
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
2441
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &

src/common/m_mpi_common.fpp

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,7 @@ contains
757757
#:for mpi_dir in [1, 2, 3]
758758
if (mpi_dir == ${mpi_dir}$) then
759759
#:if mpi_dir == 1
760-
#:call GPU_PARALLEL(collapse=4,private='[r]')
760+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
761761
do l = 0, p
762762
do k = 0, n
763763
do j = 0, buff_size - 1
@@ -768,10 +768,10 @@ contains
768768
end do
769769
end do
770770
end do
771-
#:endcall GPU_PARALLEL
771+
#:endcall GPU_PARALLEL_LOOP_OLD
772772
773773
if (qbmm_comm) then
774-
#:call GPU_PARALLEL(collapse=4,private='[r]')
774+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
775775
do l = 0, p
776776
do k = 0, n
777777
do j = 0, buff_size - 1
@@ -785,9 +785,9 @@ contains
785785
end do
786786
end do
787787
end do
788-
#:endcall GPU_PARALLEL
788+
#:endcall GPU_PARALLEL_LOOP_OLD
789789
790-
#:call GPU_PARALLEL(collapse=5,private='[r]')
790+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
791791
do l = 0, p
792792
do k = 0, n
793793
do j = 0, buff_size - 1
@@ -801,10 +801,10 @@ contains
801801
end do
802802
end do
803803
end do
804-
#:endcall GPU_PARALLEL
804+
#:endcall GPU_PARALLEL_LOOP_OLD
805805
end if
806806
#:elif mpi_dir == 2
807-
#:call GPU_PARALLEL(collapse=4,private='[r]')
807+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
808808
do i = 1, nVar
809809
do l = 0, p
810810
do k = 0, buff_size - 1
@@ -817,10 +817,10 @@ contains
817817
end do
818818
end do
819819
end do
820-
#:endcall GPU_PARALLEL
820+
#:endcall GPU_PARALLEL_LOOP_OLD
821821
822822
if (qbmm_comm) then
823-
#:call GPU_PARALLEL(collapse=5,private='[r]')
823+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
824824
do i = nVar + 1, nVar + 4
825825
do l = 0, p
826826
do k = 0, buff_size - 1
@@ -835,9 +835,9 @@ contains
835835
end do
836836
end do
837837
end do
838-
#:endcall GPU_PARALLEL
838+
#:endcall GPU_PARALLEL_LOOP_OLD
839839
840-
#:call GPU_PARALLEL(collapse=5,private='[r]')
840+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
841841
do i = nVar + 1, nVar + 4
842842
do l = 0, p
843843
do k = 0, buff_size - 1
@@ -852,10 +852,10 @@ contains
852852
end do
853853
end do
854854
end do
855-
#:endcall GPU_PARALLEL
855+
#:endcall GPU_PARALLEL_LOOP_OLD
856856
end if
857857
#:else
858-
#:call GPU_PARALLEL(collapse=4,private='[r]')
858+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
859859
do i = 1, nVar
860860
do l = 0, buff_size - 1
861861
do k = -buff_size, n + buff_size
@@ -868,10 +868,10 @@ contains
868868
end do
869869
end do
870870
end do
871-
#:endcall GPU_PARALLEL
871+
#:endcall GPU_PARALLEL_LOOP_OLD
872872
873873
if (qbmm_comm) then
874-
#:call GPU_PARALLEL(collapse=5,private='[r]')
874+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
875875
do i = nVar + 1, nVar + 4
876876
do l = 0, buff_size - 1
877877
do k = -buff_size, n + buff_size
@@ -886,9 +886,9 @@ contains
886886
end do
887887
end do
888888
end do
889-
#:endcall GPU_PARALLEL
889+
#:endcall GPU_PARALLEL_LOOP_OLD
890890
891-
#:call GPU_PARALLEL(collapse=5,private='[r]')
891+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
892892
do i = nVar + 1, nVar + 4
893893
do l = 0, buff_size - 1
894894
do k = -buff_size, n + buff_size
@@ -903,7 +903,7 @@ contains
903903
end do
904904
end do
905905
end do
906-
#:endcall GPU_PARALLEL
906+
#:endcall GPU_PARALLEL_LOOP_OLD
907907
end if
908908
#:endif
909909
end if
@@ -958,7 +958,7 @@ contains
958958
#:for mpi_dir in [1, 2, 3]
959959
if (mpi_dir == ${mpi_dir}$) then
960960
#:if mpi_dir == 1
961-
#:call GPU_PARALLEL(collapse=4,private='[r]')
961+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
962962
do l = 0, p
963963
do k = 0, n
964964
do j = -buff_size, -1
@@ -976,10 +976,10 @@ contains
976976
end do
977977
end do
978978
end do
979-
#:endcall GPU_PARALLEL
979+
#:endcall GPU_PARALLEL_LOOP_OLD
980980
981981
if (qbmm_comm) then
982-
#:call GPU_PARALLEL(collapse=5,private='[r]')
982+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
983983
do l = 0, p
984984
do k = 0, n
985985
do j = -buff_size, -1
@@ -993,9 +993,9 @@ contains
993993
end do
994994
end do
995995
end do
996-
#:endcall GPU_PARALLEL
996+
#:endcall GPU_PARALLEL_LOOP_OLD
997997
998-
#:call GPU_PARALLEL(collapse=5,private='[r]')
998+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
999999
do l = 0, p
10001000
do k = 0, n
10011001
do j = -buff_size, -1
@@ -1009,10 +1009,10 @@ contains
10091009
end do
10101010
end do
10111011
end do
1012-
#:endcall GPU_PARALLEL
1012+
#:endcall GPU_PARALLEL_LOOP_OLD
10131013
end if
10141014
#:elif mpi_dir == 2
1015-
#:call GPU_PARALLEL(collapse=4,private='[r]')
1015+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
10161016
do i = 1, nVar
10171017
do l = 0, p
10181018
do k = -buff_size, -1
@@ -1031,10 +1031,10 @@ contains
10311031
end do
10321032
end do
10331033
end do
1034-
#:endcall GPU_PARALLEL
1034+
#:endcall GPU_PARALLEL_LOOP_OLD
10351035
10361036
if (qbmm_comm) then
1037-
#:call GPU_PARALLEL(collapse=5,private='[r]')
1037+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
10381038
do i = nVar + 1, nVar + 4
10391039
do l = 0, p
10401040
do k = -buff_size, -1
@@ -1049,9 +1049,9 @@ contains
10491049
end do
10501050
end do
10511051
end do
1052-
#:endcall GPU_PARALLEL
1052+
#:endcall GPU_PARALLEL_LOOP_OLD
10531053
1054-
#:call GPU_PARALLEL(collapse=5,private='[r]')
1054+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
10551055
do i = nVar + 1, nVar + 4
10561056
do l = 0, p
10571057
do k = -buff_size, -1
@@ -1066,11 +1066,11 @@ contains
10661066
end do
10671067
end do
10681068
end do
1069-
#:endcall GPU_PARALLEL
1069+
#:endcall GPU_PARALLEL_LOOP_OLD
10701070
end if
10711071
#:else
10721072
! Unpacking buffer from bc_z%beg
1073-
#:call GPU_PARALLEL(collapse=4,private='[r]')
1073+
#:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]')
10741074
do i = 1, nVar
10751075
do l = -buff_size, -1
10761076
do k = -buff_size, n + buff_size
@@ -1090,10 +1090,10 @@ contains
10901090
end do
10911091
end do
10921092
end do
1093-
#:endcall GPU_PARALLEL
1093+
#:endcall GPU_PARALLEL_LOOP_OLD
10941094
10951095
if (qbmm_comm) then
1096-
#:call GPU_PARALLEL(collapse=5,private='[r]')
1096+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
10971097
do i = nVar + 1, nVar + 4
10981098
do l = -buff_size, -1
10991099
do k = -buff_size, n + buff_size
@@ -1109,9 +1109,9 @@ contains
11091109
end do
11101110
end do
11111111
end do
1112-
#:endcall GPU_PARALLEL
1112+
#:endcall GPU_PARALLEL_LOOP_OLD
11131113
1114-
#:call GPU_PARALLEL(collapse=5,private='[r]')
1114+
#:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]')
11151115
do i = nVar + 1, nVar + 4
11161116
do l = -buff_size, -1
11171117
do k = -buff_size, n + buff_size
@@ -1127,7 +1127,7 @@ contains
11271127
end do
11281128
end do
11291129
end do
1130-
#:endcall GPU_PARALLEL
1130+
#:endcall GPU_PARALLEL_LOOP_OLD
11311131
end if
11321132
#:endif
11331133
end if

0 commit comments

Comments
 (0)