Skip to content

Commit d52abd2

Browse files
author
Tanush Prathi
committed
Add AMD compiler support, different macro expansions based on compiler
1 parent d7dfc0d commit d52abd2

File tree

6 files changed

+80
-15
lines changed

6 files changed

+80
-15
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,9 @@ function(MFC_SETUP_TARGET)
494494
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
495495
target_compile_options(${a_target} PRIVATE -fopenmp)
496496
target_link_options(${a_target} PRIVATE -fopenmp)
497+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
498+
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
499+
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
497500
endif()
498501
endif()
499502

@@ -533,6 +536,9 @@ function(MFC_SETUP_TARGET)
533536
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
534537
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
535538
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
539+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
540+
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
541+
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice)
536542
endif()
537543
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
538544
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")

src/common/include/omp_macros.fpp

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#:include 'shared_parallel_macros.fpp'
22

3+
#:set NVIDIA_COMPILER_ID="NVHPC"
4+
#:set PGI_COMPILER_ID="PGI"
5+
#:set INTEL_COMPILER_ID="Intel"
6+
#:set CCE_COMPILER_ID="Cray"
7+
#:set AMD_COMPILER_ID="LLVMFlang"
8+
39
#:def OMP_MAP_STR(map_type, var_list)
410
#:assert map_type is not None
511
#:assert isinstance(map_type, str)
@@ -17,8 +23,15 @@
1723
#:assert isinstance(default, str)
1824
#:assert (default == 'present' or default == 'none')
1925
#:if default == 'present'
20-
#! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) '
21-
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) '
26+
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
27+
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) '
28+
#:elif MFC_COMPILER == CCE_COMPILER_ID
29+
#:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) '
30+
#:elif MFC_COMPILER == AMD_COMPILER_ID
31+
#:set default_val = ''
32+
#:else
33+
#:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) '
34+
#:endif
2235
#:elif default == 'none'
2336
#:stop 'Not Supported Yet'
2437
#:endif
@@ -160,12 +173,22 @@
160173
& no_create_val.strip('\n') + present_val.strip('\n') + &
161174
& deviceptr_val.strip('\n') + attach_val.strip('\n')
162175
#! Hardcoding the parallelism for now
163-
!#:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + &
164-
!& clause_val + extraOmpArgs_val.strip('\n')
165-
!#:set omp_end_directive = '!$omp end target teams loop'
166-
#:set omp_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + &
167-
& clause_val + extraOmpArgs_val.strip('\n')
168-
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
176+
177+
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
178+
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
179+
#:set omp_end_directive = '!$omp end target teams loop'
180+
#:elif MFC_COMPILER == CCE_COMPILER_ID
181+
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
182+
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
183+
#:elif MFC_COMPILER == AMD_COMPILER_ID
184+
#:set omp_start_directive = '!$omp target teams distribute parallel do '
185+
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
186+
#:else
187+
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
188+
#:set omp_end_directive = '!$omp end target teams loop'
189+
#:endif
190+
191+
#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
169192
$:omp_directive
170193
$:code
171194
$:omp_end_directive
@@ -184,7 +207,13 @@
184207
#:else
185208
#:set function_name_val = ''
186209
#:endif
187-
#:set clause_val = nohost_val.strip('\n')
210+
211+
#:if MFC_COMPILER == AMD_COMPILER_ID
212+
#:set clause_val = ''
213+
#:else
214+
#:set clause_val = nohost_val.strip('\n')
215+
#:endif
216+
188217
#:set omp_directive = '!$omp declare target ' + &
189218
& clause_val + extraOmpArgs_val.strip('\n')
190219
$:omp_directive
@@ -201,11 +230,16 @@
201230
$:omp_directive
202231
#:enddef
203232

204-
#! Not implemented yet
233+
#! Not fully implemented yet: every argument is currently ignored and only a compiler-specific directive is emitted
205234
#:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None)
206-
#! loop is going to be ignored since all loops right now are seq
207-
#:set temp = ''
208-
$:temp
235+
#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
236+
#:set omp_directive = '!$omp loop bind(thread)'
237+
#! Cray (CCE) and AMD (LLVMFlang) get no loop-level directive. The Cray ID variable set at the top of this file is CCE_COMPILER_ID.
#:elif MFC_COMPILER == CCE_COMPILER_ID or MFC_COMPILER == AMD_COMPILER_ID
238+
#:set omp_directive = ''
239+
#:else
240+
#:set omp_directive = ''
241+
#:endif
242+
$:omp_directive
209243
#:enddef
210244

211245
#:def OMP_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraOmpArgs=None)
@@ -298,4 +332,22 @@
298332
#:set omp_directive = '!$omp barrier ' + clause_val + extraOmpArgs_val.strip('\n')
299333
$:omp_directive
300334
#:enddef
335+
336+
#! UNDEF_AMD(code): emits `code` unchanged unless MFC_COMPILER equals AMD_COMPILER_ID ("LLVMFlang"), in which case nothing is emitted.
#:def UNDEF_AMD(code)
337+
#:if MFC_COMPILER != AMD_COMPILER_ID
338+
$:code
339+
#:endif
340+
#:enddef
341+
342+
#! UNDEF_CCE(code): emits `code` unchanged unless MFC_COMPILER equals CCE_COMPILER_ID ("Cray"), in which case nothing is emitted.
#:def UNDEF_CCE(code)
343+
#:if MFC_COMPILER != CCE_COMPILER_ID
344+
$:code
345+
#:endif
346+
#:enddef
347+
348+
#! UNDEF_NVIDIA(code): emits `code` unchanged unless MFC_COMPILER equals NVIDIA_COMPILER_ID ("NVHPC") or PGI_COMPILER_ID ("PGI"), in which case nothing is emitted.
#:def UNDEF_NVIDIA(code)
349+
#:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID
350+
$:code
351+
#:endif
352+
#:enddef
301353
! New line at end of file is required for FYPP

src/common/m_chemistry.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ contains
9999
real(wp), dimension(num_species) :: Ys
100100
real(wp), dimension(num_species) :: omega
101101

102+
#:block UNDEF_AMD
102103
#:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]')
103104
do z = bounds(3)%beg, bounds(3)%end
104105
do y = bounds(2)%beg, bounds(2)%end
@@ -127,6 +128,7 @@ contains
127128
end do
128129
end do
129130
#:endcall GPU_PARALLEL_LOOP
131+
#:endblock UNDEF_AMD
130132

131133
end subroutine s_compute_chemistry_reaction_flux
132134

src/simulation/m_cbc.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ contains
773773
end if
774774
775775
! FD2 or FD4 of RHS at j = 0
776+
#:block UNDEF_AMD
776777
#:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]')
777778
do r = is3%beg, is3%end
778779
do k = is2%beg, is2%end
@@ -1105,6 +1106,7 @@ contains
11051106
end do
11061107
end do
11071108
#:endcall GPU_PARALLEL_LOOP
1109+
#:endblock UNDEF_AMD
11081110
end if
11091111
#:endfor
11101112

src/simulation/m_fftw.fpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ contains
136136
integer :: i, j, k, l !< Generic loop iterators
137137
integer :: ierr !< Generic flag used to identify and report GPU errors
138138

139-
#if 0
139+
#:block UNDEF_CCE
140140
! Restrict filter to processors that have cells adjacent to axis
141141
if (bc_y%beg >= 0) return
142142
#if defined(MFC_GPU)
@@ -304,7 +304,8 @@ contains
304304
end do
305305
end do
306306
#endif
307-
#endif
307+
#:endblock UNDEF_CCE
308+
308309
end subroutine s_apply_fourier_filter
309310

310311
!> The purpose of this subroutine is to destroy the fftw plan

src/simulation/m_riemann_solvers.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2943,6 +2943,7 @@ contains
29432943

29442944
#:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
29452945
if (norm_dir == ${NORM_DIR}$) then
2946+
#:block UNDEF_AMD
29462947
#:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]')
29472948
do l = is3%beg, is3%end
29482949
do k = is2%beg, is2%end
@@ -3116,6 +3117,7 @@ contains
31163117
end do
31173118
end do
31183119
#:endcall GPU_PARALLEL_LOOP
3120+
#:endblock UNDEF_AMD
31193121
end if
31203122
#:endfor
31213123

0 commit comments

Comments
 (0)