Skip to content

Commit b464e88

Browse files
authored
Merge pull request #7 from semi-h/backend
Backend structure
2 parents d6cd868 + a0abdd1 commit b464e88

File tree

10 files changed

+1300
-0
lines changed

10 files changed

+1300
-0
lines changed

src/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
set(SRC
22
allocator.f90
3+
backend.f90
34
common.f90
5+
solver.f90
46
tdsops.f90
7+
time_integrator.f90
8+
omp/backend.f90
59
omp/common.f90
610
omp/kernels_dist.f90
711
)
812
set(CUDASRC
13+
cuda/backend.f90
914
cuda/common.f90
1015
cuda/allocator.f90
1116
cuda/exec_dist.f90
@@ -21,20 +26,34 @@ endif()
2126
add_library(x3d2 STATIC ${SRC})
2227
target_include_directories(x3d2 INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
2328

29+
add_executable(xcompact xcompact.f90)
30+
target_link_libraries(xcompact PRIVATE x3d2)
31+
2432
target_compile_options(x3d2 PRIVATE "-O3")
33+
target_compile_options(xcompact PRIVATE "-O3")
34+
target_compile_options(xcompact PRIVATE "-cpp")
2535

2636
if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI")
2737
target_compile_options(x3d2 PRIVATE "-cuda")
2838
target_compile_options(x3d2 PRIVATE "-fast")
2939
target_link_options(x3d2 INTERFACE "-cuda")
40+
target_compile_options(xcompact PRIVATE "-cuda")
41+
target_compile_options(xcompact PRIVATE "-fast")
42+
43+
target_compile_options(xcompact PRIVATE "-DCUDA")
44+
# target_link_options(xcompact INTERFACE "-cuda")
3045
endif()
3146

3247
if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU")
3348
target_compile_options(x3d2 PRIVATE "-ffast-math")
49+
target_compile_options(xcompact PRIVATE "-ffast-math")
3450
endif()
3551

3652
find_package(OpenMP REQUIRED)
3753
target_link_libraries(x3d2 PRIVATE OpenMP::OpenMP_Fortran)
54+
target_link_libraries(xcompact PRIVATE OpenMP::OpenMP_Fortran)
3855

3956
find_package(MPI REQUIRED)
4057
target_link_libraries(x3d2 PRIVATE MPI::MPI_Fortran)
58+
target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran)
59+

src/allocator.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ module m_allocator
6363
module procedure field_constructor
6464
end interface field_t
6565

66+
type :: flist_t
   !! Thin wrapper holding a single pointer to a field, so that
   !! arrays of field pointers can be declared (Fortran has no
   !! direct "array of pointers").
   ! Start disassociated so that associated(ptr) is well defined
   ! before the pointer is assigned.
   class(field_t), pointer :: ptr => null()
end type flist_t
69+
6670
contains
6771

6872
function field_constructor(dims, next, id) result(m)

src/backend.f90

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
module m_base_backend
   use m_allocator, only: allocator_t, field_t
   use m_common, only: dp
   use m_tdsops, only: tdsops_t, dirps_t

   implicit none

   type, abstract :: base_backend_t
      !! base_backend class defines all the abstract operations that the
      !! solver class requires.
      !!
      !! For example, transport equation in solver class evaluates the
      !! derivatives in x, y, and z directions, and reorders the input
      !! fields as required. Then finally, combines all the directional
      !! derivatives to obtain the divergence of U*.
      !!
      !! All these high level operations solver class executes are
      !! defined here using the abstract interfaces. Every backend
      !! implementation extends the present abstract backend class to
      !! define the specifics of these operations based on the target
      !! architecture.

      real(dp) :: nu
      ! Pointer components are default-initialised to a disassociated
      ! state so that associated() can be queried safely before the
      ! backend has been wired up.
      class(allocator_t), pointer :: allocator => null()
      class(dirps_t), pointer :: xdirps => null(), &
                                 ydirps => null(), &
                                 zdirps => null()
   contains
      procedure(transeq_ders), deferred :: transeq_x
      procedure(transeq_ders), deferred :: transeq_y
      procedure(transeq_ders), deferred :: transeq_z
      procedure(transposer), deferred :: trans_x2y
      procedure(transposer), deferred :: trans_x2z
      procedure(sum9into3), deferred :: sum_yzintox
      procedure(get_fields), deferred :: get_fields
      procedure(set_fields), deferred :: set_fields
      procedure(alloc_tdsops), deferred :: alloc_tdsops
   end type base_backend_t

   abstract interface
      subroutine transeq_ders(self, du, dv, dw, u, v, w, dirps)
         !! transeq equation obtains the derivatives direction by
         !! direction, and the exact algorithm used to obtain these
         !! derivatives is decided at runtime. Backend implementations
         !! are responsible for directing calls to transeq_ders into
         !! the correct algorithm.
         import :: base_backend_t
         import :: field_t
         import :: dirps_t
         implicit none

         class(base_backend_t) :: self
         class(field_t), intent(inout) :: du, dv, dw
         class(field_t), intent(in) :: u, v, w
         type(dirps_t), intent(in) :: dirps
      end subroutine transeq_ders
   end interface

   abstract interface
      subroutine transposer(self, u_, v_, w_, u, v, w)
         !! transposer subroutines are straightforward, they rearrange
         !! data into our specialist data structure so that regardless
         !! of the direction tridiagonal systems are solved efficiently
         !! and fast.
         import :: base_backend_t
         import :: field_t
         implicit none

         class(base_backend_t) :: self
         class(field_t), intent(inout) :: u_, v_, w_
         class(field_t), intent(in) :: u, v, w
      end subroutine transposer
   end interface

   abstract interface
      subroutine sum9into3(self, du, dv, dw, du_y, dv_y, dw_y, du_z, dv_z, dw_z)
         !! sum9into3 subroutine combines all the directional velocity
         !! derivatives into the corresponding x directional fields.
         import :: base_backend_t
         import :: field_t
         implicit none

         class(base_backend_t) :: self
         class(field_t), intent(inout) :: du, dv, dw
         class(field_t), intent(in) :: du_y, dv_y, dw_y, du_z, dv_z, dw_z
      end subroutine sum9into3
   end interface

   abstract interface
      subroutine get_fields(self, u_out, v_out, w_out, u, v, w)
         !! copy the specialist data structure from device or host back
         !! to a regular 3D data structure.
         import :: base_backend_t
         import :: dp
         import :: field_t
         implicit none

         class(base_backend_t) :: self
         real(dp), dimension(:, :, :), intent(out) :: u_out, v_out, w_out
         class(field_t), intent(in) :: u, v, w
      end subroutine get_fields

      subroutine set_fields(self, u, v, w, u_in, v_in, w_in)
         !! copy the initial condition stored in a regular 3D data
         !! structure into the specialist data structure arrays on the
         !! device or host.
         import :: base_backend_t
         import :: dp
         import :: field_t
         implicit none

         class(base_backend_t) :: self
         class(field_t), intent(inout) :: u, v, w
         real(dp), dimension(:, :, :), intent(in) :: u_in, v_in, w_in
      end subroutine set_fields
   end interface

   abstract interface
      subroutine alloc_tdsops(self, tdsops, n, dx, operation, scheme)
         !! Interface for the backend-specific set up of a tdsops_t
         !! object. The tdsops argument is allocatable and polymorphic,
         !! so each backend can presumably allocate its own extension
         !! of tdsops_t here (confirm against the implementations).
         !! n and dx look like the point count and grid spacing, and
         !! operation/scheme are character selectors whose accepted
         !! values are defined by the implementing backend/tdsops code.
         import :: base_backend_t
         import :: dp
         import :: tdsops_t
         implicit none

         class(base_backend_t) :: self
         class(tdsops_t), allocatable, intent(inout) :: tdsops
         integer, intent(in) :: n
         real(dp), intent(in) :: dx
         character(*), intent(in) :: operation, scheme
      end subroutine alloc_tdsops
   end interface

end module m_base_backend

src/common.f90

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,71 @@ module m_common
44
integer, parameter :: dp=kind(0.0d0)
55
real(dp), parameter :: pi = 4*atan(1.0_dp)
66

7+
type :: globs_t
   !! Bundle of global run parameters. The meanings noted below are
   !! inferred from the component names only; confirm against the
   !! code that fills this type in.

   ! presumably the global number of grid points per direction
   integer :: nx
   integer :: ny
   integer :: nz

   ! presumably the number of grid points owned by this rank
   integer :: nx_loc
   integer :: ny_loc
   integer :: nz_loc

   ! group counts per direction (semantics defined by the backends)
   integer :: n_groups_x
   integer :: n_groups_y
   integer :: n_groups_z

   ! presumably domain lengths and grid spacings
   real(dp) :: Lx
   real(dp) :: Ly
   real(dp) :: Lz
   real(dp) :: dx
   real(dp) :: dy
   real(dp) :: dz

   ! process counts per direction; default to a single process
   integer :: nproc_x = 1
   integer :: nproc_y = 1
   integer :: nproc_z = 1

   ! boundary-condition labels at the start (_s) and end (_e) of
   ! each direction -- TODO confirm the accepted strings
   character(len=20) :: BC_x_s
   character(len=20) :: BC_x_e
   character(len=20) :: BC_y_s
   character(len=20) :: BC_y_e
   character(len=20) :: BC_z_s
   character(len=20) :: BC_z_e
end type globs_t
16+
17+
contains
18+
19+
pure subroutine set_pprev_pnext(xprev, xnext, yprev, ynext, zprev, znext, &
                                xnproc, ynproc, znproc, nrank)
   !! Compute the ranks of the previous and next neighbours of rank
   !! nrank in each direction of an xnproc x ynproc x znproc process
   !! grid, with wrap-around at both ends of every direction.
   !!
   !! Ranks are laid out with x varying fastest:
   !!    nrank == ix + iy*xnproc + iz*xnproc*ynproc
   !! where ix, iy, iz are the zero-based coordinates of the rank.
   implicit none

   integer, intent(out) :: xprev, xnext, yprev, ynext, zprev, znext
   integer, intent(in) :: xnproc, ynproc, znproc, nrank

   integer :: ix, iy, iz
   integer :: xstride, ystride, zstride

   ! zero-based coordinates of this rank in the process grid
   ix = modulo(nrank, xnproc)
   iy = modulo((nrank - ix)/xnproc, ynproc)
   iz = (nrank - ix - iy*xnproc)/(xnproc*ynproc)

   ! rank distance between neighbours in y and z, and the jump that
   ! takes the first rank of a direction to the last one (wrap-around)
   ystride = xnproc
   zstride = xnproc*ynproc
   xstride = xnproc - 1

   ! x direction: neighbours differ by one rank
   xprev = merge(nrank + xstride, nrank - 1, ix == 0)
   xnext = merge(nrank - xstride, nrank + 1, ix == xnproc - 1)

   ! y direction: neighbours differ by one full x-row
   yprev = merge(nrank + ystride*(ynproc - 1), nrank - ystride, iy == 0)
   ynext = merge(nrank - ystride*(ynproc - 1), nrank + ystride, &
                 iy == ynproc - 1)

   ! z direction: neighbours differ by one full xy-plane
   zprev = merge(nrank + zstride*(znproc - 1), nrank - zstride, iz == 0)
   znext = merge(nrank - zstride*(znproc - 1), nrank + zstride, &
                 iz == znproc - 1)

end subroutine set_pprev_pnext
73+
774
end module m_common

0 commit comments

Comments
 (0)