Skip to content

Commit 86327aa

Browse files
committed
feat: Add support for non-periodic BCs in the dist algorithm.
1 parent 3a1caf4 commit 86327aa

File tree

5 files changed: 193 additions and 163 deletions.

src/cuda/kernels_dist.f90

Lines changed: 95 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,16 @@ module m_cuda_kernels_dist
88
contains
99

1010
attributes(global) subroutine der_univ_dist( &
11-
du, send_u_b, send_u_e, u, u_b, u_e, coeffs_b, coeffs_e, coeffs, n, &
11+
du, send_u_s, send_u_e, u, u_s, u_e, coeffs_s, coeffs_e, coeffs, n, &
1212
ffr, fbc, faf &
1313
)
1414
implicit none
1515

1616
! Arguments
17-
real(dp), device, intent(out), dimension(:, :, :) :: du, send_u_b, &
17+
real(dp), device, intent(out), dimension(:, :, :) :: du, send_u_s, &
1818
send_u_e
19-
real(dp), device, intent(in), dimension(:, :, :) :: u, u_b, u_e
20-
real(dp), device, intent(in), dimension(:, :) :: coeffs_b, coeffs_e
19+
real(dp), device, intent(in), dimension(:, :, :) :: u, u_s, u_e
20+
real(dp), device, intent(in), dimension(:, :) :: coeffs_s, coeffs_e
2121
real(dp), device, intent(in), dimension(:) :: coeffs
2222
integer, value, intent(in) :: n
2323
real(dp), device, intent(in), dimension(:) :: ffr, fbc, faf
@@ -38,70 +38,102 @@ attributes(global) subroutine der_univ_dist( &
3838
c_p1 = coeffs(6); c_p2 = coeffs(7); c_p3 = coeffs(8); c_p4 = coeffs(9)
3939
last_r = ffr(1)
4040

41-
du(i, 1, b) = coeffs(1)*u_b(i, 1, b) + coeffs(2)*u_b(i, 2, b) &
42-
+ coeffs(3)*u_b(i, 3, b) + coeffs(4)*u_b(i, 4, b) &
43-
+ coeffs(5)*u(i, 1, b) &
44-
+ coeffs(6)*u(i, 2, b) + coeffs(7)*u(i, 3, b) &
45-
+ coeffs(8)*u(i, 4, b) + coeffs(9)*u(i, 5, b)
41+
du(i, 1, b) = coeffs_s(1, 1)*u_s(i, 1, b) &
42+
+ coeffs_s(2, 1)*u_s(i, 2, b) &
43+
+ coeffs_s(3, 1)*u_s(i, 3, b) &
44+
+ coeffs_s(4, 1)*u_s(i, 4, b) &
45+
+ coeffs_s(5, 1)*u(i, 1, b) &
46+
+ coeffs_s(6, 1)*u(i, 2, b) &
47+
+ coeffs_s(7, 1)*u(i, 3, b) &
48+
+ coeffs_s(8, 1)*u(i, 4, b) &
49+
+ coeffs_s(9, 1)*u(i, 5, b)
4650
du(i, 1, b) = du(i, 1, b)*faf(1)
47-
du(i, 2, b) = coeffs(1)*u_b(i, 2, b) + coeffs(2)*u_b(i, 3, b) &
48-
+ coeffs(3)*u_b(i, 4, b) + coeffs(4)*u(i, 1, b) &
49-
+ coeffs(5)*u(i, 2, b) &
50-
+ coeffs(6)*u(i, 3, b) + coeffs(7)*u(i, 4, b) &
51-
+ coeffs(8)*u(i, 5, b) + coeffs(9)*u(i, 6, b)
51+
du(i, 2, b) = coeffs_s(1, 2)*u_s(i, 2, b) &
52+
+ coeffs_s(2, 2)*u_s(i, 3, b) &
53+
+ coeffs_s(3, 2)*u_s(i, 4, b) &
54+
+ coeffs_s(4, 2)*u(i, 1, b) &
55+
+ coeffs_s(5, 2)*u(i, 2, b) &
56+
+ coeffs_s(6, 2)*u(i, 3, b) &
57+
+ coeffs_s(7, 2)*u(i, 4, b) &
58+
+ coeffs_s(8, 2)*u(i, 5, b) &
59+
+ coeffs_s(9, 2)*u(i, 6, b)
5260
du(i, 2, b) = du(i, 2, b)*faf(2)
53-
du(i, 3, b) = coeffs(1)*u_b(i, 3, b) + coeffs(2)*u_b(i, 4, b) &
54-
+ coeffs(3)*u(i, 1, b) + coeffs(4)*u(i, 2, b) &
55-
+ coeffs(5)*u(i, 3, b) &
56-
+ coeffs(6)*u(i, 4, b) + coeffs(7)*u(i, 5, b) &
57-
+ coeffs(8)*u(i, 6, b) + coeffs(9)*u(i, 7, b)
61+
du(i, 3, b) = coeffs_s(1, 3)*u_s(i, 3, b) &
62+
+ coeffs_s(2, 3)*u_s(i, 4, b) &
63+
+ coeffs_s(3, 3)*u(i, 1, b) &
64+
+ coeffs_s(4, 3)*u(i, 2, b) &
65+
+ coeffs_s(5, 3)*u(i, 3, b) &
66+
+ coeffs_s(6, 3)*u(i, 4, b) &
67+
+ coeffs_s(7, 3)*u(i, 5, b) &
68+
+ coeffs_s(8, 3)*u(i, 6, b) &
69+
+ coeffs_s(9, 3)*u(i, 7, b)
5870
du(i, 3, b) = ffr(3)*(du(i, 3, b) - faf(3)*du(i, 2, b))
59-
du(i, 4, b) = coeffs(1)*u_b(i, 4, b) + coeffs(2)*u(i, 1, b) &
60-
+ coeffs(3)*u(i, 2, b) + coeffs(4)*u(i, 3, b) &
61-
+ coeffs(5)*u(i, 4, b) &
62-
+ coeffs(6)*u(i, 5, b) + coeffs(7)*u(i, 6, b) &
63-
+ coeffs(8)*u(i, 7, b) + coeffs(9)*u(i, 8, b)
71+
du(i, 4, b) = coeffs_s(1, 4)*u_s(i, 4, b) &
72+
+ coeffs_s(2, 4)*u(i, 1, b) &
73+
+ coeffs_s(3, 4)*u(i, 2, b) &
74+
+ coeffs_s(4, 4)*u(i, 3, b) &
75+
+ coeffs_s(5, 4)*u(i, 4, b) &
76+
+ coeffs_s(6, 4)*u(i, 5, b) &
77+
+ coeffs_s(7, 4)*u(i, 6, b) &
78+
+ coeffs_s(8, 4)*u(i, 7, b) &
79+
+ coeffs_s(9, 4)*u(i, 8, b)
6480
du(i, 4, b) = ffr(4)*(du(i, 4, b) - faf(3)*du(i, 3, b))
6581

6682
alpha = faf(5)
6783

68-
do j = 5, n-4
69-
temp_du = c_m4*u(i, j-4, b) + c_m3*u(i, j-3, b) &
70-
+ c_m2*u(i, j-2, b) + c_m1*u(i, j-1, b) &
71-
+ c_j*u(i, j, b) &
72-
+ c_p1*u(i, j+1, b) + c_p2*u(i, j+2, b) &
73-
+ c_p3*u(i, j+3, b) + c_p4*u(i, j+4, b)
74-
du(i, j, b) = ffr(j)*(temp_du - alpha*du(i, j-1, b))
84+
do j = 5, n - 4
85+
temp_du = c_m4*u(i, j - 4, b) + c_m3*u(i, j - 3, b) &
86+
+ c_m2*u(i, j - 2, b) + c_m1*u(i, j - 1, b) &
87+
+ c_j*u(i, j, b) &
88+
+ c_p1*u(i, j + 1, b) + c_p2*u(i, j + 2, b) &
89+
+ c_p3*u(i, j + 3, b) + c_p4*u(i, j + 4, b)
90+
du(i, j, b) = ffr(j)*(temp_du - alpha*du(i, j - 1, b))
7591
end do
7692

77-
j = n-3
78-
du(i, j, b) = coeffs(1)*u(i, j-4, b) + coeffs(2)*u(i, j-3, b) &
79-
+ coeffs(3)*u(i, j-2, b) + coeffs(4)*u(i, j-1, b) &
80-
+ coeffs(5)*u(i, j, b) &
81-
+ coeffs(6)*u(i, j+1, b) + coeffs(7)*u(i, j+2, b) &
82-
+ coeffs(8)*u(i, j+3, b) + coeffs(9)*u_e(i, 1, b)
83-
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j-1, b))
84-
j = n-2
85-
du(i, j, b) = coeffs(1)*u(i, j-4, b) + coeffs(2)*u(i, j-3, b) &
86-
+ coeffs(3)*u(i, j-2, b) + coeffs(4)*u(i, j-1, b) &
87-
+ coeffs(5)*u(i, j, b) &
88-
+ coeffs(6)*u(i, j+1, b) + coeffs(7)*u(i, j+2, b) &
89-
+ coeffs(8)*u_e(i, 1, b) + coeffs(9)*u_e(i, 2, b)
90-
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j-1, b))
91-
j = n-1
92-
du(i, j, b) = coeffs(1)*u(i, j-4, b) + coeffs(2)*u(i, j-3, b) &
93-
+ coeffs(3)*u(i, j-2, b) + coeffs(4)*u(i, j-1, b) &
94-
+ coeffs(5)*u(i, j, b) &
95-
+ coeffs(6)*u(i, j+1, b) + coeffs(7)*u_e(i, 1, b) &
96-
+ coeffs(8)*u_e(i, 2, b) + coeffs(9)*u_e(i, 3, b)
97-
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j-1, b))
93+
j = n - 3
94+
du(i, j, b) = coeffs_e(1, 1)*u(i, j - 4, b) &
95+
+ coeffs_e(2, 1)*u(i, j - 3, b) &
96+
+ coeffs_e(3, 1)*u(i, j - 2, b) &
97+
+ coeffs_e(4, 1)*u(i, j - 1, b) &
98+
+ coeffs_e(5, 1)*u(i, j, b) &
99+
+ coeffs_e(6, 1)*u(i, j + 1, b) &
100+
+ coeffs_e(7, 1)*u(i, j + 2, b) &
101+
+ coeffs_e(8, 1)*u(i, j + 3, b) &
102+
+ coeffs_e(9, 1)*u_e(i, 1, b)
103+
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j - 1, b))
104+
j = n - 2
105+
du(i, j, b) = coeffs_e(1, 2)*u(i, j - 4, b) &
106+
+ coeffs_e(2, 2)*u(i, j - 3, b) &
107+
+ coeffs_e(3, 2)*u(i, j - 2, b) &
108+
+ coeffs_e(4, 2)*u(i, j - 1, b) &
109+
+ coeffs_e(5, 2)*u(i, j, b) &
110+
+ coeffs_e(6, 2)*u(i, j + 1, b) &
111+
+ coeffs_e(7, 2)*u(i, j + 2, b) &
112+
+ coeffs_e(8, 2)*u_e(i, 1, b) &
113+
+ coeffs_e(9, 2)*u_e(i, 2, b)
114+
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j - 1, b))
115+
j = n - 1
116+
du(i, j, b) = coeffs_e(1, 3)*u(i, j - 4, b) &
117+
+ coeffs_e(2, 3)*u(i, j - 3, b) &
118+
+ coeffs_e(3, 3)*u(i, j - 2, b) &
119+
+ coeffs_e(4, 3)*u(i, j - 1, b) &
120+
+ coeffs_e(5, 3)*u(i, j, b) &
121+
+ coeffs_e(6, 3)*u(i, j + 1, b) &
122+
+ coeffs_e(7, 3)*u_e(i, 1, b) &
123+
+ coeffs_e(8, 3)*u_e(i, 2, b) &
124+
+ coeffs_e(9, 3)*u_e(i, 3, b)
125+
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j - 1, b))
98126
j = n
99-
du(i, j, b) = coeffs(1)*u(i, j-4, b) + coeffs(2)*u(i, j-3, b) &
100-
+ coeffs(3)*u(i, j-2, b) + coeffs(4)*u(i, j-1, b) &
101-
+ coeffs(5)*u(i, j, b) &
102-
+ coeffs(6)*u_e(i, 1, b) + coeffs(7)*u_e(i, 2, b) &
103-
+ coeffs(8)*u_e(i, 3, b) + coeffs(9)*u_e(i, 4, b)
104-
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j-1, b))
127+
du(i, j, b) = coeffs_e(1, 4)*u(i, j - 4, b) &
128+
+ coeffs_e(2, 4)*u(i, j - 3, b) &
129+
+ coeffs_e(3, 4)*u(i, j - 2, b) &
130+
+ coeffs_e(4, 4)*u(i, j - 1, b) &
131+
+ coeffs_e(5, 4)*u(i, j, b) &
132+
+ coeffs_e(6, 4)*u_e(i, 1, b) &
133+
+ coeffs_e(7, 4)*u_e(i, 2, b) &
134+
+ coeffs_e(8, 4)*u_e(i, 3, b) &
135+
+ coeffs_e(9, 4)*u_e(i, 4, b)
136+
du(i, j, b) = ffr(j)*(du(i, j, b) - faf(j)*du(i, j - 1, b))
105137

106138
send_u_e(i, 1, b) = du(i, n, b)
107139

@@ -110,17 +142,17 @@ attributes(global) subroutine der_univ_dist( &
110142
du(i, j, b) = du(i, j, b) - fbc(j)*du(i, j + 1, b)
111143
end do
112144
du(i, 1, b) = last_r*(du(i, 1, b) - fbc(1)*du(i, 2, b))
113-
send_u_b(i, 1, b) = du(i, 1, b)
145+
send_u_s(i, 1, b) = du(i, 1, b)
114146

115147
end subroutine der_univ_dist
116148

117-
attributes(global) subroutine der_univ_subs(du, recv_u_b, recv_u_e, &
149+
attributes(global) subroutine der_univ_subs(du, recv_u_s, recv_u_e, &
118150
n, dist_sa, dist_sc)
119151
implicit none
120152

121153
! Arguments
122154
real(dp), device, intent(out), dimension(:, :, :) :: du
123-
real(dp), device, intent(in), dimension(:, :, :) :: recv_u_b, recv_u_e
155+
real(dp), device, intent(in), dimension(:, :, :) :: recv_u_s, recv_u_e
124156
real(dp), device, intent(in), dimension(:) :: dist_sa, dist_sc
125157
integer, value, intent(in) :: n
126158

@@ -135,13 +167,11 @@ attributes(global) subroutine der_univ_subs(du, recv_u_b, recv_u_e, &
135167
ur = dist_sc(n)
136168
recp = 1._dp/(1._dp - ur*bl)
137169

138-
!du(i, 1, b) = recp*(du(i, 1, b) - bl*recv_u_b(i, 1, b))
139-
!du(i, n, b) = recp*(du(i, n, b) - ur*recv_u_e(i, 1, b))
140-
du_1 = recp*(du(i, 1, b) - bl*recv_u_b(i, 1, b))
170+
du_1 = recp*(du(i, 1, b) - bl*recv_u_s(i, 1, b))
141171
du_n = recp*(du(i, n, b) - ur*recv_u_e(i, 1, b))
142172

143173
du(i, 1, b) = du_1
144-
do j = 2, n-1
174+
do j = 2, n - 1
145175
du(i, j, b) = (du(i, j, b) - dist_sa(j)*du_1 - dist_sc(j)*du_n)
146176
end do
147177
du(i, n, b) = du_n

src/derparams.f90

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,14 +9,14 @@ subroutine der_1_vv()
99
implicit none
1010
end subroutine der_1_vv
1111

12-
subroutine der_2_vv(coeffs, coeffs_b, coeffs_e, &
12+
subroutine der_2_vv(coeffs, coeffs_s, coeffs_e, &
1313
dist_fr, dist_bc, dist_af, dist_sa, dist_sc, &
1414
n_halo, dx2, n, bcond)
1515
implicit none
1616

1717
real(dp), allocatable, dimension(:), intent(out) :: coeffs, &
1818
dist_fr, dist_bc, dist_af, dist_sa, dist_sc
19-
real(dp), allocatable, dimension(:,:), intent(out) :: coeffs_b, coeffs_e
19+
real(dp), allocatable, dimension(:,:), intent(out) :: coeffs_s, coeffs_e
2020
integer, intent(out) :: n_halo
2121
real(dp), intent(in) :: dx2
2222
integer, intent(in) :: n
@@ -44,11 +44,11 @@ subroutine der_2_vv(coeffs, coeffs_b, coeffs_e, &
4444
select case (bcond)
4545
case ('periodic')
4646
dist_sa(:) = alfa; dist_sc(:) = alfa; dist_b(:) = 1._dp
47-
allocate(coeffs_b(n_halo, n_stencil))
48-
allocate(coeffs_e(n_halo, n_stencil))
47+
allocate(coeffs_s(n_stencil, n_halo))
48+
allocate(coeffs_e(n_stencil, n_halo))
4949
do i = 1, n_halo
50-
coeffs_b(i,:) = coeffs(:)
51-
coeffs_e(i,:) = coeffs(:)
50+
coeffs_s(:, i) = coeffs(:)
51+
coeffs_e(:, i) = coeffs(:)
5252
end do
5353
case default
5454
print*, 'Boundary condition is not recognized :', bcond

0 commit comments

Comments
 (0)