@@ -8,16 +8,16 @@ module m_cuda_kernels_dist
88contains
99
1010 attributes(global) subroutine der_univ_dist ( &
11- du , send_u_b , send_u_e , u , u_b , u_e , coeffs_b , coeffs_e , coeffs , n , &
11+ du , send_u_s , send_u_e , u , u_s , u_e , coeffs_s , coeffs_e , coeffs , n , &
1212 ffr , fbc , faf &
1313 )
1414 implicit none
1515
1616 ! Arguments
17- real (dp), device, intent (out ), dimension (:, :, :) :: du, send_u_b , &
17+ real (dp), device, intent (out ), dimension (:, :, :) :: du, send_u_s , &
1818 send_u_e
19- real (dp), device, intent (in ), dimension (:, :, :) :: u, u_b , u_e
20- real (dp), device, intent (in ), dimension (:, :) :: coeffs_b , coeffs_e
19+ real (dp), device, intent (in ), dimension (:, :, :) :: u, u_s , u_e
20+ real (dp), device, intent (in ), dimension (:, :) :: coeffs_s , coeffs_e
2121 real (dp), device, intent (in ), dimension (:) :: coeffs
2222 integer , value, intent (in ) :: n
2323 real (dp), device, intent (in ), dimension (:) :: ffr, fbc, faf
@@ -38,70 +38,102 @@ attributes(global) subroutine der_univ_dist( &
3838 c_p1 = coeffs(6 ); c_p2 = coeffs(7 ); c_p3 = coeffs(8 ); c_p4 = coeffs(9 )
3939 last_r = ffr(1 )
4040
41- du(i, 1 , b) = coeffs(1 )* u_b(i, 1 , b) + coeffs(2 )* u_b(i, 2 , b) &
42- + coeffs(3 )* u_b(i, 3 , b) + coeffs(4 )* u_b(i, 4 , b) &
43- + coeffs(5 )* u(i, 1 , b) &
44- + coeffs(6 )* u(i, 2 , b) + coeffs(7 )* u(i, 3 , b) &
45- + coeffs(8 )* u(i, 4 , b) + coeffs(9 )* u(i, 5 , b)
41+ du(i, 1 , b) = coeffs_s(1 , 1 )* u_s(i, 1 , b) &
42+ + coeffs_s(2 , 1 )* u_s(i, 2 , b) &
43+ + coeffs_s(3 , 1 )* u_s(i, 3 , b) &
44+ + coeffs_s(4 , 1 )* u_s(i, 4 , b) &
45+ + coeffs_s(5 , 1 )* u(i, 1 , b) &
46+ + coeffs_s(6 , 1 )* u(i, 2 , b) &
47+ + coeffs_s(7 , 1 )* u(i, 3 , b) &
48+ + coeffs_s(8 , 1 )* u(i, 4 , b) &
49+ + coeffs_s(9 , 1 )* u(i, 5 , b)
4650 du(i, 1 , b) = du(i, 1 , b)* faf(1 )
47- du(i, 2 , b) = coeffs(1 )* u_b(i, 2 , b) + coeffs(2 )* u_b(i, 3 , b) &
48- + coeffs(3 )* u_b(i, 4 , b) + coeffs(4 )* u(i, 1 , b) &
49- + coeffs(5 )* u(i, 2 , b) &
50- + coeffs(6 )* u(i, 3 , b) + coeffs(7 )* u(i, 4 , b) &
51- + coeffs(8 )* u(i, 5 , b) + coeffs(9 )* u(i, 6 , b)
51+ du(i, 2 , b) = coeffs_s(1 , 2 )* u_s(i, 2 , b) &
52+ + coeffs_s(2 , 2 )* u_s(i, 3 , b) &
53+ + coeffs_s(3 , 2 )* u_s(i, 4 , b) &
54+ + coeffs_s(4 , 2 )* u(i, 1 , b) &
55+ + coeffs_s(5 , 2 )* u(i, 2 , b) &
56+ + coeffs_s(6 , 2 )* u(i, 3 , b) &
57+ + coeffs_s(7 , 2 )* u(i, 4 , b) &
58+ + coeffs_s(8 , 2 )* u(i, 5 , b) &
59+ + coeffs_s(9 , 2 )* u(i, 6 , b)
5260 du(i, 2 , b) = du(i, 2 , b)* faf(2 )
53- du(i, 3 , b) = coeffs(1 )* u_b(i, 3 , b) + coeffs(2 )* u_b(i, 4 , b) &
54- + coeffs(3 )* u(i, 1 , b) + coeffs(4 )* u(i, 2 , b) &
55- + coeffs(5 )* u(i, 3 , b) &
56- + coeffs(6 )* u(i, 4 , b) + coeffs(7 )* u(i, 5 , b) &
57- + coeffs(8 )* u(i, 6 , b) + coeffs(9 )* u(i, 7 , b)
61+ du(i, 3 , b) = coeffs_s(1 , 3 )* u_s(i, 3 , b) &
62+ + coeffs_s(2 , 3 )* u_s(i, 4 , b) &
63+ + coeffs_s(3 , 3 )* u(i, 1 , b) &
64+ + coeffs_s(4 , 3 )* u(i, 2 , b) &
65+ + coeffs_s(5 , 3 )* u(i, 3 , b) &
66+ + coeffs_s(6 , 3 )* u(i, 4 , b) &
67+ + coeffs_s(7 , 3 )* u(i, 5 , b) &
68+ + coeffs_s(8 , 3 )* u(i, 6 , b) &
69+ + coeffs_s(9 , 3 )* u(i, 7 , b)
5870 du(i, 3 , b) = ffr(3 )* (du(i, 3 , b) - faf(3 )* du(i, 2 , b))
59- du(i, 4 , b) = coeffs(1 )* u_b(i, 4 , b) + coeffs(2 )* u(i, 1 , b) &
60- + coeffs(3 )* u(i, 2 , b) + coeffs(4 )* u(i, 3 , b) &
61- + coeffs(5 )* u(i, 4 , b) &
62- + coeffs(6 )* u(i, 5 , b) + coeffs(7 )* u(i, 6 , b) &
63- + coeffs(8 )* u(i, 7 , b) + coeffs(9 )* u(i, 8 , b)
71+ du(i, 4 , b) = coeffs_s(1 , 4 )* u_s(i, 4 , b) &
72+ + coeffs_s(2 , 4 )* u(i, 1 , b) &
73+ + coeffs_s(3 , 4 )* u(i, 2 , b) &
74+ + coeffs_s(4 , 4 )* u(i, 3 , b) &
75+ + coeffs_s(5 , 4 )* u(i, 4 , b) &
76+ + coeffs_s(6 , 4 )* u(i, 5 , b) &
77+ + coeffs_s(7 , 4 )* u(i, 6 , b) &
78+ + coeffs_s(8 , 4 )* u(i, 7 , b) &
79+ + coeffs_s(9 , 4 )* u(i, 8 , b)
6480 du(i, 4 , b) = ffr(4 )* (du(i, 4 , b) - faf(3 )* du(i, 3 , b))
6581
6682 alpha = faf(5 )
6783
68- do j = 5 , n- 4
69- temp_du = c_m4* u(i, j- 4 , b) + c_m3* u(i, j- 3 , b) &
70- + c_m2* u(i, j- 2 , b) + c_m1* u(i, j- 1 , b) &
71- + c_j* u(i, j, b) &
72- + c_p1* u(i, j+ 1 , b) + c_p2* u(i, j+ 2 , b) &
73- + c_p3* u(i, j+ 3 , b) + c_p4* u(i, j+ 4 , b)
74- du(i, j, b) = ffr(j)* (temp_du - alpha* du(i, j- 1 , b))
84+ do j = 5 , n - 4
85+ temp_du = c_m4* u(i, j - 4 , b) + c_m3* u(i, j - 3 , b) &
86+ + c_m2* u(i, j - 2 , b) + c_m1* u(i, j - 1 , b) &
87+ + c_j* u(i, j, b) &
88+ + c_p1* u(i, j + 1 , b) + c_p2* u(i, j + 2 , b) &
89+ + c_p3* u(i, j + 3 , b) + c_p4* u(i, j + 4 , b)
90+ du(i, j, b) = ffr(j)* (temp_du - alpha* du(i, j - 1 , b))
7591 end do
7692
77- j = n-3
78- du(i, j, b) = coeffs(1 )* u(i, j-4 , b) + coeffs(2 )* u(i, j-3 , b) &
79- + coeffs(3 )* u(i, j-2 , b) + coeffs(4 )* u(i, j-1 , b) &
80- + coeffs(5 )* u(i, j, b) &
81- + coeffs(6 )* u(i, j+1 , b) + coeffs(7 )* u(i, j+2 , b) &
82- + coeffs(8 )* u(i, j+3 , b) + coeffs(9 )* u_e(i, 1 , b)
83- du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j-1 , b))
84- j = n-2
85- du(i, j, b) = coeffs(1 )* u(i, j-4 , b) + coeffs(2 )* u(i, j-3 , b) &
86- + coeffs(3 )* u(i, j-2 , b) + coeffs(4 )* u(i, j-1 , b) &
87- + coeffs(5 )* u(i, j, b) &
88- + coeffs(6 )* u(i, j+1 , b) + coeffs(7 )* u(i, j+2 , b) &
89- + coeffs(8 )* u_e(i, 1 , b) + coeffs(9 )* u_e(i, 2 , b)
90- du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j-1 , b))
91- j = n-1
92- du(i, j, b) = coeffs(1 )* u(i, j-4 , b) + coeffs(2 )* u(i, j-3 , b) &
93- + coeffs(3 )* u(i, j-2 , b) + coeffs(4 )* u(i, j-1 , b) &
94- + coeffs(5 )* u(i, j, b) &
95- + coeffs(6 )* u(i, j+1 , b) + coeffs(7 )* u_e(i, 1 , b) &
96- + coeffs(8 )* u_e(i, 2 , b) + coeffs(9 )* u_e(i, 3 , b)
97- du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j-1 , b))
93+ j = n - 3
94+ du(i, j, b) = coeffs_e(1 , 1 )* u(i, j - 4 , b) &
95+ + coeffs_e(2 , 1 )* u(i, j - 3 , b) &
96+ + coeffs_e(3 , 1 )* u(i, j - 2 , b) &
97+ + coeffs_e(4 , 1 )* u(i, j - 1 , b) &
98+ + coeffs_e(5 , 1 )* u(i, j, b) &
99+ + coeffs_e(6 , 1 )* u(i, j + 1 , b) &
100+ + coeffs_e(7 , 1 )* u(i, j + 2 , b) &
101+ + coeffs_e(8 , 1 )* u(i, j + 3 , b) &
102+ + coeffs_e(9 , 1 )* u_e(i, 1 , b)
103+ du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j - 1 , b))
104+ j = n - 2
105+ du(i, j, b) = coeffs_e(1 , 2 )* u(i, j - 4 , b) &
106+ + coeffs_e(2 , 2 )* u(i, j - 3 , b) &
107+ + coeffs_e(3 , 2 )* u(i, j - 2 , b) &
108+ + coeffs_e(4 , 2 )* u(i, j - 1 , b) &
109+ + coeffs_e(5 , 2 )* u(i, j, b) &
110+ + coeffs_e(6 , 2 )* u(i, j + 1 , b) &
111+ + coeffs_e(7 , 2 )* u(i, j + 2 , b) &
112+ + coeffs_e(8 , 2 )* u_e(i, 1 , b) &
113+ + coeffs_e(9 , 2 )* u_e(i, 2 , b)
114+ du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j - 1 , b))
115+ j = n - 1
116+ du(i, j, b) = coeffs_e(1 , 3 )* u(i, j - 4 , b) &
117+ + coeffs_e(2 , 3 )* u(i, j - 3 , b) &
118+ + coeffs_e(3 , 3 )* u(i, j - 2 , b) &
119+ + coeffs_e(4 , 3 )* u(i, j - 1 , b) &
120+ + coeffs_e(5 , 3 )* u(i, j, b) &
121+ + coeffs_e(6 , 3 )* u(i, j + 1 , b) &
122+ + coeffs_e(7 , 3 )* u_e(i, 1 , b) &
123+ + coeffs_e(8 , 3 )* u_e(i, 2 , b) &
124+ + coeffs_e(9 , 3 )* u_e(i, 3 , b)
125+ du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j - 1 , b))
98126 j = n
99- du(i, j, b) = coeffs(1 )* u(i, j-4 , b) + coeffs(2 )* u(i, j-3 , b) &
100- + coeffs(3 )* u(i, j-2 , b) + coeffs(4 )* u(i, j-1 , b) &
101- + coeffs(5 )* u(i, j, b) &
102- + coeffs(6 )* u_e(i, 1 , b) + coeffs(7 )* u_e(i, 2 , b) &
103- + coeffs(8 )* u_e(i, 3 , b) + coeffs(9 )* u_e(i, 4 , b)
104- du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j-1 , b))
127+ du(i, j, b) = coeffs_e(1 , 4 )* u(i, j - 4 , b) &
128+ + coeffs_e(2 , 4 )* u(i, j - 3 , b) &
129+ + coeffs_e(3 , 4 )* u(i, j - 2 , b) &
130+ + coeffs_e(4 , 4 )* u(i, j - 1 , b) &
131+ + coeffs_e(5 , 4 )* u(i, j, b) &
132+ + coeffs_e(6 , 4 )* u_e(i, 1 , b) &
133+ + coeffs_e(7 , 4 )* u_e(i, 2 , b) &
134+ + coeffs_e(8 , 4 )* u_e(i, 3 , b) &
135+ + coeffs_e(9 , 4 )* u_e(i, 4 , b)
136+ du(i, j, b) = ffr(j)* (du(i, j, b) - faf(j)* du(i, j - 1 , b))
105137
106138 send_u_e(i, 1 , b) = du(i, n, b)
107139
@@ -110,17 +142,17 @@ attributes(global) subroutine der_univ_dist( &
110142 du(i, j, b) = du(i, j, b) - fbc(j)* du(i, j + 1 , b)
111143 end do
112144 du(i, 1 , b) = last_r* (du(i, 1 , b) - fbc(1 )* du(i, 2 , b))
113- send_u_b (i, 1 , b) = du(i, 1 , b)
145+ send_u_s (i, 1 , b) = du(i, 1 , b)
114146
115147 end subroutine der_univ_dist
116148
117- attributes(global) subroutine der_univ_subs (du , recv_u_b , recv_u_e , &
149+ attributes(global) subroutine der_univ_subs (du , recv_u_s , recv_u_e , &
118150 n , dist_sa , dist_sc )
119151 implicit none
120152
121153 ! Arguments
122154 real (dp), device, intent (out ), dimension (:, :, :) :: du
123- real (dp), device, intent (in ), dimension (:, :, :) :: recv_u_b , recv_u_e
155+ real (dp), device, intent (in ), dimension (:, :, :) :: recv_u_s , recv_u_e
124156 real (dp), device, intent (in ), dimension (:) :: dist_sa, dist_sc
125157 integer , value, intent (in ) :: n
126158
@@ -135,13 +167,11 @@ attributes(global) subroutine der_univ_subs(du, recv_u_b, recv_u_e, &
135167 ur = dist_sc(n)
136168 recp = 1._dp / (1._dp - ur* bl)
137169
138- ! du(i, 1, b) = recp*(du(i, 1, b) - bl*recv_u_b(i, 1, b))
139- ! du(i, n, b) = recp*(du(i, n, b) - ur*recv_u_e(i, 1, b))
140- du_1 = recp* (du(i, 1 , b) - bl* recv_u_b(i, 1 , b))
170+ du_1 = recp* (du(i, 1 , b) - bl* recv_u_s(i, 1 , b))
141171 du_n = recp* (du(i, n, b) - ur* recv_u_e(i, 1 , b))
142172
143173 du(i, 1 , b) = du_1
144- do j = 2 , n- 1
174+ do j = 2 , n - 1
145175 du(i, j, b) = (du(i, j, b) - dist_sa(j)* du_1 - dist_sc(j)* du_n)
146176 end do
147177 du(i, n, b) = du_n
0 commit comments