@@ -158,23 +158,38 @@ attributes(global) subroutine der_univ_subs(du, recv_u_s, recv_u_e, &
158158
159159 ! Local variables
160160 integer :: i, j, b
161- real (dp) :: ur, bl, recp, du_1, du_n
161+ real (dp) :: ur, bl, recp, du_s, du_e
162162
163163 i = threadIdx% x
164164 b = blockIdx% x
165165
166+ ! A small trick we do here is valid for symmetric Toeplitz matrices.
167+ ! In our case our matrices satisfy this criteria in the (5:n-4) region
168+ ! and as long as a rank has around at least 20 entries the assumptions
169+ ! we make here are perfectly valid.
170+
171+ ! bl is the bottom left entry in the 2x2 matrix
172+ ! ur is the upper right entry in the 2x2 matrix
173+
174+ ! Start
175+ ! At the start we have the 'bl', and assume 'ur'
166176 bl = dist_sa(1 )
167- ur = dist_sc(n )
177+ ur = dist_sa( 1 )
168178 recp = 1._dp / (1._dp - ur* bl)
179+ du_s = recp* (du(i, 1 , b) - bl* recv_u_s(i, 1 , b))
169180
170- du_1 = recp* (du(i, 1 , b) - bl* recv_u_s(i, 1 , b))
171- du_n = recp* (du(i, n, b) - ur* recv_u_e(i, 1 , b))
181+ ! End
182+ ! At the end we have the 'ur', and assume 'bl'
183+ bl = dist_sc(n)
184+ ur = dist_sc(n)
185+ recp = 1._dp / (1._dp - ur* bl)
186+ du_e = recp* (du(i, n, b) - ur* recv_u_e(i, 1 , b))
172187
173- du(i, 1 , b) = du_1
188+ du(i, 1 , b) = du_s
174189 do j = 2 , n - 1
175- du(i, j, b) = (du(i, j, b) - dist_sa(j)* du_1 - dist_sc(j)* du_n )
190+ du(i, j, b) = (du(i, j, b) - dist_sa(j)* du_s - dist_sc(j)* du_e )
176191 end do
177- du(i, n, b) = du_n
192+ du(i, n, b) = du_e
178193
179194 end subroutine der_univ_subs
180195
0 commit comments