Skip to content

Commit 5c35982

Browse files
committed
work on speeding up clenshaw
1 parent 111c7a2 commit 5c35982

File tree

1 file changed

+205
-20
lines changed

1 file changed

+205
-20
lines changed

src/clenshaw.jl

Lines changed: 205 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,33 @@ function ClenshawRecurrenceData{T,S}(sp::S, N) where {T,S<:JacobiTriangle}
2020
B = Vector{BandedMatrix{T,Matrix{T}}}(undef, N-1)
2121
C = Vector{BandedMatrix{T,Matrix{T}}}(undef, N-2)
2222
Jx_∞, Jy_∞ = jacobioperators(sp)
23-
Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N-1)]'),
24-
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N-1)]')
23+
Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N-1)]),
24+
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N-1)])
2525

2626

2727
for K = 1:N-1
2828
Ax,Ay = view(Jx,Block(K,K)), view(Jy,Block(K,K))
29-
Bx,By = view(Jx,Block(K,K+1)), view(Jy,Block(K,K+1))
30-
b₂ = By[K,K+1]
29+
Bx,By = view(Jx,Block(K+1,K)), view(Jy,Block(K+1,K))
30+
b₂ = By[K+1,K]
3131

32-
B̃ˣ[K] = BandedMatrix{T}(Zeros(K+1,K), (2,0))
33-
B̃ʸ[K] = BandedMatrix{T}(Zeros(K+1,K), (2,0))
34-
B[K] = BandedMatrix{T}(undef, (K+1,K), (2,0))
32+
B̃ˣ[K] = BandedMatrix{T}(Zeros(K,K+1), (0,2))
33+
B̃ʸ[K] = BandedMatrix{T}(Zeros(K,K+1), (0,2))
34+
B[K] = BandedMatrix{T}(undef, (K,K+1), (0,2))
3535

3636
B̃ˣ[K][band(0)] .= inv.(view(Bx, band(0)))
3737
B̃ˣ[K][end,end] = - inv(b₂) * By[K,K]/Bx[K,K]
3838
B̃ʸ[K][end,end] = inv(b₂)
3939

4040
if K > 1
41-
Cx,Cy = view(Jx,Block(K,K-1)), view(Jy,Block(K,K-1))
42-
C[K-1] = BandedMatrix{T}(undef, (K+1,K-1), (2,0))
43-
B̃ˣ[K][end,end-1] = - inv(b₂) * By[K,K-1]/Bx[K-1,K-1]
44-
C[K-1] .= Mul(B̃ˣ[K] , Cx)
45-
C[K-1] .= Mul(B̃ʸ[K] , Cy) .+ C[K-1]
41+
Cx,Cy = view(Jx,Block(K-1,K)), view(Jy,Block(K-1,K))
42+
C[K-1] = BandedMatrix{T}(undef, (K-1,K+1), (0,2))
43+
B̃ˣ[K][end-1,end] = - inv(b₂) * By[K-1,K]/Bx[K-1,K-1]
44+
C[K-1] .= Mul(Cx , B̃ˣ[K])
45+
C[K-1] .= Mul(Cy, B̃ʸ[K]) .+ C[K-1]
4646
end
4747

48-
B[K] .= Mul(B̃ˣ[K] , Ax)
49-
B[K] .= Mul(B̃ʸ[K] , Ay) .+ B[K]
48+
B[K] .= Mul(Ax, B̃ˣ[K])
49+
B[K] .= Mul(Ay, B̃ʸ[K]) .+ B[K]
5050
end
5151

5252
ClenshawRecurrenceData{T,S}(sp, B̃ˣ, B̃ʸ, B, C)
@@ -67,8 +67,8 @@ subblockbandwidths(::ClenshawRecurrence) = (0,2)
6767

6868
@inline function getblock(U::ClenshawRecurrence{T}, K::Int, J::Int) where T
6969
J == K && return BandedMatrix(Eye{T}(K,K))
70-
J == K+1 && return BandedMatrix(transpose(U.data.B[K] - U.x*U.data.B̃ˣ[K] - U.y*U.data.B̃ʸ[K]))
71-
J == K+2 && return BandedMatrix(transpose(U.data.C[K]))
70+
J == K+1 && return BandedMatrix((U.data.B[K] - U.x*U.data.B̃ˣ[K] - U.y*U.data.B̃ʸ[K]))
71+
J == K+2 && return BandedMatrix((U.data.C[K]))
7272
return BandedMatrix(Zeros{T}(K,J))
7373
end
7474

@@ -85,13 +85,198 @@ function getindex(U::ClenshawRecurrence, i::Int, j::Int)
8585
return v
8686
end
8787

88-
@time A = ClenshawRecurrence(JacobiTriangle(), 10, 0.1, 0.2);
89-
A.data.B[2]
88+
function myclenshaw(C, cfs, sp, N)
89+
Jx_∞, Jy_∞ = jacobioperators(sp)
90+
Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N)]),
91+
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N)])
92+
93+
M = length(C.B)+1
94+
95+
Q = BandedBlockBandedMatrix(Eye{eltype(Jx)}(size(Jx)), blocksizes(Jx), (0,0), (0,0))
96+
B2 = Fill(Q,M) .* view(cfs,Block(M))
97+
B1 = Fill(Q,M-1) .* view(cfs,Block(M-1)) .+ Fill(Jx,M-1) .* (C.B̃ˣ[M-1]*B2) .+ Fill(Jy,M-1) .* (C.B̃ʸ[M-1]*B2) .- C.B[M-1]*B2
98+
for K = M-2:-1:1
99+
B1, B2 = Fill(Q,K) .* view(cfs,Block(K)) .+ Fill(Jx,K) .* (C.B̃ˣ[K]*B1) .+ Fill(Jy,K) .* (C.B̃ʸ[K]*B1) .- C.B[K]*B1 .- C.C[K] * B2 , B1
100+
end
101+
first(B1)
102+
end
103+
104+
105+
using Juno, Atom, Profile
106+
@time C = ClenshawRecurrenceData(JacobiTriangle(), 3)
107+
108+
@time myclenshaw(C, cfs, sp, N)
109+
110+
111+
sum(1:1000)
112+
113+
N = 200
114+
Jx_∞, Jy_∞ = jacobioperators(sp)
115+
@time Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N)]),
116+
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N)])
117+
118+
@time Q = BandedBlockBandedMatrix(Eye{eltype(Jx)}(size(Jx)), blocksizes(Jx), (0,0), (0,0))
119+
@time B2 = Fill(Q,M) .* view(cfs,Block(M))
120+
@time B1 = Fill(Q,M-1) .* view(cfs,Block(M-1)) .+ Fill(Jx,M-1) .* (C.B̃ˣ[M-1]*B2) .+ Fill(Jy,M-1) .* (C.B̃ʸ[M-1]*B2) .- C.B[M-1]*B2
121+
122+
@time Jx + Jy
123+
124+
@time Fill(Q,M-1) .* view(cfs,Block(M-1))
125+
@time Fill(Jx,M-1) .* (C.B̃ˣ[M-1]*B2)
126+
127+
@time (C.B̃ˣ[M-1]*B2)
128+
129+
M = length(C.B)+1
130+
@which BandedBlockBandedMatrix(Zeros{eltype(Jx)}(size(Jx)), blocksizes(Jx), (0,0), (0,0))
131+
@time Q = BandedBlockBandedMatrix(Zeros{eltype(Jx)}(size(Jx)), blocksizes(Jx).block_sizes, (0,0), (0,0))
132+
133+
@which BandedBlockBandedMatrix(Zeros{eltype(Jx)}(size(Jx)), blocksizes(Jx).block_sizes, (0,0), (0,0))
134+
@time Q.data[1,:] .= 1
135+
136+
blockbandwidths(Q)
137+
138+
139+
140+
141+
142+
@time Q = BandedBlockBandedMatrix(Eye{eltype(Jx)}(size(Jx)), blocksizes(Jx), (0,0), (0,0))
143+
@time B2 = Fill(Q,M) .* view(cfs,Block(M))
144+
@time A,B = Fill(Q,M-1) .* view(cfs,Block(M-1)) , Fill(Jx,M-1) .* (C.B̃ˣ[M-1]*B2)
145+
146+
@btime B2 = Fill(Q,M) .* view(cfs,Block(M))
147+
148+
@which Q*cfs[1]
149+
150+
151+
152+
C = similar(B[1])
153+
154+
A, B = A[1] , B[1]
155+
using BenchmarkTools
156+
@btime view(C, Block(200,200)) .= view(B, Block(200,200))
157+
VC, VB = view(C, Block(200,200)) , view(B, Block(200,200))
158+
@btime BandedMatrices._banded_copyto!(VC, VB, L, L)
159+
160+
@btime BandedMatrices.banded_copyto!(VC, VB)
161+
using LazyArrays
162+
L = LazyArrays.MemoryLayout(VC)
163+
LazyArrays.MemoryLayout(VB)
164+
@btime copyto!(BandedMatrices.bandeddata(VC) , BandedMatrices.bandeddata(VB))
165+
166+
167+
168+
bandwidths(view(C, Block(200,200)))
169+
170+
bandwidths(view(B, Block(200,200)))
171+
172+
using BlockBandedMatrices
173+
@which BlockBandedMatrices.blockbanded_copyto!(C, B)
174+
blockbanded_axpy!(one(T), A, dest)
175+
176+
@which broadcast(+,A[1],B[1])
177+
178+
A,B
179+
180+
@time Fill(Q,M-1) .* view(cfs,Block(M-1))
181+
182+
using InteractiveUtils
183+
@time Q*1.0
184+
185+
@time Q.data*1.0
186+
187+
@time 1.0*Q
188+
189+
Fill(Q,M-1) .* view(cfs,Block(M-1))
190+
191+
192+
@time A + B
193+
194+
195+
for K = M-2:-1:1
196+
B1, B2 = Fill(Q,K) .* view(cfs,Block(K)) .+ Fill(Jx,K) .* (C.B̃ˣ[K]*B1) .+ Fill(Jy,K) .* (C.B̃ʸ[K]*B1) .- C.B[K]*B1 .- C.C[K] * B2 , B1
197+
end
198+
first(B1)
199+
200+
201+
202+
203+
@time Multiplication(Fun(Triangle(), cfs), JacobiTriangle())
204+
Profile.clear()
205+
C.C
206+
207+
B1 = Fill(Q,M) .* view(cfs,Block(M))
208+
209+
210+
Q = BandedBlockBandedMatrix(Eye{eltype(Jx)}(size(Jx)), blocksizes(Jx), (0,0), (0,0))
211+
rhs = PseudoBlockArray(Fill(Q, length(cfs)) .* cfs, 1:M)
212+
213+
214+
215+
216+
217+
cfs[1]*I + cfs[2] * (3Jx - I) .+ cfs[3] * (2Jy + Jx -I)
218+
219+
Fun(Triangle(),[0.1])(0.1,0.2)
220+
Fun(Triangle(),[0,1.])(0.1,0.0)
221+
Fun(Triangle(),[0,0,1.0])(0.1,0.1)
222+
223+
2y + x- 1
224+
225+
3x-1
226+
227+
-0.2 * 2
228+
229+
230+
231+
Fill(Q,K) .* cfs[Block(1)]
232+
233+
C.B̃ˣ[K]*view(cfs,Block(K+1))
234+
235+
236+
c = randn(2,2)
237+
238+
x = [randn(2), randn(2)]
239+
240+
241+
Fill(c,2) .* x
242+
Diagonal([c,c])*x
243+
244+
c*x[1]
245+
246+
c*x
247+
248+
249+
250+
@time C.B̃ˣ[K]*view(cfs,Block(K+1))
251+
@time Jx*0.1
252+
253+
254+
255+
90256
@time M = BandedBlockBandedMatrix(A)
91257

258+
v = randn(55)
259+
Fun(JacobiTriangle(), v)(0.1,0.2)
260+
UpperTriangular(M) \ v
261+
262+
ClenshawRecurrence(JacobiTriangle(), 2, 0.1,0.2)
263+
264+
265+
266+
267+
268+
269+
270+
271+
272+
A.data.B[2]
273+
274+
275+
M * [[1,2],[3,4],[5,6]]
276+
92277
Jx_∞, Jy_∞ = jacobioperators(sp)
93-
Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N-1)]'),
94-
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N-1)]')
278+
Jx, Jy = BandedBlockBandedMatrix(Jx_∞[Block.(1:N), Block.(1:N-1)]),
279+
BandedBlockBandedMatrix(Jy_∞[Block.(1:N), Block.(1:N-1)])
95280
Ax, Ay = Jx[Block(K,K)], Jy[Block(K,K)]
96281
= [Matrix(A.data.B̃ˣ[2]) Matrix(A.data.B̃ʸ[2])]
97282

0 commit comments

Comments
 (0)