Skip to content

Commit a018a6d

Browse files
restructure A_mul_B_odd/even_cols to use a loop over A_mul_B_col_J
restructure coefficient matrix of spherical harmonics to include negative modes. Interlaced as follows: [c^{0} c^{-1} c^{1} c^{-2} c^{2} …] to match ApproxFun’s interlacing of Laurent and Fourier modes.
1 parent da24bec commit a018a6d

File tree

6 files changed

+328
-81
lines changed

6 files changed

+328
-81
lines changed

src/SphericalHarmonics/fastplan.jl

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,17 @@ immutable FastSphericalHarmonicPlan{T}
99
end
1010

1111
function FastSphericalHarmonicPlan{T}(A::Matrix{T}, L::Int)
12-
m, n = size(A)
12+
M, N = size(A)
13+
n = (N+1)÷2
1314
RP = RotationPlan(T, n-1)
1415
a1 = A[:,1]
1516
p1 = plan_normleg2cheb(a1)
1617
p2 = plan_normleg12cheb2(a1)
1718
p1inv = plan_cheb2normleg(a1)
1819
p2inv = plan_cheb22normleg1(a1)
1920
B = zeros(A)
20-
Ce = eye(A)
21-
Co = eye(A)
21+
Ce = eye(T, M)
22+
Co = eye(T, M)
2223
BF = Vector{Butterfly{T}}(n-2)
2324
for j = 1:2:n-2
2425
A_mul_B!(Ce, RP.layers[j])
@@ -37,12 +38,47 @@ end
3738
function A_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix)
3839
RP, BF, p1, p2, B = FP.RP, FP.BF, FP.p1, FP.p2, FP.B
3940
fill!(B, zero(eltype(B)))
40-
copy!(B, 1, X, 1, 2size(X, 1))
41-
for j = 3:size(X, 2)
42-
A_mul_B_col_J!(B, BF[j-2], X, j)
41+
M, N = size(X)
42+
copy!(B, 1, X, 1, 3M)
43+
for J = 2:N÷2
44+
A_mul_B_col_J!(B, BF[J-1], X, 2J)
45+
A_mul_B_col_J!(B, BF[J-1], X, 2J+1)
4346
end
44-
A_mul_B_odd_cols!!(Y, p1, B)
45-
A_mul_B_even_cols!!(Y, p2, B)
47+
48+
A_mul_B_col_J!!(Y, p1, B, 1)
49+
for J = 2:4:N
50+
A_mul_B_col_J!!(Y, p2, B, J)
51+
A_mul_B_col_J!!(Y, p2, B, J+1)
52+
end
53+
for J = 4:4:N
54+
A_mul_B_col_J!!(Y, p1, B, J)
55+
A_mul_B_col_J!!(Y, p1, B, J+1)
56+
end
57+
Y
58+
end
59+
60+
function At_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix)
61+
RP, BF, p1inv, p2inv, B = FP.RP, FP.BF, FP.p1inv, FP.p2inv, FP.B
62+
copy!(B, X)
63+
M, N = size(X)
64+
A_mul_B_col_J!!(Y, p1inv, B, 1)
65+
for J = 2:4:N
66+
A_mul_B_col_J!!(Y, p2inv, B, J)
67+
A_mul_B_col_J!!(Y, p2inv, B, J+1)
68+
end
69+
for J = 4:4:N
70+
A_mul_B_col_J!!(Y, p1inv, B, J)
71+
A_mul_B_col_J!!(Y, p1inv, B, J+1)
72+
end
73+
74+
copy!(B, Y)
75+
for J = 2:N÷2
76+
At_mul_B_col_J!(Y, BF[J-1], B, 2J)
77+
At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
78+
end
79+
Y
4680
end
4781

82+
Ac_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix) = At_mul_B!(Y, FP, X)
83+
4884
allranks(FP::FastSphericalHarmonicPlan) = mapreduce(allranks,vcat,FP.BF)

src/SphericalHarmonics/slowplan.jl

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,15 @@ function RotationPlan{T}(::Type{T}, n::Int)
8484
end
8585

8686
function A_mul_B!(P::RotationPlan, A::AbstractMatrix)
87-
n = length(P.layers)+1
88-
@inbounds for m = n-2:-1:0
87+
M, N = size(A)
88+
@inbounds for m = N÷2-2:-1:0
8989
layer = P.layers[m+1]
90-
for= m+2:2:n
90+
for= 2*(m+2):4:N
9191
@simd for i = 1:length(layer)
9292
G = layer[i]
93+
a1, a2 = A[G.i1,ℓ], A[G.i2,ℓ]
94+
A[G.i1,ℓ] = G.c*a1 + G.s*a2
95+
A[G.i2,ℓ] = G.c*a2 - G.s*a1
9396
a1, a2 = A[G.i1,ℓ+1], A[G.i2,ℓ+1]
9497
A[G.i1,ℓ+1] = G.c*a1 + G.s*a2
9598
A[G.i2,ℓ+1] = G.c*a2 - G.s*a1
@@ -100,12 +103,15 @@ function A_mul_B!(P::RotationPlan, A::AbstractMatrix)
100103
end
101104

102105
function At_mul_B!(P::RotationPlan, A::AbstractMatrix)
103-
n = length(P.layers)+1
104-
@inbounds for m = 0:n-2
106+
M, N = size(A)
107+
@inbounds for m = 0:N÷2-2
105108
layer = P.layers[m+1]
106-
for= m+2:2:n
109+
for= 2*(m+2):4:N
107110
@simd for i = length(layer):-1:1
108111
G = layer[i]
112+
a1, a2 = A[G.i1,ℓ], A[G.i2,ℓ]
113+
A[G.i1,ℓ] = G.c*a1 - G.s*a2
114+
A[G.i2,ℓ] = G.s*a1 + G.c*a2
109115
a1, a2 = A[G.i1,ℓ+1], A[G.i2,ℓ+1]
110116
A[G.i1,ℓ+1] = G.c*a1 - G.s*a2
111117
A[G.i2,ℓ+1] = G.s*a1 + G.c*a2
@@ -128,7 +134,8 @@ immutable SlowSphericalHarmonicPlan{T}
128134
end
129135

130136
function SlowSphericalHarmonicPlan{T}(A::Matrix{T})
131-
m, n = size(A)
137+
M, N = size(A)
138+
n = (N+1)÷2
132139
RP = RotationPlan(T, n-1)
133140
a1 = A[:,1]
134141
p1 = plan_normleg2cheb(a1)
@@ -143,15 +150,32 @@ function A_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix)
143150
RP, p1, p2, B = SP.RP, SP.p1, SP.p2, SP.B
144151
copy!(B, X)
145152
A_mul_B!(RP, B)
146-
A_mul_B_odd_cols!!(Y, p1, B)
147-
A_mul_B_even_cols!!(Y, p2, B)
153+
M, N = size(X)
154+
A_mul_B_col_J!!(Y, p1, B, 1)
155+
for J = 2:4:N
156+
A_mul_B_col_J!!(Y, p2, B, J)
157+
A_mul_B_col_J!!(Y, p2, B, J+1)
158+
end
159+
for J = 4:4:N
160+
A_mul_B_col_J!!(Y, p1, B, J)
161+
A_mul_B_col_J!!(Y, p1, B, J+1)
162+
end
163+
Y
148164
end
149165

150166
function At_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix)
151167
RP, p1inv, p2inv, B = SP.RP, SP.p1inv, SP.p2inv, SP.B
152168
copy!(B, X)
153-
A_mul_B_odd_cols!!(Y, p1inv, B)
154-
A_mul_B_even_cols!!(Y, p2inv, B)
169+
M, N = size(X)
170+
A_mul_B_col_J!!(Y, p1inv, B, 1)
171+
for J = 2:4:N
172+
A_mul_B_col_J!!(Y, p2inv, B, J)
173+
A_mul_B_col_J!!(Y, p2inv, B, J+1)
174+
end
175+
for J = 4:4:N
176+
A_mul_B_col_J!!(Y, p1inv, B, J)
177+
A_mul_B_col_J!!(Y, p1inv, B, J+1)
178+
end
155179
At_mul_B!(RP, Y)
156180
end
157181

src/hierarchical.jl

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -11,51 +11,27 @@ end
1111
# A_mul_B!! mutates x while overwriting y. The generic fallback assumes it doesn't mutate x.
1212
A_mul_B!!(y::AbstractVector, P::HierarchicalPlan, x::AbstractVector) = A_mul_B!(y, P, x)
1313
A_mul_B!!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B!(Y, P, X)
14-
A_mul_B_odd_cols!!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B_odd_cols!(Y, P, X)
15-
A_mul_B_even_cols!!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B_even_cols!(Y, P, X)
14+
A_mul_B_col_J!!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix, J::Int) = A_mul_B_col_J!(Y, P, X, J)
1615

1716
# A_mul_B! falls back to the mutating version with a copy.
1817
A_mul_B!(y::AbstractVector, P::HierarchicalPlan, x::AbstractVector) = A_mul_B!!(y, P, copy(x))
1918
A_mul_B!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B!!(Y, P, copy(X))
20-
A_mul_B_odd_cols!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B_odd_cols!!(Y, P, copy(X))
21-
A_mul_B_even_cols!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B_even_cols!!(Y, P, copy(X))
19+
A_mul_B_col_J!(Y::AbstractMatrix, P::HierarchicalPlan, X::AbstractMatrix) = A_mul_B_col_J!!(Y, P, copy(X), J)
2220

23-
function scale_odd_cols!(b::AbstractVector, A::AbstractMatrix)
21+
function scale_col_J!(b::AbstractVector, A::AbstractVecOrMat, J::Int)
2422
m, n = size(A)
25-
@inbounds for j = 1:2:n
26-
@simd for i = 1:m
27-
A[i,j] *= b[i]
28-
end
29-
end
30-
A
31-
end
32-
33-
function scale_odd_cols!(b::Number, A::AbstractMatrix)
34-
m, n = size(A)
35-
@inbounds for j = 1:2:n
36-
@simd for i = 1:m
37-
A[i,j] *= b
38-
end
23+
COLSHIFT = m*(J-1)
24+
@inbounds @simd for i = 1:m
25+
A[i+COLSHIFT] *= b[i]
3926
end
4027
A
4128
end
4229

43-
function scale_even_cols!(b::AbstractVector, A::AbstractMatrix)
30+
function scale_col_J!(b::Number, A::AbstractVecOrMat, J::Int)
4431
m, n = size(A)
45-
@inbounds for j = 2:2:n
46-
@simd for i = 1:m
47-
A[i,j] *= b[i]
48-
end
49-
end
50-
A
51-
end
52-
53-
function scale_even_cols!(b::Number, A::AbstractMatrix)
54-
m, n = size(A)
55-
@inbounds for j = 2:2:n
56-
@simd for i = 1:m
57-
A[i,j] *= b
58-
end
32+
COLSHIFT = m*(J-1)
33+
@inbounds @simd for i = 1:m
34+
A[i+COLSHIFT] *= b
5935
end
6036
A
6137
end
@@ -348,27 +324,23 @@ function A_mul_B!(Y::Matrix, P::ChebyshevToNormalizedLegendrePlan, X::Matrix)
348324
scale!(P.scl, Y)
349325
end
350326

351-
function A_mul_B_odd_cols!!(Y::Matrix, P::NormalizedLegendreToChebyshevPlan, X::Matrix)
327+
function A_mul_B_col_J!!(Y::Matrix, P::NormalizedLegendreToChebyshevPlan, X::Matrix, J::Int)
352328
m, n = size(X)
353-
scale_odd_cols!(P.scl, X)
354-
for j = 1:2:n
355-
A_mul_B!(Y, P.even, X, 1+m*(j-1), 1+m*(j-1), 2, 2)
356-
A_mul_B!(Y, P.odd, X, 2+m*(j-1), 2+m*(j-1), 2, 2)
357-
end
358-
scale_odd_cols!(2/π, Y)
359-
@inbounds @simd for j = 1:2:n
360-
Y[1+m*(j-1)] *= 0.5
361-
end
329+
COLSHIFT = m*(J-1)
330+
scale_col_J!(P.scl, X, J)
331+
A_mul_B!(Y, P.even, X, 1+COLSHIFT, 1+COLSHIFT, 2, 2)
332+
A_mul_B!(Y, P.odd, X, 2+COLSHIFT, 2+COLSHIFT, 2, 2)
333+
scale_col_J!(2/π, Y, J)
334+
@inbounds Y[1+COLSHIFT] *= 0.5
362335
Y
363336
end
364337

365-
function A_mul_B_odd_cols!(Y::Matrix, P::ChebyshevToNormalizedLegendrePlan, X::Matrix)
338+
function A_mul_B_col_J!(Y::Matrix, P::ChebyshevToNormalizedLegendrePlan, X::Matrix, J::Int)
366339
m, n = size(X)
367-
for j = 1:2:n
368-
A_mul_B!(Y, P.even, X, 1+m*(j-1), 1+m*(j-1), 2, 2)
369-
A_mul_B!(Y, P.odd, X, 2+m*(j-1), 2+m*(j-1), 2, 2)
370-
end
371-
scale_odd_cols!(P.scl, Y)
340+
COLSHIFT = m*(J-1)
341+
A_mul_B!(Y, P.even, X, 1+COLSHIFT, 1+COLSHIFT, 2, 2)
342+
A_mul_B!(Y, P.odd, X, 2+COLSHIFT, 2+COLSHIFT, 2, 2)
343+
scale_col_J!(P.scl, Y, J)
372344
end
373345

374346
################################################################################
@@ -468,21 +440,19 @@ function A_mul_B!(Y::Matrix, P::Chebyshev2ToNormalizedLegendre1Plan, X::Matrix)
468440
scale!(P.scl, Y)
469441
end
470442

471-
function A_mul_B_even_cols!!(Y::Matrix, P::NormalizedLegendre1ToChebyshev2Plan, X::Matrix)
443+
function A_mul_B_col_J!!(Y::Matrix, P::NormalizedLegendre1ToChebyshev2Plan, X::Matrix, J::Int)
472444
m, n = size(X)
473-
scale_even_cols!(P.scl, X)
474-
for j = 2:2:n
475-
A_mul_B!(Y, P.even, X, 1+m*(j-1), 1+m*(j-1), 2, 2)
476-
A_mul_B!(Y, P.odd, X, 2+m*(j-1), 2+m*(j-1), 2, 2)
477-
end
445+
COLSHIFT = m*(J-1)
446+
scale_col_J!(P.scl, X, J)
447+
A_mul_B!(Y, P.even, X, 1+COLSHIFT, 1+COLSHIFT, 2, 2)
448+
A_mul_B!(Y, P.odd, X, 2+COLSHIFT, 2+COLSHIFT, 2, 2)
478449
Y
479450
end
480451

481-
function A_mul_B_even_cols!(Y::Matrix, P::Chebyshev2ToNormalizedLegendre1Plan, X::Matrix)
452+
function A_mul_B_col_J!(Y::Matrix, P::Chebyshev2ToNormalizedLegendre1Plan, X::Matrix, J::Int)
482453
m, n = size(X)
483-
for j = 2:2:n
484-
A_mul_B!(Y, P.even, X, 1+m*(j-1), 1+m*(j-1), 2, 2)
485-
A_mul_B!(Y, P.odd, X, 2+m*(j-1), 2+m*(j-1), 2, 2)
486-
end
487-
scale_even_cols!(P.scl, Y)
454+
COLSHIFT = m*(J-1)
455+
A_mul_B!(Y, P.even, X, 1+COLSHIFT, 1+COLSHIFT, 2, 2)
456+
A_mul_B!(Y, P.odd, X, 2+COLSHIFT, 2+COLSHIFT, 2, 2)
457+
scale_col_J!(P.scl, Y, J)
488458
end

test/butterflytests.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ for n in 7:N
3939
println(norm(w-A[n]\u))
4040
end
4141

42+
N = 10
43+
A = Vector{Matrix{Float64}}(N)
44+
for n in 1:N
45+
A[n] = Float64[1/(i+j-1) for i = 1:2^n,j=1:2^n]
46+
println(n)
47+
end
48+
49+
for n in 7:N
50+
println("N = ", n)
51+
@time B = Butterfly(A[n], n-5)
52+
b = rand(Float64,2^n)./(1:2^n)
53+
u = zero(b)
54+
@time uf = A[n]*b
55+
@time A_mul_B!(u, B, b)
56+
w = zero(b)
57+
@time At_mul_B!(w, B, b)
58+
println(norm(u-uf)/2^n)
59+
println(norm(w-A[n]'b))
60+
println(norm(u-w))
61+
end
62+
63+
4264
N = 10
4365
A = Vector{Matrix{Complex{Float64}}}(N)
4466
for n in 1:N

0 commit comments

Comments
 (0)