Skip to content

Commit 280b169

Browse files
fixes
1 parent 9917b57 commit 280b169

File tree

3 files changed

+38
-41
lines changed

3 files changed

+38
-41
lines changed

Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
1010
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1111
SparseBandedMatrices = "bd59d7e1-4699-4102-944e-d05209cb92aa"
1212
StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
13-
TriangularSolve = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
1413
VectorizedRNG = "33b4df10-0173-11e9-2a0c-851a7edac40e"
1514

1615
[compat]
@@ -20,7 +19,6 @@ Polyester = "0.3.2,0.4.1, 0.5, 0.6, 0.7"
2019
PrecompileTools = "1"
2120
SparseBandedMatrices = "1"
2221
StrideArraysCore = "0.5.5"
23-
TriangularSolve = "0.2"
2422
VectorizedRNG = "0.2.25"
2523
julia = "1.10"
2624

src/butterflylu.jl

Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ end
2020
struct 🦋workspace{T}
2121
A::Matrix{T}
2222
b::Vector{T}
23-
B::Matrix{T}
2423
ws::Vector{T}
2524
U::Matrix{T}
2625
V::Matrix{T}
@@ -33,18 +32,17 @@ struct 🦋workspace{T}
3332
xn = 4 - M % 4
3433
b = [b; rand(xn)]
3534
end
36-
B = similar(A)
3735
U, V = (similar(A), similar(A))
3836
ws = 🦋generate_random!(A)
39-
new{eltype(A)}(A, b, B, ws, U, V, out)
37+
materializeUV(U, V, ws)
38+
new{eltype(A)}(A, b, ws, U, V, out)
4039
end
4140
end
4241

4342
function 🦋lu!(workspace::🦋workspace, M, thread)
44-
(;A, b, B, ws, U, V, out) = workspace
45-
🦋mul!(copyto!(B, A), ws)
46-
materializeUV(U, V, ws)
47-
F = RecursiveFactorization.lu!(B, Val(false), thread)
43+
(;A, b, ws, U, V, out) = workspace
44+
🦋mul!(A, ws)
45+
F = RecursiveFactorization.lu!(A, Val(false), thread)
4846
sol = V * (F \ (U' * b))
4947
out .= @view sol[1:M]
5048
out
@@ -55,14 +53,14 @@ const butterfly_workspace = 🦋workspace;
5553
function 🦋mul_level!(A, u, v)
5654
M, N = size(A)
5755
@assert M == length(u) && N == length(v)
58-
Mh = M >>> 1
59-
Nh = N >>> 1
60-
@turbo for n in 1 : Nh
61-
for m in 1 : Mh
56+
M_half = M >>> 1
57+
N_half = N >>> 1
58+
@turbo for n in 1 : N_half
59+
for m in 1 : M_half
6260
A11 = A[m, n]
63-
A21 = A[m + Mh, n]
64-
A12 = A[m, n + Nh]
65-
A22 = A[m + Mh, n + Nh]
61+
A21 = A[m + M_half, n]
62+
A12 = A[m, n + N_half]
63+
A22 = A[m + M_half, n + N_half]
6664

6765
T1 = A11 + A12
6866
T2 = A21 + A22
@@ -74,32 +72,32 @@ function 🦋mul_level!(A, u, v)
7472
C22 = T3 - T4
7573

7674
u1 = u[m]
77-
u2 = u[m + Mh]
75+
u2 = u[m + M_half]
7876
v1 = v[n]
79-
v2 = v[n + Nh]
77+
v2 = v[n + N_half]
8078

8179
A[m, n] = u1 * C11 * v1
82-
A[m + Mh, n] = u2 * C21 * v1
83-
A[m, n + Nh] = u1 * C12 * v2
84-
A[m + Mh, n + Nh] = u2 * C22 * v2
80+
A[m + M_half, n] = u2 * C21 * v1
81+
A[m, n + N_half] = u1 * C12 * v2
82+
A[m + M_half, n + N_half] = u2 * C22 * v2
8583
end
8684
end
8785
end
8886

8987
function 🦋mul!(A, uv)
9088
M, N = size(A)
9189
@assert M == N
92-
Mh = M >>> 1
90+
M_half = M >>> 1
9391

94-
U₁ = @view(uv[1:Mh])
95-
V₁ = @view(uv[(Mh + 1):(M)])
96-
U₂ = @view(uv[(1 + M):(M + Mh)])
97-
V₂ = @view(uv[(1 + M + Mh):(2 * M)])
92+
U₁ = @view(uv[1:M_half])
93+
V₁ = @view(uv[(M_half + 1):(M)])
94+
U₂ = @view(uv[(1 + M):(M + M_half)])
95+
V₂ = @view(uv[(1 + M + M_half):(2 * M)])
9896

99-
🦋mul_level!(@view(A[1:Mh, 1:Mh]), U₁, V₁)
100-
🦋mul_level!(@view(A[Mh + 1:M, 1:Mh]), U₂, V₁)
101-
🦋mul_level!(@view(A[1:Mh, Mh + 1:M]), U₁, V₂)
102-
🦋mul_level!(@view(A[Mh + 1:M, Mh + 1:M]), U₂, V₂)
97+
🦋mul_level!(@view(A[1:M_half, 1:M_half]), U₁, V₁)
98+
🦋mul_level!(@view(A[M_half + 1:M, 1:M_half]), U₂, V₁)
99+
🦋mul_level!(@view(A[1:M_half, M_half + 1:M]), U₁, V₂)
100+
🦋mul_level!(@view(A[M_half + 1:M, M_half + 1:M]), U₂, V₂)
103101

104102
U = @view(uv[(1 + 2 * M):(3 * M)])
105103
V = @view(uv[(1 + 3 * M):(4 * M)])
@@ -128,7 +126,7 @@ function 🦋!(C::SparseBandedMatrix, A::Diagonal, B::Diagonal)
128126
C
129127
end
130128

131-
function 🦋2!(C, A::Diagonal, B::Diagonal)
129+
function 🦋!(C, A::Diagonal, B::Diagonal)
132130
@assert size(A) == size(B)
133131
A1 = size(A, 1)
134132

@@ -144,27 +142,27 @@ end
144142

145143
function materializeUV(U, V, uv)
146144
M = size(U, 1)
147-
Mh = M >>> 1
145+
M_half = M >>> 1
148146

149-
U₁u, U₁l = diagnegbottom(@view(uv[1:Mh])) #Mh
150-
U₂u, U₂l = diagnegbottom(@view(uv[(1 + 2 * Mh):(M + Mh)])) #Mh
151-
V₁u, V₁l = diagnegbottom(@view(uv[(Mh + 1):(2 * Mh)])) #Mh
152-
V₂u, V₂l = diagnegbottom(@view(uv[(1 + 3 * Mh):(2 * Mh + M)])) #Mh
147+
U₁u, U₁l = diagnegbottom(@view(uv[1:M_half])) #M_half
148+
U₂u, U₂l = diagnegbottom(@view(uv[(1 + 2 * M_half):(M + M_half)])) #M_half
149+
V₁u, V₁l = diagnegbottom(@view(uv[(M_half + 1):(2 * M_half)])) #M_half
150+
V₂u, V₂l = diagnegbottom(@view(uv[(1 + 3 * M_half):(2 * M_half + M)])) #M_half
153151
Uu, Ul = diagnegbottom(@view(uv[(1 + 2 * M):(3 * M)])) #M
154152
Vu, Vl = diagnegbottom(@view(uv[(1 + 3 * M):(4 * M)])) #M
155153

156154
Bu2 = SparseBandedMatrix{typeof(uv[1])}(undef, M, M)
157155

158-
🦋2!(view(Bu2, 1 : Mh, 1 : Mh), U₁u, U₁l)
159-
🦋2!(view(Bu2, Mh + 1: M, Mh + 1: M), U₂u, U₂l)
156+
🦋!(view(Bu2, 1 : M_half, 1 : M_half), U₁u, U₁l)
157+
🦋!(view(Bu2, M_half + 1: M, M_half + 1: M), U₂u, U₂l)
160158

161159
Bu1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, M)
162160
🦋!(Bu1, Uu, Ul)
163161

164162
Bv2 = SparseBandedMatrix{typeof(uv[1])}(undef, M, M)
165163

166-
🦋2!(view(Bv2, 1 : Mh, 1 : Mh), V₁u, V₁l)
167-
🦋2!(view(Bv2, Mh + 1: M, Mh + 1: M), V₂u, V₂l)
164+
🦋!(view(Bv2, 1 : M_half, 1 : M_half), V₁u, V₁l)
165+
🦋!(view(Bv2, M_half + 1: M, M_half + 1: M), V₂u, V₂l)
168166

169167
Bv1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, M)
170168
🦋!(Bv1, Vu, Vl)

test/runtests.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ end
8181
for i in 790 : 810
8282
A = wilkinson(i)
8383
b = rand(i)
84-
ws = RecursiveFactorization.🦋workspace(A, b)
84+
ws = RecursiveFactorization.🦋workspace(copy(A), copy(b))
8585
out = RecursiveFactorization.🦋lu!(ws, i, Val(true))
8686
@test norm(A * out .- b) <= 1e-10
8787
end
8888
end
89+

0 commit comments

Comments
 (0)