@@ -20,7 +20,7 @@ function 🦋generate_random!(A, ::Val{SEED} = Val(888)) where {SEED}
20
20
(uv,)
21
21
end
22
22
23
- function 🦋workspace (A, B:: Matrix{T} , U:: Adjoint{T, Matrix{T}} , V:: Matrix{T} , :: Val{SEED} = Val (888 )) where {T, SEED}
23
+ function 🦋workspace (A, b, B:: Matrix{T} , U:: Adjoint{T, Matrix{T}} , V:: Matrix{T} , thread , :: Val{SEED} = Val (888 )) where {T, SEED}
24
24
M = size (A, 1 )
25
25
if (M % 4 != 0 )
26
26
A = pad! (A)
@@ -29,9 +29,10 @@ function 🦋workspace(A, B::Matrix{T}, U::Adjoint{T, Matrix{T}}, V::Matrix{T},
29
29
ws = 🦋generate_random! (copyto! (B, A))
30
30
🦋mul! (copyto! (B, A), ws)
31
31
U, V = materializeUV (B, ws)
32
- F = RecursiveFactorization. lu! (B, Val (false ))
32
+ F = RecursiveFactorization. lu! (B, thread)
33
+ out = similar (b, M)
33
34
34
- U, V, F
35
+ U, V, F, out
35
36
end
36
37
37
38
const butterfly_workspace = 🦋workspace;
@@ -41,14 +42,12 @@ function 🦋mul_level!(A, u, v)
41
42
@assert M == length (u) && N == length (v)
42
43
Mh = M >>> 1
43
44
Nh = N >>> 1
44
- M2 = M - Mh
45
- N2 = N - Nh
46
45
@turbo for n in 1 : Nh
47
46
for m in 1 : Mh
48
47
A11 = A[m, n]
49
- A21 = A[m + M2 , n]
50
- A12 = A[m, n + N2 ]
51
- A22 = A[m + M2 , n + N2 ]
48
+ A21 = A[m + Mh , n]
49
+ A12 = A[m, n + Nh ]
50
+ A22 = A[m + Mh , n + Nh ]
52
51
53
52
T1 = A11 + A12
54
53
T2 = A21 + A22
@@ -60,36 +59,16 @@ function 🦋mul_level!(A, u, v)
60
59
C22 = T3 - T4
61
60
62
61
u1 = u[m]
63
- u2 = u[m + M2 ]
62
+ u2 = u[m + Mh ]
64
63
v1 = v[n]
65
- v2 = v[n + N2 ]
64
+ v2 = v[n + Nh ]
66
65
67
66
A[m, n] = u1 * C11 * v1
68
- A[m + M2 , n] = u2 * C21 * v1
69
- A[m, n + N2 ] = u1 * C12 * v2
70
- A[m + M2 , n + N2 ] = u2 * C22 * v2
67
+ A[m + Mh , n] = u2 * C21 * v1
68
+ A[m, n + Nh ] = u1 * C12 * v2
69
+ A[m + Mh , n + Nh ] = u2 * C22 * v2
71
70
end
72
71
end
73
- #=
74
- if (N % 2 == 1) # N odd
75
- n = N2
76
- for m in 1:M
77
- A[m, n] = u[m] * A[m, n] * v[n]
78
- end
79
- end
80
-
81
- if (M % 2 == 1) # M odd
82
- m = M2
83
- for n in 1:N
84
- A[m, n] = u[m] * A[m, n] * v[n]
85
- end
86
- end
87
-
88
- if (M % 2 == 1) && (N % 2 == 1)
89
- m = M2
90
- n = N2
91
- A[m, n] /= (u[m] * v[n])
92
- end =#
93
72
end
94
73
95
74
function 🦋mul! (A, (uv,))
@@ -98,8 +77,8 @@ function 🦋mul!(A, (uv,))
98
77
Mh = M >>> 1
99
78
100
79
U₁ = @view (uv[1 : Mh])
101
- V₁ = @view (uv[(Mh + 1 ): (2 * Mh )])
102
- U₂ = @view (uv[(1 + 2 * Mh ): (M + Mh)])
80
+ V₁ = @view (uv[(Mh + 1 ): (M )])
81
+ U₂ = @view (uv[(1 + M ): (M + Mh)])
103
82
V₂ = @view (uv[(1 + M + Mh): (2 * M)])
104
83
105
84
🦋mul_level! (@view (A[1 : Mh, 1 : Mh]), U₁, V₁)
0 commit comments