1+ using VectorizedRNG
2+ using LinearAlgebra: Diagonal, I
3+ using LoopVectorization
4+ using RecursiveFactorization
5+
6+ @inline exphalf (x) = exp (x) * oftype (x, 0.5 )
7+ function 🦋! (wv, :: Val{SEED} = Val (888 )) where {SEED}
8+ T = eltype (wv)
9+ mrng = VectorizedRNG. MutableXoshift (SEED)
10+ GC. @preserve mrng begin rand! (exphalf, VectorizedRNG. Xoshift (mrng), wv, static (0 ),
11+ T (- 0.05 ), T (0.1 )) end
12+ end
13+
14+ function 🦋generate! (A, :: Val{SEED} = Val (888 )) where {SEED}
15+ Usz = 2 * size (A, 1 )
16+ Vsz = 2 * size (A, 2 )
17+ uv = similar (A, Usz + Vsz)
18+ 🦋! (uv, Val (SEED))
19+ (uv,)
20+ end
21+
22+ function 🦋workspace (A, :: Val{SEED} = Val (888 )) where {SEED}
23+ A = pad! (A)
24+ B = similar (A);
25+ ws = 🦋generate! (B)
26+ 🦋mul! (copyto! (B, A), ws)
27+ U, V, B = materializeUV (B, ws)
28+ F = RecursiveFactorization. lu! (B, Val (false ))
29+ A, B, U, V, F
30+ end
31+
32+ const butterfly_workspace = 🦋workspace;
33+
34+ function 🦋mul_level! (A, u, v)
35+ # for now, assume...
36+ M, N = size (A)
37+ Ml = M >>> 1
38+ Nl = N >>> 1
39+ Mh = M - Ml
40+ Nh = N - Nl
41+ @turbo for n in 1 : Nl
42+ for m in 1 : Ml
43+ A11 = A[m, n]
44+ A21 = A[m + Mh, n]
45+ A12 = A[m, n + Nh]
46+ A22 = A[m + Mh, n + Nh]
47+
48+ T1 = A11 + A12
49+ T2 = A21 + A22
50+ T3 = A11 - A12
51+ T4 = A21 - A22
52+ C11 = T1 + T2
53+ C21 = T1 - T2
54+ C12 = T3 + T4
55+ C22 = T3 - T4
56+
57+ u1 = u[m]
58+ u2 = u[m + Mh]
59+ v1 = v[n]
60+ v2 = v[n + Nh]
61+
62+ A[m, n] = u1 * C11 * v1
63+ A[m + Mh, n] = u2 * C21 * v1
64+ A[m, n + Nh] = u1 * C12 * v2
65+ A[m + Mh, n + Nh] = u2 * C22 * v2
66+ end
67+ end
68+ end
69+
70+ function 🦋mul! (A, (uv,))
71+ M, N = size (A)
72+ Mh = M >>> 1
73+ Nh = N >>> 1
74+ U₁ = @view (uv[1 : Mh])
75+ U₂ = @view (uv[(1 + Mh + Nh): (2 * Mh + Nh)])
76+ V₁ = @view (uv[(Mh + 1 ): (Mh + Nh)])
77+ V₂ = @view (uv[(1 + 2 * Mh + Nh): (2 * Mh + 2 * Nh)])
78+ 🦋mul_level! (@view (A[1 : Mh, 1 : Nh]), U₁, V₁)
79+ 🦋mul_level! (@view (A[(1 + Mh): M, 1 : Nh]), U₂, V₁)
80+ 🦋mul_level! (@view (A[1 : Mh, (1 + Nh): N]), U₁, V₂)
81+ 🦋mul_level! (@view (A[(1 + Mh): M, (1 + Nh): N]), U₂, V₂)
82+ U = @view (uv[(1 + 2 * Mh + 2 * Nh): (2 * Mh + 2 * Nh + M)])
83+ V = @view (uv[(1 + 2 * Mh + 2 * Nh + M): (2 * Mh + 2 * Nh + M + N)])
84+ 🦋mul_level! (@view (A[1 : M, 1 : N]), U, V)
85+ A
86+ end
87+
88+ function diagnegbottom (x)
89+ N = length (x)
90+ y = similar (x, N >>> 1 )
91+ z = similar (x, N >>> 1 )
92+ for n in 1 : (N >>> 1 )
93+ y[n] = x[n]
94+ end
95+ for n in 1 : (N >>> 1 )
96+ z[n] = x[n + (N >>> 1 )]
97+ end
98+ Diagonal (y), Diagonal (z)
99+ end
100+ 🦋(A, B) = [A B
101+ A - B]
102+
103+ function materializeUV (A, (uv,))
104+ M, N = size (A)
105+ Mh = M >>> 1
106+ Nh = N >>> 1
107+
108+ U₁u, U₁l = diagnegbottom (@view (uv[1 : Mh]))
109+ U₂u, U₂l = diagnegbottom (@view (uv[(1 + Mh + Nh): (2 * Mh + Nh)]))
110+ V₁u, V₁l = diagnegbottom (@view (uv[(Mh + 1 ): (Mh + Nh)]))
111+ V₂u, V₂l = diagnegbottom (@view (uv[(1 + 2 * Mh + Nh): (2 * Mh + 2 * Nh)]))
112+ Uu, Ul = diagnegbottom (@view (uv[(1 + 2 * Mh + 2 * Nh): (2 * Mh + 2 * Nh + M)]))
113+ Vu, Vl = diagnegbottom (@view (uv[(1 + 2 * Mh + 2 * Nh + M): (2 * Mh + 2 * Nh + M + N)]))
114+
115+ Bu2 = [🦋(U₁u, U₁l) 0 * I
116+ 0 * I 🦋(U₂u, U₂l)]
117+ Bu1 = 🦋(Uu, Ul)
118+
119+ Bv2 = [🦋(V₁u, V₁l) 0 * I
120+ 0 * I 🦋(V₂u, V₂l)]
121+ Bv1 = 🦋(Vu, Vl)
122+
123+ (Bu2 * Bu1)' , Bv2 * Bv1, A
124+ end
125+
126+ function pad! (A)
127+ M, N = size (A)
128+ xn = 4 - M % 4
129+ A = [A zeros (N, xn)
130+ zeros (xn, N) I (xn)
131+ ]
132+ A
133+ end
0 commit comments