1
+ using VectorizedRNG
2
+ using LinearAlgebra: Diagonal, I
3
+ using LoopVectorization
4
+ using RecursiveFactorization
5
+
6
+ @inline exphalf (x) = exp (x) * oftype (x, 0.5 )
7
+ function 🦋! (wv, :: Val{SEED} = Val (888 )) where {SEED}
8
+ T = eltype (wv)
9
+ mrng = VectorizedRNG. MutableXoshift (SEED)
10
+ GC. @preserve mrng begin rand! (exphalf, VectorizedRNG. Xoshift (mrng), wv, static (0 ),
11
+ T (- 0.05 ), T (0.1 )) end
12
+ end
13
+
14
+ function 🦋generate! (A, :: Val{SEED} = Val (888 )) where {SEED}
15
+ Usz = 2 * size (A, 1 )
16
+ Vsz = 2 * size (A, 2 )
17
+ uv = similar (A, Usz + Vsz)
18
+ 🦋! (uv, Val (SEED))
19
+ (uv,)
20
+ end
21
+
22
+ function 🦋workspace (A, :: Val{SEED} = Val (888 )) where {SEED}
23
+ A = pad! (A)
24
+ B = similar (A);
25
+ ws = 🦋generate! (B)
26
+ 🦋mul! (copyto! (B, A), ws)
27
+ U, V, B = materializeUV (B, ws)
28
+ F = RecursiveFactorization. lu! (B, Val (false ))
29
+ A, B, U, V, F
30
+ end
31
+
32
+ const butterfly_workspace = 🦋workspace;
33
+
34
+ function 🦋mul_level! (A, u, v)
35
+ # for now, assume...
36
+ M, N = size (A)
37
+ Ml = M >>> 1
38
+ Nl = N >>> 1
39
+ Mh = M - Ml
40
+ Nh = N - Nl
41
+ @turbo for n in 1 : Nl
42
+ for m in 1 : Ml
43
+ A11 = A[m, n]
44
+ A21 = A[m + Mh, n]
45
+ A12 = A[m, n + Nh]
46
+ A22 = A[m + Mh, n + Nh]
47
+
48
+ T1 = A11 + A12
49
+ T2 = A21 + A22
50
+ T3 = A11 - A12
51
+ T4 = A21 - A22
52
+ C11 = T1 + T2
53
+ C21 = T1 - T2
54
+ C12 = T3 + T4
55
+ C22 = T3 - T4
56
+
57
+ u1 = u[m]
58
+ u2 = u[m + Mh]
59
+ v1 = v[n]
60
+ v2 = v[n + Nh]
61
+
62
+ A[m, n] = u1 * C11 * v1
63
+ A[m + Mh, n] = u2 * C21 * v1
64
+ A[m, n + Nh] = u1 * C12 * v2
65
+ A[m + Mh, n + Nh] = u2 * C22 * v2
66
+ end
67
+ end
68
+ end
69
+
70
+ function 🦋mul! (A, (uv,))
71
+ M, N = size (A)
72
+ Mh = M >>> 1
73
+ Nh = N >>> 1
74
+ U₁ = @view (uv[1 : Mh])
75
+ U₂ = @view (uv[(1 + Mh + Nh): (2 * Mh + Nh)])
76
+ V₁ = @view (uv[(Mh + 1 ): (Mh + Nh)])
77
+ V₂ = @view (uv[(1 + 2 * Mh + Nh): (2 * Mh + 2 * Nh)])
78
+ 🦋mul_level! (@view (A[1 : Mh, 1 : Nh]), U₁, V₁)
79
+ 🦋mul_level! (@view (A[(1 + Mh): M, 1 : Nh]), U₂, V₁)
80
+ 🦋mul_level! (@view (A[1 : Mh, (1 + Nh): N]), U₁, V₂)
81
+ 🦋mul_level! (@view (A[(1 + Mh): M, (1 + Nh): N]), U₂, V₂)
82
+ U = @view (uv[(1 + 2 * Mh + 2 * Nh): (2 * Mh + 2 * Nh + M)])
83
+ V = @view (uv[(1 + 2 * Mh + 2 * Nh + M): (2 * Mh + 2 * Nh + M + N)])
84
+ 🦋mul_level! (@view (A[1 : M, 1 : N]), U, V)
85
+ A
86
+ end
87
+
88
+ function diagnegbottom (x)
89
+ N = length (x)
90
+ y = similar (x, N >>> 1 )
91
+ z = similar (x, N >>> 1 )
92
+ for n in 1 : (N >>> 1 )
93
+ y[n] = x[n]
94
+ end
95
+ for n in 1 : (N >>> 1 )
96
+ z[n] = x[n + (N >>> 1 )]
97
+ end
98
+ Diagonal (y), Diagonal (z)
99
+ end
100
+ 🦋(A, B) = [A B
101
+ A - B]
102
+
103
+ function materializeUV (A, (uv,))
104
+ M, N = size (A)
105
+ Mh = M >>> 1
106
+ Nh = N >>> 1
107
+
108
+ U₁u, U₁l = diagnegbottom (@view (uv[1 : Mh]))
109
+ U₂u, U₂l = diagnegbottom (@view (uv[(1 + Mh + Nh): (2 * Mh + Nh)]))
110
+ V₁u, V₁l = diagnegbottom (@view (uv[(Mh + 1 ): (Mh + Nh)]))
111
+ V₂u, V₂l = diagnegbottom (@view (uv[(1 + 2 * Mh + Nh): (2 * Mh + 2 * Nh)]))
112
+ Uu, Ul = diagnegbottom (@view (uv[(1 + 2 * Mh + 2 * Nh): (2 * Mh + 2 * Nh + M)]))
113
+ Vu, Vl = diagnegbottom (@view (uv[(1 + 2 * Mh + 2 * Nh + M): (2 * Mh + 2 * Nh + M + N)]))
114
+
115
+ Bu2 = [🦋(U₁u, U₁l) 0 * I
116
+ 0 * I 🦋(U₂u, U₂l)]
117
+ Bu1 = 🦋(Uu, Ul)
118
+
119
+ Bv2 = [🦋(V₁u, V₁l) 0 * I
120
+ 0 * I 🦋(V₂u, V₂l)]
121
+ Bv1 = 🦋(Vu, Vl)
122
+
123
+ (Bu2 * Bu1)' , Bv2 * Bv1, A
124
+ end
125
+
126
+ function pad! (A)
127
+ M, N = size (A)
128
+ xn = 4 - M % 4
129
+ A = [A zeros (N, xn)
130
+ zeros (xn, N) I (xn)
131
+ ]
132
+ A
133
+ end
0 commit comments