1
"""
    alternatingSum(x::AbstractVector{T}) where T

Return the alternating sum of `x`: the first element is added, and every following
element is alternately subtracted and added (`x₁ - x₂ + x₃ - …`).

An empty vector yields `zero(T)`. Positions are counted from `firstindex(x)`, so the
result does not depend on the vector using 1-based indices.
"""
function alternatingSum(x::AbstractVector{T}) where T
    isempty(x) && return zero(T)
    first_idx = firstindex(x)
    y = x[first_idx]
    # Negate instead of multiplying by `convert(T, ±1)`, so element types that cannot
    # represent -1 (e.g. unsigned integers) do not raise a conversion error.
    @inbounds @simd for i in (first_idx + 1):lastindex(x)
        y += isodd(i - first_idx) ? -x[i] : x[i]
    end
    return y
end
8
-
9
1
# Method taking an `AbstractFFTType` argument and two vectors of the same element type:
# it ignores every argument, performs no work and returns `nothing`.
# NOTE(review): presumably the fallback for FFT types without a specialised method — confirm.
function fft!(::AbstractVector{T}, ::AbstractVector{T}, ::Int, ::Int, ::Direction,
              ::AbstractFFTType, ::CallGraph{T}, ::Int) where {T}
    return nothing
end
10
2
11
3
@inline function direction_sign (d:: Direction )
@@ -31,56 +23,21 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
31
23
w1 = convert (T, cispi (direction_sign (d)* 2 / N))
32
24
wj1 = one (T)
33
25
tmp = g. workspace[idx]
34
- for j1 in 0 : N1- 1
26
+ @inbounds for j1 in 0 : N1- 1
35
27
wk2 = wj1
36
28
g (tmp, in, N2* j1+ 1 , start_in + j1* s_in, d, right. type, right_idx)
37
- j1 > 0 && for k2 in 1 : N2- 1
29
+ j1 > 0 && @inbounds for k2 in 1 : N2- 1
38
30
tmp[N2* j1 + k2 + 1 ] *= wk2
39
31
wk2 *= wj1
40
32
end
41
33
wj1 *= w1
42
34
end
43
35
44
- for k2 in 0 : N2- 1
36
+ @inbounds for k2 in 0 : N2- 1
45
37
g (out, tmp, start_out + k2* s_out, k2+ 1 , d, left. type, left_idx)
46
38
end
47
39
end
48
40
49
# `Pow2FFT` method: read the size and strides of the call-graph node at `idx` and hand
# them to `fft_pow2!`.
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int,
              d::Direction, ::Pow2FFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    return fft_pow2!(out, in, node.sz, start_out, node.s_out, start_in, node.s_in, d)
end
56
-
"""
    fft_pow2!(out, in, N, start_out, stride_out, start_in, stride_in, d)

Recursive radix-2 FFT of length `N`, where `N` must be a power of two.

Reads `N` elements of `in`, beginning at `start_in` and stepping by `stride_in`, and
writes the transform to `out`, beginning at `start_out` and stepping by `stride_out`.
The two half-length transforms are written into the lower and upper halves of the
output range and are then combined there by the butterfly loop, so `out` doubles as
the workspace. The sign of the twiddle exponent is `direction_sign(d)`.

Returns `nothing`.
"""
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_out::Int,
                   stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T,U}
    if N == 1
        # A length-1 transform is the identity. Without this case `N ÷ 2 == 0` and the
        # recursion below would never reach a base case.
        out[start_out] = in[start_in]
        return nothing
    end
    if N == 2
        out[start_out] = in[start_in] + in[start_in + stride_in]
        out[start_out + stride_out] = in[start_in] - in[start_in + stride_in]
        return nothing
    end
    m = N ÷ 2

    # Even-indexed inputs go to the first half of the output, odd-indexed to the second.
    fft_pow2!(out, in, m, start_out, stride_out, start_in, stride_in * 2, d)
    fft_pow2!(out, in, m, start_out + m * stride_out, stride_out, start_in + stride_in,
              stride_in * 2, d)

    w1 = convert(T, cispi(direction_sign(d) * 2 / N))
    wj = one(T)  # running twiddle factor w1^j
    @inbounds for j in 0:m-1
        j1_out = start_out + j * stride_out
        j2_out = start_out + (j + m) * stride_out
        out_j = out[j1_out]
        twiddled = wj * out[j2_out]  # computed once, used by both outputs
        out[j1_out] = out_j + twiddled
        out[j2_out] = out_j - twiddled
        wj *= w1
    end
    return nothing
end
83
-
84
41
function fft_dft! (out:: AbstractVector{T} , in:: AbstractVector{T} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , d:: Direction ) where {T}
85
42
tmp = in[start_in]
86
43
@inbounds for j in 1 : N- 1
131
88
# `DFT` method: read the size and strides of the call-graph node at `idx` and hand them
# to `fft_dft!`.
# NOTE(review): `in` is accepted here with element type `U`, while the visible
# `fft_dft!` signature requires `in` and `out` to share element type `T` — confirm that
# callers never pass differing element types.
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int,
              d::Direction, ::DFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    N, s_in, s_out = node.sz, node.s_in, node.s_out
    return fft_dft!(out, in, N, start_out, s_out, start_in, s_in, d)
end
92
+
"""
    fft_pow2!(out, in, N, start_out, stride_out, start_in, stride_in, d)

Recursive radix-2 FFT of length `N`, where `N` must be a power of two.

Reads `N` elements of `in`, beginning at `start_in` and stepping by `stride_in`, and
writes the transform to `out`, beginning at `start_out` and stepping by `stride_out`.
The two half-length transforms are written into the lower and upper halves of the
output range and are then combined there by the butterfly loop, so `out` doubles as
the workspace. The sign of the twiddle exponent is `direction_sign(d)`.

Returns `nothing`.
"""
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_out::Int,
                   stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T,U}
    if N == 1
        # A length-1 transform is the identity. Without this case `N ÷ 2 == 0` and the
        # recursion below would never reach a base case.
        out[start_out] = in[start_in]
        return nothing
    end
    if N == 2
        out[start_out] = in[start_in] + in[start_in + stride_in]
        out[start_out + stride_out] = in[start_in] - in[start_in + stride_in]
        return nothing
    end
    m = N ÷ 2

    # Even-indexed inputs go to the first half of the output, odd-indexed to the second.
    fft_pow2!(out, in, m, start_out, stride_out, start_in, stride_in * 2, d)
    fft_pow2!(out, in, m, start_out + m * stride_out, stride_out, start_in + stride_in,
              stride_in * 2, d)

    w1 = convert(T, cispi(direction_sign(d) * 2 / N))
    wj = one(T)  # running twiddle factor w1^j
    @inbounds for j in 0:m-1
        j1_out = start_out + j * stride_out
        j2_out = start_out + (j + m) * stride_out
        out_j = out[j1_out]
        twiddled = wj * out[j2_out]  # computed once, used by both outputs
        out[j1_out] = out_j + twiddled
        out[j2_out] = out_j - twiddled
        wj *= w1
    end
    return nothing
end
119
+
120
# `Pow2FFT` method: read the size and strides of the call-graph node at `idx` and hand
# them to `fft_pow2!`.
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int,
              d::Direction, ::Pow2FFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    return fft_pow2!(out, in, node.sz, start_out, node.s_out, start_in, node.s_in, d)
end
127
+
"""
    fft_pow4!(out, in, N, start_out, stride_out, start_in, stride_in, d)

Recursive radix-4 FFT of length `N`, where `N` must be a power of four.

Reads `N` elements of `in`, beginning at `start_in` and stepping by `stride_in`, and
writes the transform to `out`, beginning at `start_out` and stepping by `stride_out`.
The four quarter-length transforms are written into consecutive quarters of the output
range and are then combined there by the butterfly loop, so `out` doubles as the
workspace. The sign of the twiddle exponent is `direction_sign(d)`.

Returns `nothing`.
"""
function fft_pow4!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_out::Int,
                   stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T,U}
    if N == 1
        # A length-1 transform is the identity. Without this case `N ÷ 4 == 0` and the
        # recursion below would never reach a base case.
        out[start_out] = in[start_in]
        return nothing
    end

    ds = direction_sign(d)
    # cispi(ds / 2) and its conjugate: the quarter-turn factors of the 4-point butterfly.
    plusi = ds * 1im
    minusi = ds * -1im

    if N == 4
        x0 = in[start_in]
        x1 = in[start_in + stride_in]
        x2 = in[start_in + 2 * stride_in]
        x3 = in[start_in + 3 * stride_in]
        out[start_out] = x0 + x1 + x2 + x3
        out[start_out + stride_out] = x0 + x1 * plusi - x2 + x3 * minusi
        out[start_out + 2 * stride_out] = x0 - x1 + x2 - x3
        out[start_out + 3 * stride_out] = x0 + x1 * minusi - x2 + x3 * plusi
        return nothing
    end
    m = N ÷ 4

    # Sub-transform r (inputs with index ≡ r mod 4) is written to the r-th quarter of the
    # output range.
    fft_pow4!(out, in, m, start_out, stride_out, start_in, stride_in * 4, d)
    fft_pow4!(out, in, m, start_out + m * stride_out, stride_out, start_in + stride_in,
              stride_in * 4, d)
    fft_pow4!(out, in, m, start_out + 2 * m * stride_out, stride_out,
              start_in + 2 * stride_in, stride_in * 4, d)
    fft_pow4!(out, in, m, start_out + 3 * m * stride_out, stride_out,
              start_in + 3 * stride_in, stride_in * 4, d)

    w1 = convert(T, cispi(ds * 2 / N))
    w2 = convert(T, cispi(ds * 4 / N))
    w3 = convert(T, cispi(ds * 6 / N))
    # Running twiddle factors w1^k, w2^k and w3^k for sub-transforms 1, 2 and 3.
    wk1 = wk2 = wk3 = one(T)

    @inbounds for k in 0:m-1
        k0 = start_out + k * stride_out
        k1 = start_out + (k + m) * stride_out
        k2 = start_out + (k + 2 * m) * stride_out
        k3 = start_out + (k + 3 * m) * stride_out
        y_k0, y_k1, y_k2, y_k3 = out[k0], out[k1], out[k2], out[k3]
        # Each sub-transform output is paired with its own twiddle factor
        # (y_k1 ↔ wk1, y_k2 ↔ wk2, y_k3 ↔ wk3) in all four outputs.
        @muladd out[k0] = (y_k0 + y_k2 * wk2) + (y_k1 * wk1 + y_k3 * wk3)
        @muladd out[k1] = (y_k0 - y_k2 * wk2) + (y_k1 * wk1 - y_k3 * wk3) * plusi
        @muladd out[k2] = (y_k0 + y_k2 * wk2) - (y_k1 * wk1 + y_k3 * wk3)
        @muladd out[k3] = (y_k0 - y_k2 * wk2) + (y_k1 * wk1 - y_k3 * wk3) * minusi
        wk1 *= w1
        wk2 *= w2
        wk3 *= w3
    end
    return nothing
end
173
+
174
# `Pow4FFT` method: read the size and strides of the call-graph node at `idx` and hand
# them to `fft_pow4!`.
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int,
              d::Direction, ::Pow4FFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    return fft_pow4!(out, in, node.sz, start_out, node.s_out, start_in, node.s_in, d)
end
0 commit comments