@@ -4,6 +4,8 @@ fft!(::AbstractVector{T}, ::AbstractVector{T}, ::Int, ::Int, ::Direction, ::Abst
44 Int (d)
55end
66
# Pick the twiddle factor matching direction `d`: `w` itself when
# `direction_sign(d) === 1`, otherwise its complex conjugate.
# `ifelse` evaluates both candidates before selecting one.
@inline _conj(w::Complex, d::Direction) = ifelse(direction_sign(d) === 1, w, conj(w))

# Calling a `CallGraph` forwards all arguments to `fft!`, appending the graph
# itself and the node index `idx` as the trailing arguments.
function (g::CallGraph{T})(
    out::AbstractVector{T}, in::AbstractVector{U},
    start_out::Int, start_in::Int,
    v::Direction, t::FFTEnum, idx::Int,
) where {T,U}
    return fft!(out, in, start_out, start_in, v, t, g, idx)
end
@@ -52,7 +54,7 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
5254 s_in = root. s_in
5355 s_out = root. s_out
5456
55- w1 = convert (T, cispi ( direction_sign (d) * 2 / N) )
57+ w1 = _conj (root . w, d )
5658 wj1 = one (T)
5759 tmp = g. workspace[idx]
5860 @inbounds for j1 in 0 : N1- 1
@@ -82,17 +84,17 @@ Discrete Fourier Transform, O(N^2) algorithm, in place.
8284`stride_out`: Stride of the output vector
8385`start_in`: Index of the first element of the input vector
8486`stride_in`: Stride of the input vector
`w`: Twiddle factor `cispi(s * 2 / N)`, where `s = direction_sign(d)` for the transform direction `d` chosen by the caller
8688
8789"""
88- function fft_dft! (out:: AbstractVector{T} , in:: AbstractVector{T} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , d :: Direction ) where {T}
90+ function fft_dft! (out:: AbstractVector{T} , in:: AbstractVector{T} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , w :: T ) where {T}
8991 tmp = in[start_in]
9092 @inbounds for j in 1 : N- 1
9193 tmp += in[start_in + j* stride_in]
9294 end
9395 out[start_out] = tmp
9496
95- wk = wkn = w = convert (T, cispi ( direction_sign (d) * 2 / N))
97+ wk = wkn = w
9698 @inbounds for d in 1 : N- 1
9799 tmp = in[start_in]
98100 @inbounds for k in 1 : N- 1
@@ -105,7 +107,7 @@ function fft_dft!(out::AbstractVector{T}, in::AbstractVector{T}, N::Int, start_o
105107 end
106108end
107109
108- function fft_dft! (out:: AbstractVector{Complex{T}} , in:: AbstractVector{T} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , d :: Direction ) where {T<: Real }
110+ function fft_dft! (out:: AbstractVector{Complex{T}} , in:: AbstractVector{T} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , w :: Complex{T} ) where {T<: Real }
109111 halfN = N÷ 2
110112
111113 tmp = Complex {T} (in[start_in])
@@ -114,7 +116,7 @@ function fft_dft!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, N::Int
114116 end
115117 out[start_out] = tmp
116118
117- wk = wkn = w = convert (Complex{T}, cispi ( direction_sign (d) * 2 / N))
119+ wk = wkn = w
118120 @inbounds for d in 1 : halfN
119121 tmp = Complex {T} (in[start_in])
120122 @inbounds for k in 1 : N- 1
129131
# `fft!` method selected by the `DFT` tag: read node `idx` of the call graph and
# run `fft_dft!` with that node's size and strides. The node's stored factor
# `root.w` is passed through `_conj` so it matches the direction `d`.
function fft!(
    out::AbstractVector{T}, in::AbstractVector{U},
    start_out::Int, start_in::Int,
    d::Direction, ::DFT, g::CallGraph{T}, idx::Int,
) where {T,U}
    root = g[idx]
    return fft_dft!(
        out, in, root.sz,
        start_out, root.s_out,
        start_in, root.s_in,
        _conj(root.w, d),
    )
end
134136
135137"""
@@ -144,29 +146,28 @@ Power of 2 FFT, in place
144146`stride_out`: Stride of the output vector
145147`start_in`: Index of the first element of the input vector
146148`stride_in`: Stride of the input vector
`w`: Twiddle factor `cispi(s * 2 / N)`, where `s = direction_sign(d)` for the transform direction `d` chosen by the caller
148150
149151"""
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_out::Int, stride_out::Int, start_in::Int, stride_in::Int, w::T) where {T, U}
    # Base case: a length-2 transform is a single butterfly and does not use `w`.
    if N == 2
        out[start_out] = in[start_in] + in[start_in + stride_in]
        out[start_out + stride_out] = in[start_in] - in[start_in + stride_in]
        return
    end
    m = N ÷ 2

    # Both half-size sub-transforms take the squared twiddle factor and read the
    # input at twice the stride (even-offset samples, then odd-offset samples).
    # The square is loop-invariant, so compute it once.
    w_half = w * w
    fft_pow2!(out, in, m, start_out, stride_out, start_in, stride_in*2, w_half)
    fft_pow2!(out, in, m, start_out + m*stride_out, stride_out, start_in + stride_in, stride_in*2, w_half)

    # Combine the two halves in place; `wj` runs through w^0, w^1, ..., w^(m-1).
    wj = one(T)
    @inbounds for j in 0:m-1
        j1_out = start_out + j*stride_out
        j2_out = start_out + (j+m)*stride_out
        out_j = out[j1_out]
        # Read the second half's value once, before either slot is overwritten.
        t = wj*out[j2_out]
        out[j1_out] = out_j + t
        out[j2_out] = out_j - t
        wj *= w
    end
    return nothing
end
172173
@@ -175,7 +176,7 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
175176 N = root. sz
176177 s_in = root. s_in
177178 s_out = root. s_out
178- fft_pow2! (out, in, N, start_out, s_out, start_in, s_in, d )
179+ fft_pow2! (out, in, N, start_out, s_out, start_in, s_in, _conj (root . w, d) )
179180end
180181
181182"""
@@ -190,13 +191,12 @@ Power of 4 FFT, in place
190191`stride_out`: Stride of the output vector
191192`start_in`: Index of the first element of the input vector
192193`stride_in`: Stride of the input vector
`w`: Twiddle factor `cispi(s * 2 / N)`, where `s = direction_sign(d)` for the transform direction `d` chosen by the caller
194195
195196"""
196- function fft_pow4! (out:: AbstractVector{T} , in:: AbstractVector{U} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , d:: Direction ) where {T, U}
197- ds = direction_sign (d)
198- plusi = ds* 1im
199- minusi = ds*- 1im
197+ function fft_pow4! (out:: AbstractVector{T} , in:: AbstractVector{U} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , w:: T ) where {T, U}
198+ plusi = sign (imag (w))* im
199+ minusi = - sign (imag (w))* im
200200 if N == 4
201201 out[start_out + 0 ] = in[start_in] + in[start_in + stride_in] + in[start_in + 2 * stride_in] + in[start_in + 3 * stride_in]
202202 out[start_out + stride_out] = in[start_in] + in[start_in + stride_in]* plusi - in[start_in + 2 * stride_in] + in[start_in + 3 * stride_in]* minusi
@@ -206,17 +206,14 @@ function fft_pow4!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_
206206 end
207207 m = N ÷ 4
208208
209- @muladd fft_pow4! (out, in, m, start_out , stride_out, start_in , stride_in* 4 , d )
210- @muladd fft_pow4! (out, in, m, start_out + m* stride_out, stride_out, start_in + stride_in, stride_in* 4 , d )
211- @muladd fft_pow4! (out, in, m, start_out + 2 * m* stride_out, stride_out, start_in + 2 * stride_in, stride_in* 4 , d )
212- @muladd fft_pow4! (out, in, m, start_out + 3 * m* stride_out, stride_out, start_in + 3 * stride_in, stride_in* 4 , d )
209+ @muladd fft_pow4! (out, in, m, start_out , stride_out, start_in , stride_in* 4 , w ^ 4 )
210+ @muladd fft_pow4! (out, in, m, start_out + m* stride_out, stride_out, start_in + stride_in, stride_in* 4 , w ^ 4 )
211+ @muladd fft_pow4! (out, in, m, start_out + 2 * m* stride_out, stride_out, start_in + 2 * stride_in, stride_in* 4 , w ^ 4 )
212+ @muladd fft_pow4! (out, in, m, start_out + 3 * m* stride_out, stride_out, start_in + 3 * stride_in, stride_in* 4 , w ^ 4 )
213213
214- w1 = convert (T, cispi (direction_sign (d)* 2 / N))
215- wj = one (T)
216-
217- w1 = convert (T, cispi (ds* 2 / N))
218- w2 = convert (T, cispi (ds* 4 / N))
219- w3 = convert (T, cispi (ds* 6 / N))
214+ w1 = w
215+ w2 = w* w1
216+ w3 = w* w2
220217 wk1 = wk2 = wk3 = one (T)
221218
222219 @inbounds for k in 0 : m- 1
@@ -240,7 +237,7 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
240237 N = root. sz
241238 s_in = root. s_in
242239 s_out = root. s_out
243- fft_pow4! (out, in, N, start_out, s_out, start_in, s_in, d )
240+ fft_pow4! (out, in, N, start_out, s_out, start_in, s_in, _conj (root . w, d) )
244241end
245242
246243"""
@@ -255,12 +252,12 @@ start_out: Index of the first element of the output vector
255252stride_out: Stride of the output vector
256253start_in: Index of the first element of the input vector
257254stride_in: Stride of the input vector
w: Twiddle factor `cispi(s * 2 / N)`, where `s = direction_sign(d)` for the transform direction `d` chosen by the caller
259256plus120: Depending on direction, perform either ±120° rotation
260257minus120: Depending on direction, perform either ∓120° rotation
261258
262259"""
263- function fft_pow3! (out:: AbstractVector{T} , in:: AbstractVector{U} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , d :: Direction , plus120:: T , minus120:: T ) where {T, U}
260+ function fft_pow3! (out:: AbstractVector{T} , in:: AbstractVector{U} , N:: Int , start_out:: Int , stride_out:: Int , start_in:: Int , stride_in:: Int , w :: T , plus120:: T , minus120:: T ) where {T, U}
264261 if N == 3
265262 @muladd out[start_out + 0 ] = in[start_in] + in[start_in + stride_in] + in[start_in + 2 * stride_in]
266263 @muladd out[start_out + stride_out] = in[start_in] + in[start_in + stride_in]* plus120 + in[start_in + 2 * stride_in]* minus120
@@ -271,15 +268,13 @@ function fft_pow3!(out::AbstractVector{T}, in::AbstractVector{U}, N::Int, start_
271268 # Size of subproblem
272269 Nprime = N ÷ 3
273270
274- ds = direction_sign (d)
275-
276271 # Dividing into subproblems
277- fft_pow3! (out, in, Nprime, start_out, stride_out, start_in, stride_in* 3 , d , plus120, minus120)
278- fft_pow3! (out, in, Nprime, start_out + Nprime* stride_out, stride_out, start_in + stride_in, stride_in* 3 , d , plus120, minus120)
279- fft_pow3! (out, in, Nprime, start_out + 2 * Nprime* stride_out, stride_out, start_in + 2 * stride_in, stride_in* 3 , d , plus120, minus120)
272+ fft_pow3! (out, in, Nprime, start_out, stride_out, start_in, stride_in* 3 , w ^ 3 , plus120, minus120)
273+ fft_pow3! (out, in, Nprime, start_out + Nprime* stride_out, stride_out, start_in + stride_in, stride_in* 3 , w ^ 3 , plus120, minus120)
274+ fft_pow3! (out, in, Nprime, start_out + 2 * Nprime* stride_out, stride_out, start_in + 2 * stride_in, stride_in* 3 , w ^ 3 , plus120, minus120)
280275
281- w1 = convert (T, cispi (ds * 2 / N))
282- w2 = convert (T, cispi (ds * 4 / N))
276+ w1 = w
277+ w2 = w * w1
283278 wk1 = wk2 = one (T)
284279 for k in 0 : Nprime- 1
285280 @muladd k0 = start_out + k* stride_out
@@ -302,8 +297,8 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
302297 p_120 = convert (T, cispi (2 / 3 ))
303298 m_120 = convert (T, cispi (4 / 3 ))
304299 if d == FFT_FORWARD
305- fft_pow3! (out, in, N, start_out, s_out, start_in, s_in, d , m_120, p_120)
300+ fft_pow3! (out, in, N, start_out, s_out, start_in, s_in, _conj (root . w, d) , m_120, p_120)
306301 else
307- fft_pow3! (out, in, N, start_out, s_out, start_in, s_in, d , p_120, m_120)
302+ fft_pow3! (out, in, N, start_out, s_out, start_in, s_in, _conj (root . w, d) , p_120, m_120)
308303 end
309304end
0 commit comments