@@ -6,7 +6,7 @@ function alternatingSum(x::AbstractVector{T}) where T
6
6
y
7
7
end
8
8
9
"""
    fft!(out, in, start_out, start_in, direction, fft_type, graph, idx)

Least specific `fft!` method: it accepts any `AbstractFFTType` (both vectors
must share the element type `T` of the `CallGraph{T}`), ignores every
argument, performs no work and returns `nothing`.
"""
function fft!(
    ::AbstractVector{T},
    ::AbstractVector{T},
    ::Int,
    ::Int,
    ::Direction,
    ::AbstractFFTType,
    ::CallGraph{T},
    ::Int,
) where {T}
    return nothing
end
10
10
11
11
@inline function direction_sign (:: FFT_BACKWARD )
12
12
1
16
16
- 1
17
17
end
18
18
19
"""
    (g::CallGraph{T})(out, in, start_out, start_in, v, t, idx)

Call a `CallGraph` like a function. All arguments are forwarded, together with
the graph itself, to `fft!`, so that dispatch on the FFT type `t` selects the
kernel. The value returned by that `fft!` method is returned unchanged.

# Arguments
- `out`, `in`: output and input vectors.
- `start_out`, `start_in`: offsets into `out` and `in`, passed through as-is.
- `v`: transform direction.
- `t`: FFT type used for dispatch.
- `idx`: index of the node of `g` being executed.
"""
function (g::CallGraph{T})(
    out::AbstractVector{T},
    in::AbstractVector{U},
    start_out::Int,
    start_in::Int,
    v::Direction,
    t::AbstractFFTType,
    idx::Int,
) where {T,U}
    return fft!(out, in, start_out, start_in, v, t, g, idx)
end
22
22
23
"""
    fft!(out, in, start_out, start_in, d, ::CompositeFFT, g, idx)

Execute the composite node `idx` of the call graph `g`.

The node of size `N` is split into its right child (size `N2`) and its left
child (size `N1`). The routine

1. runs the right child `N1` times, reading `in` from `start_in + j1*s_in`
   and writing block `j1` of the node's workspace `g.workspace[idx]`
   (starting at `N2*j1 + 1`),
2. multiplies entries `2:N2` of every block except the first by successive
   powers of `w1^j1`, where `w1 = cispi(direction_sign(d)*2/N)`,
3. runs the left child `N2` times, reading the workspace from position `k2`
   and writing `out` from `start_out + (k2 - 1)*s_out`.

The strides `s_in`/`s_out` are taken from the node itself; the strides used
inside the child calls come from the child nodes. Returns `nothing`.
"""
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int, d::Direction, ::CompositeFFT, g::CallGraph{T}, idx::Int) where {T,U}
    root = g[idx]
    left_idx = idx + root.left
    right_idx = idx + root.right
    left = g[left_idx]
    right = g[right_idx]
    N = root.sz
    N1 = left.sz
    N2 = right.sz
    s_in = root.s_in
    s_out = root.s_out
    # Diagnostic only: emitted at debug level so that a normal run does not
    # produce one log record per visited node.
    @debug "CompositeFFT node" N N1 N2 s_in s_out start_in start_out

    w1 = convert(T, cispi(direction_sign(d) * 2 / N))
    wj1 = one(T)
    tmp = g.workspace[idx]
    @inbounds for j1 in 0:(N1 - 1)
        wk2 = wj1
        g(tmp, in, N2 * j1 + 1, start_in + j1 * s_in, d, right.type, right_idx)
        # Block 0 and the first entry of every block have a unit twiddle
        # factor, so they are left untouched.
        if j1 > 0
            for k2 in 2:N2
                tmp[N2 * j1 + k2] *= wk2
                wk2 *= wj1
            end
        end
        wj1 *= w1
    end

    @inbounds for k2 in 1:N2
        g(out, tmp, start_out + (k2 - 1) * s_out, k2, d, left.type, left_idx)
    end
    return nothing
end
47
53
48
"""
    fft!(out, in, start_out, start_in, d, ::Pow2FFT, g, idx)

Execute the power-of-two node `idx` of the call graph `g`: the node's size and
its input/output strides are read from `g[idx]` and handed, together with the
start offsets and the direction `d`, to `fft_pow2!`. Returns what `fft_pow2!`
returns.
"""
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int, d::Direction, ::Pow2FFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    return fft_pow2!(out, in, node.sz, start_out, node.s_out, start_in, node.s_in, d)
end
51
61
52
62
"""
53
63
Power of 2 FFT in place, complex
54
64
55
65
"""
56
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, N::Int, start_out::Int, stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T}
    # Recursive radix-2 transform of the N samples
    #     in[start_in], in[start_in + stride_in], ...
    # written to
    #     out[start_out], out[start_out + stride_out], ...
    # `N` must be a power of two (1 is allowed); anything else raises an
    # ArgumentError instead of recursing without end. `d` selects the sign of
    # the twiddle exponent through `direction_sign`. Returns `nothing`.
    N >= 1 || throw(ArgumentError("fft_pow2! requires a positive length, got $N"))

    if N == 1
        # A length-1 transform is the identity.
        out[start_out] = in[start_in]
        return nothing
    elseif N == 2
        a = in[start_in]
        b = in[start_in + stride_in]
        out[start_out] = a + b
        out[start_out + stride_out] = a - b
        return nothing
    end
    iseven(N) || throw(ArgumentError("fft_pow2! requires a power-of-two length, got $N"))

    m = N ÷ 2

    # Even-indexed samples fill the first half of the output window,
    # odd-indexed samples the second half.
    fft_pow2!(out, in, m, start_out, stride_out, start_in, stride_in * 2, d)
    fft_pow2!(out, in, m, start_out + m * stride_out, stride_out, start_in + stride_in, stride_in * 2, d)

    w1 = convert(T, cispi(direction_sign(d) * 2 / N))
    wj = one(T)
    @inbounds for j in 0:(m - 1)
        lo = start_out + j * stride_out
        hi = start_out + (j + m) * stride_out
        even = out[lo]
        odd = wj * out[hi]
        out[lo] = even + odd
        out[hi] = even - odd
        wj *= w1
    end
    return nothing
end
78
91
Power of 2 FFT in place, real
79
92
80
93
"""
81
function fft_pow2!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, N::Int, start_out::Int, stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T<:Real}
    # Recursive radix-2 transform of N real samples
    #     in[start_in], in[start_in + stride_in], ...
    # into the complex window
    #     out[start_out], out[start_out + stride_out], ...
    # `N` must be a power of two (1 is allowed); anything else raises an
    # ArgumentError. Because the input is real, only bins 0..N/2 are computed
    # by butterflies; the remaining bins are filled from the conjugate
    # symmetry X[N-k] == conj(X[k]). Returns `nothing`.
    N >= 1 || throw(ArgumentError("fft_pow2! requires a positive length, got $N"))

    if N == 1
        out[start_out] = in[start_in]
        return nothing
    elseif N == 2
        a = in[start_in]
        b = in[start_in + stride_in]
        out[start_out] = a + b
        out[start_out + stride_out] = a - b
        return nothing
    end
    iseven(N) || throw(ArgumentError("fft_pow2! requires a power-of-two length, got $N"))

    m = N ÷ 2
    mid_out = start_out + m * stride_out

    # Even-indexed samples -> first half of the window, odd-indexed samples ->
    # second half. Each half holds a full length-m spectrum afterwards.
    fft_pow2!(out, in, m, start_out, stride_out, start_in, stride_in * 2, d)
    fft_pow2!(out, in, m, mid_out, stride_out, start_in + stride_in, stride_in * 2, d)

    # Bin 0 and bin N/2 both come from the j = 0 butterfly (unit twiddle).
    even0 = out[start_out]
    odd0 = out[mid_out]
    out[start_out] = even0 + odd0
    out[mid_out] = even0 - odd0

    w1 = convert(Complex{T}, cispi(direction_sign(d) * 2 / N))
    wj = w1
    # Bins 1..m-1. The twiddle is carried from one iteration to the next, so
    # this loop has to run in order.
    @inbounds for j in 1:(m - 1)
        lo = start_out + j * stride_out
        hi = start_out + (j + m) * stride_out
        out[lo] = out[lo] + wj * out[hi]
        wj *= w1
    end
    # Bins m+1..N-1 by conjugate symmetry: X[m + j] == conj(X[m - j]).
    @inbounds for j in 1:(m - 1)
        out[start_out + (m + j) * stride_out] = conj(out[start_out + (m - j) * stride_out])
    end
    return nothing
end
102
118
103
- function fft_dft! (out:: AbstractVector{T} , in:: AbstractVector{T} , d:: Direction ) where {T}
104
- N = length (out)
119
+ function fft_dft! (out:: AbstractVector{T} , in:: AbstractVector{T} , N :: Int , start_out :: Int , stride_out :: Int , start_in :: Int , stride_in :: Int , d:: Direction ) where {T}
120
+ @info " " start_out stride_out start_in stride_in N
105
121
wn² = wn = w = convert (T, cispi (direction_sign (d)* 2 / N))
106
122
wn_1 = one (T)
107
123
108
- tmp = in[1 ]
124
+ tmp = in[start_in ]
109
125
out .= tmp
110
- tmp = sum (in )
111
- out[1 ] = tmp
112
-
126
+ tmp = sum (@view in[start_in : stride_in : start_in + stride_in * (N - 1 )] )
127
+ out[start_out ] = tmp
128
+
113
129
wk = wn²
114
- @inbounds for d in 2 : N
115
- out[d] = in[d]* wk + out[d]
116
- @inbounds for k in (d+ 1 ): N
130
+ @inbounds for d in 1 : N- 1
131
+ d_in = start_in + d* stride_in
132
+ d_out = start_out + d* stride_out
133
+ out[d_out] = in[d_in]* wk + out[d_out]
134
+ @inbounds for k in d: N- 1
135
+ k_in = start_in + k* stride_in
136
+ k_out = start_out + k* stride_out
117
137
wk *= wn
118
- out[d ] = in[k ]* wk + out[d ]
119
- out[k ] = in[d ]* wk + out[k ]
138
+ out[d_out ] = in[k_in ]* wk + out[d_out ]
139
+ out[k_out ] = in[d_in ]* wk + out[k_out ]
120
140
end
121
141
wn_1 = wn
122
142
wn *= w
@@ -125,30 +145,30 @@ function fft_dft!(out::AbstractVector{T}, in::AbstractVector{T}, d::Direction) w
125
145
end
126
146
end
127
147
128
"""
    fft_dft!(out, in, N, start_out, stride_out, start_in, stride_in, d)

Direct O(N^2) discrete Fourier transform of `N` real samples.

Sample `n` (0-based) is read from `in[start_in + n*stride_in]` and bin `k`
(0-based) is written to `out[start_out + k*stride_out]`. The twiddle base is
`cispi(direction_sign(d)*2/N)`.

Bin 0 is the plain sum of the samples; for even `N` the middle bin is obtained
from `alternatingSum`. Bins `1:(N-1)÷2` are accumulated explicitly and each is
mirrored into bin `N - k` as its complex conjugate, which is valid because the
input is real.

Throws an `ArgumentError` when `N < 1`. Returns `nothing`.
"""
function fft_dft!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, N::Int, start_out::Int, stride_out::Int, start_in::Int, stride_in::Int, d::Direction) where {T<:Real}
    N >= 1 || throw(ArgumentError("fft_dft! requires a positive length, got $N"))

    halfN = N ÷ 2
    w = convert(Complex{T}, cispi(direction_sign(d) * 2 / N))

    samples = @view in[start_in:stride_in:(start_in + stride_in * (N - 1))]
    out[start_out] = sum(samples)
    iseven(N) && (out[start_out + stride_out * halfN] = alternatingSum(samples))

    # Start the accumulator as a complex value so its type never changes.
    first_sample = convert(Complex{T}, in[start_in])
    wk = w
    @inbounds for k in 1:((N - 1) ÷ 2)
        acc = first_sample
        wkn = wk
        for n in 1:(N - 1)
            acc += wkn * in[start_in + n * stride_in]
            wkn *= wk
        end
        out[start_out + k * stride_out] = acc
        out[start_out + (N - k) * stride_out] = conj(acc)
        wk *= w
    end
    return nothing
end
151
170
152
"""
    fft!(out, in, start_out, start_in, d, ::DFT, g, idx)

Execute the direct-DFT node `idx` of the call graph `g`: the node's size and
its input/output strides are read from `g[idx]` and passed, with the start
offsets and the direction `d`, to `fft_dft!`. Returns what `fft_dft!` returns.
"""
function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, start_in::Int, d::Direction, ::DFT, g::CallGraph{T}, idx::Int) where {T,U}
    node = g[idx]
    len = node.sz
    out_stride = node.s_out
    in_stride = node.s_in
    return fft_dft!(out, in, len, start_out, out_stride, start_in, in_stride, d)
end
0 commit comments