@@ -31,24 +31,24 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
31
31
N2 = right. sz
32
32
s_in = root. s_in
33
33
s_out = root. s_out
34
- @info " " N N1 N2 s_in s_out start_in start_out
35
34
36
35
w1 = convert (T, cispi (direction_sign (d)* 2 / N))
37
36
wj1 = one (T)
38
37
tmp = g. workspace[idx]
39
- @inbounds for j1 in 0 : N1- 1
38
+ for j1 in 0 : N1- 1
40
39
wk2 = wj1;
41
40
g (tmp, in, N2* j1+ 1 , start_in + j1* s_in, d, right. type, right_idx)
42
- j1 > 0 && @inbounds for k2 in 2 : N2
41
+ j1 > 0 && for k2 in 1 : N2- 1
43
42
tmp[N2* j1 + k2] *= wk2
44
43
wk2 *= wj1
45
44
end
46
45
wj1 *= w1
47
46
end
48
47
49
- @inbounds for k2 in 1 : N2
50
- g (out, tmp, start_out + (k2 - 1 ) * s_out, k2, d, left. type, left_idx)
48
+ for k2 in 0 : N2- 1
49
+ g (out, tmp, start_out + k2 * s_out, k2+ 1 , d, left. type, left_idx)
51
50
end
51
+ out .+ = 0
52
52
end
53
53
54
54
function fft! (out:: AbstractVector{T} , in:: AbstractVector{U} , start_out:: Int , start_in:: Int , d:: Direction , :: Pow2FFT , g:: CallGraph{T} , idx:: Int ) where {T,U}
@@ -117,31 +117,23 @@ function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, N::Int, start_
117
117
end
118
118
119
119
"""
    fft_dft!(out, in, N, start_out, stride_out, start_in, stride_in, d)

Evaluate an `N`-point discrete Fourier transform by direct summation and store
the result in `out`.

For every `m` in `0:N-1` the value written is

    out[start_out + m*stride_out] = sum over k in 0:N-1 of
        in[start_in + k*stride_in] * w^(m*k)

where `w = convert(T, cispi(direction_sign(d) * 2 / N))`.

# Arguments
- `out`: destination vector; only the `N` strided positions listed above are written.
- `in`: source vector; only the `N` strided positions listed above are read.
- `N`: number of points in the transform.
- `start_out`, `stride_out`: first index and step used when writing to `out`.
- `start_in`, `stride_in`: first index and step used when reading from `in`.
- `d`: transform direction, forwarded to `direction_sign` (defined elsewhere;
  presumably it yields `+1` or `-1` -- confirm against its definition).

# Notes
- The work is quadratic in `N`: each of the `N - 1` non-zero outputs is an
  independent sum over all inputs.
- The loops run under `@inbounds`, so indices that fall outside `in` or `out`
  are not detected. The caller is responsible for passing consistent
  `N`/start/stride values.
- `in[start_in]` is read unconditionally, even when `N < 2`.

Returns `nothing`.
"""
function fft_dft!(
    out::AbstractVector{T},
    in::AbstractVector{T},
    N::Int,
    start_out::Int,
    stride_out::Int,
    start_in::Int,
    stride_in::Int,
    d::Direction,
) where {T}
    # Output bin 0: every twiddle factor is one, so it is the plain sum of the inputs.
    acc = in[start_in]
    @inbounds for j in 1:(N - 1)
        acc += in[start_in + j * stride_in]
    end
    out[start_out] = acc

    # w is the primitive N-th root of unity for the requested direction.
    w = convert(T, cispi(direction_sign(d) * 2 / N))
    wm = w       # w^m for the output bin currently being computed
    wmk = w      # running product w^(m*k) inside the inner sum

    # The bin index is deliberately not called `d`, to avoid shadowing the
    # `d::Direction` parameter.
    @inbounds for m in 1:(N - 1)
        acc = in[start_in]              # k = 0 term, twiddle factor is one
        for k in 1:(N - 1)
            acc += wmk * in[start_in + k * stride_in]
            wmk *= wm                   # w^(m*k) -> w^(m*(k+1))
        end
        out[start_out + m * stride_out] = acc
        wm *= w                         # advance to w^(m+1)
        wmk = wm                        # restart the running product for the next bin
    end
    return nothing
end
147
139
0 commit comments