Skip to content

Commit e2d4c2a

Browse files
committed
Progress on power of two
1 parent e6bd067 commit e2d4c2a

File tree

1 file changed

+38
-34
lines changed

1 file changed

+38
-34
lines changed

src/algos.jl

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,17 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, ::Val{FFT_FORWARD},
2626
w1 = convert(T, cispi(-2/N))
2727
wj1 = one(T)
2828
tmp = g.workspace[idx]
29-
for j1 in 1:N1
29+
@inbounds for j1 in 1:N1
3030
wk2 = wj1;
3131
@views g(tmp[(N2*(j1-1) + 1):(N2*j1)], in[j1:N1:end], Val(FFT_FORWARD), right.type, idx + g[idx].right)
32-
j1 > 1 && for k2 in 2:N2
32+
j1 > 1 && @inbounds for k2 in 2:N2
3333
tmp[N2*(j1-1) + k2] *= wk2
3434
wk2 *= wj1
3535
end
3636
wj1 *= w1
3737
end
3838

39-
for k2 in 1:N2
39+
@inbounds for k2 in 1:N2
4040
@views g(out[k2:N2:end], tmp[k2:N2:end], Val(FFT_FORWARD), left.type, idx + g[idx].left)
4141
end
4242
end
@@ -51,17 +51,17 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, ::Val{FFT_BACKWARD}
5151
w1 = convert(T, cispi(2/N))
5252
wj1 = one(T)
5353
tmp = g.workspace[idx]
54-
for j1 in 2:N1
54+
@inbounds for j1 in 2:N1
5555
Complex<F,L> wk2 = wj1;
5656
@views g(tmp[j1:N1:end], in[N2*j1:N2*(j1-1)-1], Val(FFT_BACKWARD), right.type, idx + g[idx].right)
57-
for k2 in 2:N2
57+
@inbounds for k2 in 2:N2
5858
tmp[j1*N2+k2] *= wk2
5959
wk2 *= wj1
6060
end
6161
wj1 *= w1
6262
end
6363

64-
for k2 in 1:N2
64+
@inbounds for k2 in 1:N2
6565
@views g(out[k2:N2:end], tmp[k2:N2:end], Val(FFT_BACKWARD), left.type, idx + g[idx].left)
6666
end
6767
end
@@ -80,8 +80,9 @@ Power of 2 FFT in place, forward
8080
"""
8181
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_FORWARD}) where {T}
8282
N = length(out)
83-
if N == 1
84-
out[1] = in[1]
83+
if N == 2
84+
out[1] = in[1] + in[2]
85+
out[2] = in[1] - in[2]
8586
return
8687
end
8788
fft_pow2!(@view(out[1:(end÷2)]), @view(in[1:2:end]), Val(FFT_FORWARD))
@@ -90,7 +91,7 @@ function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_FORW
9091
w1 = convert(T, cispi(-2/N))
9192
wj = one(T)
9293
m = N ÷ 2
93-
for j in 1:m
94+
@inbounds for j in 1:m
9495
out_j = out[j]
9596
out[j] = out_j + wj*out[j+m]
9697
out[j+m] = out_j - wj*out[j+m]
@@ -104,8 +105,9 @@ Power of 2 FFT in place, backward
104105
"""
105106
function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_BACKWARD}) where {T}
106107
N = length(out)
107-
if N == 1
108-
out[1] = in[1]
108+
if N == 2
109+
out[1] = in[1] + in[2]
110+
out[2] = in[1] - in[2]
109111
return
110112
end
111113
fft_pow2!(@view(out[1:(end÷2)]), @view(in[1:2:end]), Val(FFT_BACKWARD))
@@ -114,7 +116,7 @@ function fft_pow2!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_BACK
114116
w1 = convert(T, cispi(2/N))
115117
wj = one(T)
116118
m = N ÷ 2
117-
for j in 1:m
119+
@inbounds for j in 1:m
118120
out_j = out[j]
119121
out[j] = out_j + wj*out[j+m]
120122
out[j+m] = out_j - wj*out[j+m]
@@ -128,8 +130,9 @@ Power of 2 FFT in place, forward
128130
"""
129131
function fft_pow2!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val{FFT_FORWARD}) where {T<:Real}
130132
N = length(out)
131-
if N == 1
132-
out[1] = in[1]
133+
if N == 2
134+
out[1] = in[1] + in[2]
135+
out[2] = in[1] - in[2]
133136
return
134137
end
135138
fft_pow2!(@view(out[1:(end÷2)]), @view(in[1:2:end]), Val(FFT_FORWARD))
@@ -138,11 +141,11 @@ function fft_pow2!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val
138141
w1 = convert(Complex{T}, cispi(-2/N))
139142
wj = one(Complex{T})
140143
m = N ÷ 2
141-
@turbo for j in 2:m
144+
@inbounds @turbo for j in 2:m
142145
out[j] = out[j] + wj*out[j+m]
143146
wj *= w1
144147
end
145-
@turbo for j in 2:m
148+
@inbounds @turbo for j in 2:m
146149
out[m+j] = conj(out[m-j+2])
147150
end
148151
end
@@ -153,8 +156,9 @@ Power of 2 FFT in place, backward
153156
"""
154157
function fft_pow2!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val{FFT_BACKWARD}) where {T<:Real}
155158
N = length(out)
156-
if N == 1
157-
out[1] = in[1]
159+
if N == 2
160+
out[1] = in[1] + in[2]
161+
out[2] = in[1] - in[2]
158162
return
159163
end
160164
fft_pow2!(@view(out[1:(end÷2)]), @view(in[1:2:end]), Val(FFT_BACKWARD))
@@ -163,7 +167,7 @@ function fft_pow2!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val
163167
w1 = convert(Complex{T}, cispi(2/N))
164168
wj = one(Complex{T})
165169
m = N ÷ 2
166-
@turbo for j in 2:m
170+
@inbounds @turbo for j in 2:m
167171
out[j] = out[j] + wj*out[j+m]
168172
out[m+j] = conj(out[m-i+2])
169173
wj *= w1
@@ -175,15 +179,15 @@ function fft_dft!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_FORWA
175179
wn² = wn = w = convert(T, cispi(-2/N))
176180
wn_1 = one(T)
177181

178-
tmp = in[1];
179-
out .= tmp;
182+
tmp = in[1]
183+
out .= tmp
180184
tmp = sum(in)
181-
out[1] = tmp;
185+
out[1] = tmp
182186

183-
wk = wn²;
184-
for d in 2:N
187+
wk = wn²
188+
@inbounds for d in 2:N
185189
out[d] = in[d]*wk + out[d]
186-
for k in (d+1):N
190+
@inbounds for k in (d+1):N
187191
wk *= wn
188192
out[d] = in[k]*wk + out[d]
189193
out[k] = in[d]*wk + out[k]
@@ -205,10 +209,10 @@ function fft_dft!(out::AbstractVector{T}, in::AbstractVector{T}, ::Val{FFT_BACKW
205209
tmp = sum(in)
206210
out[1] = tmp
207211

208-
wk = wn²;
209-
for d in 2:N
212+
wk = wn²
213+
@inbounds @turbo for d in 2:N
210214
out[d] = in[d]*wk + out[d]
211-
for k in (d+1):N
215+
@inbounds for k in (d+1):N
212216
wk *= wn
213217
out[d] = in[k]*wk + out[d]
214218
out[k] = in[d]*wk + out[k]
@@ -229,17 +233,17 @@ function fft_dft!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val{
229233
out[1] = sum(in)
230234
iseven(N) && (out[halfN+1] = alternatingSum(in))
231235

232-
for d in 2:halfN+1
236+
@inbounds @turbo for d in 2:halfN+1
233237
tmp = in[1]
234-
for k in 2:N
238+
@inbounds for k in 2:N
235239
tmp += wkn*in[k]
236240
wkn *= wk
237241
end
238242
out[d] = tmp
239243
wk *= w
240244
wkn = wk
241245
end
242-
@turbo for i in 0:halfN-1
246+
@inbounds @turbo for i in 0:halfN-1
243247
out[N-i] = conj(out[halfN-i])
244248
end
245249
end
@@ -254,10 +258,10 @@ function fft_dft!(out::AbstractVector{Complex{T}}, in::AbstractVector{T}, ::Val{
254258
out[1] = sum(in)
255259
iseven(N) && (out[halfN+1] = alternatingSum(in))
256260

257-
wk = wn²;
258-
for d in 2:halfN
261+
wk = wn²
262+
@inbounds @turbo for d in 2:halfN
259263
out[d] = in[d]*wk + out[d]
260-
for k in (d+1):halfN
264+
@inbounds for k in (d+1):halfN
261265
wk *= wn
262266
out[d] = in[k]*wk + out[d]
263267
out[k] = in[d]*wk + out[k]

0 commit comments

Comments
 (0)