diff --git a/Project.toml b/Project.toml index 6520dc3..1071ad9 100644 --- a/Project.toml +++ b/Project.toml @@ -8,26 +8,34 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" IndexFunArrays = "613c443e-d742-454e-bfc6-1d7f8dd76566" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MutableShiftedArrays = "d3d30c82-a38e-471c-a45a-3d24d2f4d22d" NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d" NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d" -PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a" + +[weakdeps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + +[extensions] +CUDASupportExt_FT = ["CUDA", "Adapt"] [compat] -ChainRulesCore = "1, 1.0, 1.1" -FFTW = "1.5" -ImageTransformations = "0.9" +Adapt = "3.7, 4.0, 4.1" +CUDA = "5.2, 5.3, 5.4, 5.5, 5.6, 5.7" +ChainRulesCore = "1" +FFTW = "1.5, 1.6, 1.7, 1.8, 1.9" +ImageTransformations = "0.9, 0.10" IndexFunArrays = "0.2" -NDTools = "0.5.1, 0.6, 0.7, 0.8" +NDTools = "0.8" NFFT = "0.11, 0.12, 0.13" -PaddedViews = "0.5" Reexport = "1" -ShiftedArrays = "2" -Zygote = "0.6" -julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10" +Zygote = "0.6, 0.7" +julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11" +MutableShiftedArrays = "0.3" [extras] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" FractionalTransforms = "e50ca838-b4f0-4a10-ad18-4b920bf1ae5c" ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -36,4 +44,4 @@ TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote"] +test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA"] diff --git a/docs/src/utils.md b/docs/src/utils.md index 718a828..dcbc8f0 100644 --- a/docs/src/utils.md +++ b/docs/src/utils.md @@ -8,4 +8,5 @@ FourierTools.get_indices_around_center FourierTools.center_extract FourierTools.odd_view FourierTools.fourier_reverse! +FourierTools.get_indexrange_around_center ``` diff --git a/ext/CUDASupportExt_FT.jl b/ext/CUDASupportExt_FT.jl new file mode 100644 index 0000000..f6e6bf8 --- /dev/null +++ b/ext/CUDASupportExt_FT.jl @@ -0,0 +1,102 @@ +module CUDASupportExt_FT +using CUDA +using Adapt +using FourierTools +using Base +# using NFFT +# using CuNFFT + +# define a number of Union types to not repeat all definitions for each type +const AllShiftedType = Union{FourierTools.FourierSplit{<:Any,<:Any,<:Any}, + FourierTools.FourierJoin{<:Any,<:Any,<:Any}} + +# these are special only if a CuArray is wrapped + +const AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any}, + Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, + SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}} +const AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType} + +const AllShiftedTypeCu{N, CD} = Union{FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}}, + FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}} +const AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any}, + Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, + SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}} +const AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}} + +Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split); +Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join); + +function Base.Broadcast.BroadcastStyle(::Type{T}) where {N, CD, T<:AllShiftedTypeCu{N, CD}} + CUDA.CuArrayStyle{N,CD}() +end + +# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray + +function Base.Broadcast.BroadcastStyle(::Type{T}) where {N, CD, T<:AllSubArrayTypeCu{N, CD}} + CUDA.CuArrayStyle{N,CD}() +end + +function Base.copy(s::AllShiftedAndViews) + res = similar(get_base_arr(s), eltype(s), size(s)); + res .= s + return res +end + +function Base.collect(x::AllShiftedAndViews) + return copy(x) # stay on the GPU +end + +function Base.Array(x::AllShiftedAndViews) + return Array(copy(x)) # remove from GPU +end + +function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) + return all(x .== y) +end + +function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) + return all(x .== y) +end + +function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) + return all(x .== y) +end + +function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) + atol = (atol != 0) ? atol : rtol * maximum(abs.(x)) + return all(abs.(x .- y) .<= atol) +end + +function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) + atol = (atol != 0) ? atol : rtol * maximum(abs.(x)) + return all(abs.(x .- y) .<= atol) +end + +function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) + atol = (atol != 0) ? atol : rtol * maximum(abs.(x)) + return all(abs.(x .- y) .<= atol) +end + +function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) + CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) +end + +### addition functions specific to CUDA + +function FourierTools.optional_collect(a::CuArray) + a +end + +get_base_arr(arr::CuArray) = arr +get_base_arr(arr::Array) = arr +function get_base_arr(arr::AbstractArray) + p = parent(arr) + return (p === arr) ? arr : get_base_arr(parent(arr)) +end + +function similar_zeros(arr::CuArray, sz::NTuple=size(arr)) + CUDA.zeros(sz) +end + +end \ No newline at end of file diff --git a/src/FourierTools.jl b/src/FourierTools.jl index de4e2f7..99bd43b 100644 --- a/src/FourierTools.jl +++ b/src/FourierTools.jl @@ -2,17 +2,18 @@ module FourierTools using Reexport -using PaddedViews, ShiftedArrays +# using PaddedViews +using MutableShiftedArrays # for circshift @reexport using FFTW using LinearAlgebra using IndexFunArrays using ChainRulesCore using NDTools +import Base: checkbounds, getindex, setindex!, parent, size, axes, copy, collect + @reexport using NFFT FFTW.set_num_threads(4) - - include("utils.jl") include("nfft_nd.jl") include("resampling.jl") diff --git a/src/convolutions.jl b/src/convolutions.jl index 1081b34..ca18fb8 100644 --- a/src/convolutions.jl +++ b/src/convolutions.jl @@ -148,6 +148,7 @@ end plan_conv_buffer(u, v [, dims]; kwargs...) Similar to [`plan_conv`](@ref) but instead uses buffers to prevent memory allocations. +The three buffers are internal to the function and are not exposed to the user. Not AD friendly! """ @@ -161,7 +162,9 @@ function plan_conv_buffer(u::AbstractArray{T1, N}, v::AbstractArray{T2, M}, dims u_buff = P_u * u v_ft = P_v * v - uv_buff = u_buff .* v_ft + uv_sz = bc_size(u_buff, v_ft) + # this saves memory allocations: + uv_buff = (uv_sz == size(u_buff)) ? u_buff : u_buff .* v_ft; # for fourier space we need a new plan P = plan(u .* v, dims; kwargs...) diff --git a/src/custom_fourier_types.jl b/src/custom_fourier_types.jl index b2ff902..c81420e 100644 --- a/src/custom_fourier_types.jl +++ b/src/custom_fourier_types.jl @@ -6,9 +6,9 @@ and then replaces the value by half of the parent at L1 `do_split` is a Bool that indicates whether this mechanism is active. It is needed for type stability reasons of functions returnting this type. """ -struct FourierSplit{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T,N} +struct FourierSplit{T, N, AA<:AbstractArray{T, N}, D} <: AbstractArray{T,N} parent::AA # holds the data (or is another view) - D::Int # dimension along which to apply to copy + # D::Int # dimension along which to apply to copy L1::Int # low index position to copy from (and half) L2::Int # high index positon to copy to (and half) do_split::Bool @@ -19,32 +19,59 @@ struct FourierSplit{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T,N} This version below is needed to avoid a split for the first rft dimension, but still return half the value FFTs and other RFT dimension should use the version without L2. """ - function FourierSplit(parent::AA, D::Int,L1::Int,L2::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}} - return new{T,N, AA}(parent, D, L1, L2, do_split) + function FourierSplit(parent::AA, ::Val{D}, L1::Int,L2::Int, do_split::Bool) where {T,N, D, AA<:AbstractArray{T, N}} + return new{T,N, AA, D}(parent, L1, L2, do_split) end - function FourierSplit(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}} + function FourierSplit(parent::AA, ::Val{D}, L1::Int, do_split::Bool) where {T,N, D, AA<:AbstractArray{T, N}} mid = fft_center(size(parent)[D]) L2 = mid + (mid-L1) - return FourierSplit(parent, D,L1,L2, do_split) + return FourierSplit(parent, Val(D), L1, L2, do_split) end + # function FourierSplit(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}} + # FourierSplit(parent, Val(D), L1, do_split) + # end end +# get_D(A::FourierSplit{D}) where {D} = D + Base.IndexStyle(::Type{FD}) where {FD<:FourierSplit} = IndexStyle(parenttype(FD)) -parenttype(::Type{FourierSplit{T,N,AA}}) where {T,N,AA} = AA +parenttype(::Type{FourierSplit{T,N,AA,D}}) where {T,N,AA,D} = AA parenttype(A::FourierSplit) = parenttype(typeof(A)) + +Base.similar(s::FourierSplit, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v) Base.parent(A::FourierSplit) = A.parent Base.size(A::FourierSplit) = size(parent(A)) -@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}}, i::Vararg{Int,N}) where {T,N} - if (i[A.D]==A.L2 || i[A.D]==A.L1) && A.do_split # index along this dimension A.D corrsponds to slice L2 - # not that "setindex" in the line below modifies only the index, not the array - @inbounds return parent(A)[Base.setindex(i,A.L1, A.D)...] / 2 - else i[A.D]==A.L2 +@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}, D}, i::Vararg{Int,N}) where {T,N, D} + # D = get_D(A) # causes huge troubles in CUDA! + # return eltype(A)(D) # + + if (i[D]==A.L2 || i[D]==A.L1) && A.do_split # index along this dimension A.D corrsponds to slice L2 + # note that "setindex" in the line below modifies only the index, not the array + @inbounds return parent(A)[Base.setindex(i, A.L1, D)...] / 2 + else i[D]==A.L2 @inbounds return parent(A)[i...] # @inbounds return parent(A)[i...] end end +# One-D version +@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}, D}, i::Int) where {T,N,D} + if A.do_split + # compute the ND index from the one-D index i + ind = Tuple(CartesianIndices(parent(A))[i]) + # D = get_D(A) # causes huge troubles in CUDA! + # return eltype(A)(D) # + if (ind[D]==A.L2 || ind[D]==A.L1) + return parent(A)[Base.setindex(ind, A.L1, D)...] / 2 + else + return parent(A)[i] + end + else + return parent(A)[i] + end +end + """ FourierJoin{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T, N} @@ -53,9 +80,9 @@ and then replaces the value by add the value at the mirrored position L2 `do_join` is a Bool that indicates whether this mechanism is active. It is needed for type stability reasons of functions returnting this type """ -struct FourierJoin{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T, N} +struct FourierJoin{T,N, AA<:AbstractArray{T, N}, D} <: AbstractArray{T, N} parent::AA - D::Int # dimension along which to apply to copy + # D::Int # dimension along which to apply to copy L1::Int # low index position to copy from (and half) L2::Int # high index positon to copy to (and half) do_join::Bool @@ -65,28 +92,52 @@ This version below is needed to avoid a split for the first rft dimension but still return half the value FFTs and other RFT dimension should use the version without L2 """ - function FourierJoin(parent::AA, D::Int, L1::Int, L2::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}} - return new{T, N, AA}(parent, D, L1, L2, do_join) + function FourierJoin(parent::AA, ::Val{D}, L1::Int, L2::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}, D} + return new{T, N, AA, D}(parent, L1, L2, do_join) end - function FourierJoin(parent::AA, D::Int,L1::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}} + function FourierJoin(parent::AA, ::Val{D}, L1::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}, D} mid = fft_center(size(parent)[D]) L2 = mid + (mid-L1) - return FourierJoin(parent, D, L1, L2, do_join) + return FourierJoin(parent, Val(D), L1, L2, do_join) end + + # function FourierJoin(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}} + # FourierJoin(parent, Val(D), L1, do_split) + # end end +# get_D(A::FourierJoin) = A.D + Base.IndexStyle(::Type{FS}) where {FS<:FourierJoin} = IndexStyle(parenttype(FS)) -parenttype(::Type{FourierJoin{T,N,AA}}) where {T,N,AA} = AA +parenttype(::Type{FourierJoin{T,N,AA,D}}) where {T,N,AA,D} = AA parenttype(A::FourierJoin) = parenttype(typeof(A)) + +Base.similar(s::FourierJoin, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v) + Base.parent(A::FourierJoin) = A.parent Base.size(A::FourierJoin) = size(parent(A)) -@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N}}, i::Vararg{Int,N}) where {T,N} - if i[A.D]==A.L1 && A.do_join - @inbounds return (parent(A)[i...] + parent(A)[Base.setindex(i, A.L2, A.D)...]) +@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N}, D}, i::Vararg{Int,N}) where {T,N,D} + if i[D]==A.L1 && A.do_join + @inbounds return (parent(A)[i...] + parent(A)[Base.setindex(i, A.L2, D)...]) else @inbounds return (parent(A)[i...]) end end +# One-D version +@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N},D}, i::Int) where {T,N,D} + if A.do_join + # compute the ND index from the one-D index i + ind = Tuple(CartesianIndices(parent(A))[i]) + if (ind[D]==A.L1) + return parent(A)[i] + parent(A)[Base.setindex(ind, A.L2, D)...] + else + return parent(A)[i] + end + else + return parent(A)[i] + end +end + diff --git a/src/czt.jl b/src/czt.jl index 91ff20c..8c91cf5 100644 --- a/src/czt.jl +++ b/src/czt.jl @@ -27,7 +27,7 @@ function get_kernel_1d(arr::AT, N::Integer, M::Integer; a= 1.0, w = cispi(-2/N), CT = (RT <: Real) ? Complex{RT} : RT RT = real(CT) - # converts ShiftedArrays.CircShiftedArray into a plain array type: + # converts MutableShiftedArrays.CircShiftedArray into a plain array type: tmp = similar(arr, RT, (1,)) RAT = real_arr_type(typeof(tmp), Val(1)) diff --git a/src/fft_helpers.jl b/src/fft_helpers.jl index bacd514..71315cf 100644 --- a/src/fft_helpers.jl +++ b/src/fft_helpers.jl @@ -7,7 +7,7 @@ export ffts2d, ffts2d!, iffts2d, rffts2d, irffts2d """ optional_collect(a) -Only collects certain arrays, for a pure `Array` there is no collect +Only collects certain arrays, for a pure `Array` or a `CuArray` there is no collect and it returns simply `a`. """ # collect @@ -16,8 +16,8 @@ optional_collect(a::AbstractArray) = collect(a) optional_collect(a::Array) = a # for CircShiftedArray we only need collect if shifts is non-zero -function optional_collect(csa::ShiftedArrays.CircShiftedArray) - if all(iszero.(csa.shifts)) +function optional_collect(csa::CircShiftedArray) + if all(iszero.(shifts(csa))) return optional_collect(parent(csa)) else return collect(csa) @@ -30,7 +30,7 @@ end ffts(A [, dims]) Result is semantically equivalent to `fftshift(fft(A, dims), dims)` -However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory. +However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory. See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft), [`ffts`](@ref ffts), [`iffts`](@ref iffts), [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), @@ -45,7 +45,7 @@ end Result is semantically equivalent to `fftshift(fft!(A, dims), dims)`. `A` is in-place modified. -However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory. +However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory. See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft), [`ffts`](@ref ffts), [`iffts`](@ref iffts), [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), @@ -59,7 +59,7 @@ end Result is semantically equivalent to `ifft(ifftshift(A, dims), dims)`. `A` is in-place modified. -However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory. +However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory. See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft), [`ffts`](@ref ffts), [`iffts`](@ref iffts), [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), @@ -74,7 +74,7 @@ end Calculates a `rfft(A, dims)` and then shift the frequencies to the center. `dims[1]` is not shifted, because there is no negative and positive frequency. -The shift is done with `ShiftedArrays` and therefore doesn't allocate memory. +The shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory. See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft), [`ffts`](@ref ffts), [`iffts`](@ref iffts), [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), @@ -88,7 +88,7 @@ end Calculates a `irfft(A, d, dims)` and then shift the frequencies back to the corner. `dims[1]` is not shifted, because there is no negative and positive frequency. -The shift is done with `ShiftedArrays` and therefore doesn't allocate memory. +The shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory. See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft), [`ffts`](@ref ffts), [`iffts`](@ref iffts), [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), diff --git a/src/fftshift_alternatives.jl b/src/fftshift_alternatives.jl index 14fd3ba..560eaac 100644 --- a/src/fftshift_alternatives.jl +++ b/src/fftshift_alternatives.jl @@ -13,7 +13,7 @@ If `dims` is not given then the signal is shifted along each dimension. function _fftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N}, dims=ntuple(i -> i, Val(N))) where {T, N} Δ = ntuple(i -> i ∈ dims ? size(src, i) ÷ 2 : 0, Val(N)) - circshift!(dst, src, Δ) + MutableShiftedArrays.circshift!(dst, src, Δ) end """ @@ -27,7 +27,7 @@ If `dims` is not given then the signal is shifted along each dimension. function _ifftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N}, dims=ntuple(i -> i, Val(N))) where {T, N} Δ = ntuple(i -> i ∈ dims ? - size(src, i) ÷ 2 : 0, Val(N)) - circshift!(dst, src, Δ) + MutableShiftedArrays.circshift!(dst, src, Δ) end @@ -39,7 +39,7 @@ Result is semantically equivalent to `fftshift(A, dims)` but returns a view instead. """ function fftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N} - ShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims)) + MutableShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims)) end @@ -51,7 +51,7 @@ a view instead. """ function ifftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N} diff = .-(ft_center_diff(size(mat), dims)) - return ShiftedArrays.circshift(mat, diff) + return MutableShiftedArrays.circshift(mat, diff) end @@ -63,7 +63,7 @@ Shifts the frequencies to the center expect for `dims[1]` because there os no ne and positive frequency. """ function rfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N} - ShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims)) + MutableShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims)) end @@ -75,7 +75,7 @@ Shifts the frequencies back to the corner except for `dims[1]` because there os and positive frequency. """ function irfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N} - ShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims))) + MutableShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims))) end """ diff --git a/src/fourier_resample_1D_based.jl b/src/fourier_resample_1D_based.jl index ebc3a74..68022ac 100644 --- a/src/fourier_resample_1D_based.jl +++ b/src/fourier_resample_1D_based.jl @@ -21,7 +21,8 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz # go to fourier space arr = ffts!(arr, d) if ns > s - out = zeros(eltype(arr), Base.setindex(size(arr), ns, d)) + # out = zeros(eltype(arr), Base.setindex(size(arr), ns, d)) + out = similar_zeros(arr, Base.setindex(size(arr), ns, d)) # to work with CuArary center_set!(out, arr) # in the even case we need to fix hermitian property if iseven(s) @@ -40,7 +41,7 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz l, r = get_indices_around_center(s, ns) inds_left = NDTools.slice_indices(axes(arr_v), d, 1) inds_right = NDTools.slice_indices(axes(arr), d, r+1) - arr_v[inds_left...] += arr[inds_right...] + arr_v[inds_left...] .+= arr[inds_right...] arr_v[inds_left...] ./= correction_factor end #overwrite old arr handle diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl index 1c5f319..990940b 100644 --- a/src/fourier_resizing.jl +++ b/src/fourier_resizing.jl @@ -26,13 +26,13 @@ julia> x = [1 20 3; 4 500 6; -7 821 923] -7 821 923 julia> ffts(x) -3×3 ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}: +3×3 MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}: 106.5+390.577im -1099.5-1062.61im 1000.5+700.615im -1138.5+354.204im 2271.0+0.0im -1138.5-354.204im 1000.5-700.615im -1099.5+1062.61im 106.5-390.577im julia> select_region_ft(ffts(x), (4,4)) -4×4 PaddedView(0.0 + 0.0im, OffsetArray(::ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}, 2:4, 2:4), (Base.OneTo(4), Base.OneTo(4))) with eltype ComplexF64: +4×4 PaddedView(0.0 + 0.0im, OffsetArray(::MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}, 2:4, 2:4), (Base.OneTo(4), Base.OneTo(4))) with eltype ComplexF64: 0.0+0.0im 0.0+0.0im 0.0+0.0im 0.0+0.0im 0.0+0.0im 106.5+390.577im -1099.5-1062.61im 1000.5+700.615im 0.0+0.0im -1138.5+354.204im 2271.0+0.0im -1138.5-354.204im @@ -46,14 +46,14 @@ julia> x = [1 20; 4 500; -7 821; -2 2] -2 2 julia> ffts(x) -4×2 ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}: +4×2 MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}: -347.0+0.0im 331.0+0.0im 809.0-492.0im -793.0+504.0im -1347.0+0.0im 1339.0+0.0im 809.0+492.0im -793.0-504.0im julia> select_region_ft(ffts(x), (5,3)) -5×3 FourierTools.FourierSplit{ComplexF64, 2, FourierTools.FourierSplit{ComplexF64, 2, PaddedViews.PaddedView{ComplexF64, 2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, OffsetArrays.OffsetMatrix{ComplexF64, ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}}}}}: +5×3 FourierTools.FourierSplit{ComplexF64, 2, FourierTools.FourierSplit{ComplexF64, 2, PaddedViews.PaddedView{ComplexF64, 2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, OffsetArrays.OffsetMatrix{ComplexF64, MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}}}}}: -86.75+0.0im 165.5+0.0im -86.75+0.0im 404.5-246.0im -793.0+504.0im 404.5-246.0im -673.5+0.0im 1339.0+0.0im -673.5+0.0im @@ -87,53 +87,19 @@ The size of the corresponding real-space array view after the operation finished is always assumed to align before and after the padding aperation. """ function select_region_rft(mat, old_size, new_size) - # rft_old_size = size(mat) rft_new_size = Base.setindex(new_size,new_size[1] ÷ 2 + 1, 1) return rft_fix_after(rft_pad( rft_fix_before(mat, old_size, new_size), rft_new_size), old_size, new_size) end -""" - select_region(mat; new_size) - -performs the necessary Fourier-space operations of resampling -in the space of ft (meaning the already circshifted version of fft). - -`new_size`. -The size of the array view after the operation finished. - -`center`. -Specifies the center of the new view in coordinates of the old view. By default an alignment of the Fourier-centers is assumed. -# Examples -```jldoctest -julia> using FFTW, FourierTools - -julia> select_region(ones(3,3),new_size=(7,7),center=(1,3)) -7×7 PaddedView(0.0, OffsetArray(::Matrix{Float64}, 4:6, 2:4), (Base.OneTo(7), Base.OneTo(7))) with eltype Float64: - 0.0 0.0 0.0 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 0.0 0.0 - 0.0 1.0 1.0 1.0 0.0 0.0 0.0 - 0.0 1.0 1.0 1.0 0.0 0.0 0.0 - 0.0 1.0 1.0 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -``` -""" -function select_region(mat; new_size=size(mat), center=ft_center_diff(size(mat)).+1, pad_value=zero(eltype(mat))) - new_size = Tuple(expand_size(new_size, size(mat))) - center = Tuple(expand_size(center, ft_center_diff(size(mat)) .+ 1)) - oldcenter = ft_center_diff(new_size) .+ 1 - PaddedView(pad_value, mat, new_size, oldcenter .- center.+1); -end - function ft_pad(mat, new_size) - return select_region(mat; new_size = new_size) + return select_region(optional_collect(mat); new_size = new_size) end function rft_pad(mat, new_size) c2 = rft_center_diff(size(mat)) c2 = Base.setindex(c2, new_size[1] .÷ 2, 1); - return select_region(mat; new_size=new_size, center = c2 .+ 1) + return select_region(optional_collect(mat); new_size=new_size, center = c2 .+ 1) end """ @@ -147,7 +113,7 @@ function ft_fix_before(mat::MT, size_old, size_new, ::Val{N})::FourierJoin{T,N,M so = size_old[N] do_join = (sn < so && iseven(sn)) L1 = (size_old[N] - size_new[N] ) ÷ 2 + 1 - return FourierJoin(mat, N, L1, do_join) + return FourierJoin(mat, Val(N), L1, do_join) end """ @@ -162,20 +128,21 @@ function ft_fix_before(mat::MT, size_old, size_new, ::Val{D}=Val(1)) where {D, so = size_old[D] do_join = (sn < so && iseven(sn)) L1 = (size_old[D] - size_new[D] )÷2 +1 - mat = FourierJoin(mat, D, L1, do_join) + mat = FourierJoin(mat, Val(D), L1, do_join) return ft_fix_before(mat, size_old, size_new, Val(D + 1)) else L1 = (size_old[N] -size_new[N] )÷2 +1 - return FourierJoin(mat, N, L1, false) + return FourierJoin(mat, Val(N), L1, false) end end -function ft_fix_after(mat::MT, size_old, size_new, ::Val{N})::FourierSplit{T,N,MT} where {T, N, MT<:AbstractArray{T,N}} +# routine only for the last dimensions N == D +function ft_fix_after(mat::MT, size_old, size_new, ::Val{N})::FourierSplit{T,N,MT,N} where {T, N, MT<:AbstractArray{T,N}} sn = size_new[N] so = size_old[N] do_split = (sn > so && iseven(so)) L1 = (size_new[N] - size_old[N]) ÷ 2 + 1 - return FourierSplit(mat, N, L1, do_split) + return FourierSplit(mat, Val(N), L1, do_split) end function ft_fix_after(mat::MT, size_old, size_new, ::Val{D}=Val(1)) where {D, T, N, MT<:AbstractArray{T,N}} @@ -184,49 +151,49 @@ function ft_fix_after(mat::MT, size_old, size_new, ::Val{D}=Val(1)) where {D, T, so = size_old[D] do_split = (sn > so && iseven(so)) L1 = (size_new[D]-size_old[D])÷2+1 - mat = FourierSplit(mat, D, L1, do_split) + mat = FourierSplit(mat, Val(D), L1, do_split) return ft_fix_after(mat, size_old, size_new, Val(D + 1)) else L1 = (size_new[N]-size_old[N])÷2+1 - return FourierSplit(mat, N, L1, false) + return FourierSplit(mat, Val(N), L1, false) end end -function rft_fix_first_dim_before(mat, size_old, size_new; dim=1) +function rft_fix_first_dim_before(mat, size_old, size_new; dim::Val{D}=Val(1)) where {D} # Note that this dim is the corresponding real-space size - sn = size_new[dim] - so = size_old[dim] + sn = size_new[D] + so = size_old[D] # result size is even upon cropping do_join = (sn < so && iseven(sn)) - L1 = size_new[dim] ÷ 2 + 1 + L1 = size_new[D] ÷ 2 + 1 # a hack to dublicate the value - mat = FourierJoin(mat, dim, L1, L1, do_join) + mat = FourierJoin(mat, Val(D), L1, L1, do_join) return mat end -function rft_fix_first_dim_after(mat,size_old,size_new;dim=1) +function rft_fix_first_dim_after(mat,size_old,size_new; dim::Val{D}=Val(1)) where {D} # Note that this dim is the corresponding real-space size - sn = size_new[dim] - so = size_old[dim] + sn = size_new[D] + so = size_old[D] # source size is even upon padding do_split = (sn > so && iseven(so)) - L1 = size_old[dim] ÷ 2 + 1 + L1 = size_old[D] ÷ 2 + 1 # This hack prevents a second position to be affected - mat = FourierSplit(mat, dim, L1, -1, do_split) + mat = FourierSplit(mat, Val(D), L1, -1, do_split) # if equal do nothing return mat end function rft_fix_before(mat,size_old,size_new) # ignore the first dimension - mat=rft_fix_first_dim_before(mat,size_old,size_new;dim=1) + mat=rft_fix_first_dim_before(mat,size_old,size_new; dim=Val(1)) # ignore the first dimension since it starts at Val(2) ft_fix_before(mat, size_old, size_new, Val(2)) end -function rft_fix_after(mat,size_old,size_new) +function rft_fix_after(mat, size_old, size_new) # ignore the first dimension - mat = rft_fix_first_dim_after(mat,size_old,size_new;dim=1) + mat = rft_fix_first_dim_after(mat, size_old, size_new; dim=Val(1)) # ignore the first dimension since it starts at Val(2) ft_fix_after(mat, size_old, size_new, Val(2)) end diff --git a/src/fourier_rotate.jl b/src/fourier_rotate.jl index 625b7f0..f5f0595 100644 --- a/src/fourier_rotate.jl +++ b/src/fourier_rotate.jl @@ -22,11 +22,13 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f a,b = rotation_plane old_size = size(arr) + pad_value = eltype(arr)(pad_value) + # enforce an odd size along these dimensions, to simplify the potential flips below. arr = let if iseven(size(arr,a)) || iseven(size(arr,b)) new_size = size(arr) .+ ntuple(i-> (i==a || i==b) ? iseven(size(arr,i)) : 0, ndims(arr)) - select_region(arr, new_size=new_size, pad_value=pad_value) + select_region_view(arr, new_size=new_size, pad_value=pad_value) else arr end @@ -53,6 +55,8 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f 0 end end + + # ToDo: This can be select_region_view for better performance, but should be benchmarked first arr = select_region(arr, new_size=old_size .+ extra_size, pad_value=pad_value) # convert to radiants @@ -67,7 +71,7 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f if keep_new_size || size(arr) == old_size return arr else - return select_region(arr, new_size=old_size, pad_value=pad_value) + return select_region_view(arr, new_size=old_size, pad_value=pad_value) end else return rotate!(copy(arr), θ, rotation_plane) diff --git a/src/fourier_shear.jl b/src/fourier_shear.jl index 23e9435..978d0c7 100644 --- a/src/fourier_shear.jl +++ b/src/fourier_shear.jl @@ -124,7 +124,8 @@ function apply_shift_strength!(arr::TA, arr_orig, shift, shear_dir_dim, shear_di r = real.(view(e, inds...)) if fix_nyquist inv_r = 1 ./ r - inv_r = map(x -> (isinf(x) ? 0 : x), inv_r) + # inv_r = map(x -> (isinf(x) ? 0 : x), inv_r) # NOT GPU compatible + inv_r[isinf.(inv_r)] .= 0 e[inds...] .= inv_r else e[inds...] .= r diff --git a/src/fourier_shifting.jl b/src/fourier_shifting.jl index 26a9c67..03182e3 100644 --- a/src/fourier_shifting.jl +++ b/src/fourier_shifting.jl @@ -7,7 +7,7 @@ export shift, shift! Shifts an array in-place. For real arrays it is based on `rfft`. For complex arrays based on `fft`. `shifts` can be non-integer, for integer shifts one should prefer -`circshift` or `ShiftedArrays.circshift` because a FFT-based methods +`circshift` or `MutableShiftedArrays.circshift` because a FFT-based methods introduces numerical errors. ## kwargs... @@ -70,20 +70,23 @@ function shift(arr, shifts; soft_fraction=0, fix_nyquist_frequency=false, take_r return shift!(copy(arr), shifts; soft_fraction=soft_fraction, fix_nyquist_frequency=fix_nyquist_frequency, take_real=take_real) end -function soft_shift(freqs, shift, fraction=eltype(freqs)(0.1); corner=false) - rounded_shift = round.(shift); +function soft_shift(freqs, myshift, fraction=eltype(freqs)(0.1); corner=false) + rounded_shift = round.(myshift); if corner - w = window_half_cos(size(freqs),border_in=2.0-2*fraction, border_out=2.0, offset=CtrCorner) + w = similar(freqs) # to also work with CuArray + w .= window_half_cos(size(freqs),border_in=2.0-2*fraction, border_out=2.0, offset=CtrCorner) else - w = ifftshift_view(window_half_cos(size(freqs),border_in=1.0-fraction, border_out=1.0)) + w = similar(freqs) # to also work with CuArray + w .= window_half_cos(size(freqs),border_in=1.0-fraction, border_out=1.0) + w = ifftshift_view(w) end - return cispi.(-freqs .* 2 .* (w .* shift + (1.0 .-w).* rounded_shift)) + return cispi.(-freqs .* 2 .* (w .* myshift + (1 .-w).* rounded_shift)) end function shift_by_1D_FT!(arr::TA, shifts; soft_fraction=0, take_real=false, fix_nyquist_frequency=false) where {N, TA<:AbstractArray{<:Complex, N}} # iterates of the dimension d using the corresponding shift - for (d, shift) in pairs(shifts) - if iszero(shift) + for (d, myshift) in pairs(shifts) + if iszero(myshift) continue end # better use reorient from NDTools here? @@ -95,18 +98,20 @@ function shift_by_1D_FT!(arr::TA, shifts; soft_fraction=0, take_real=false, fix_ # @show size(freqs) # allocates a 1D slice of exp values if iszero(soft_fraction) - ϕ = cispi.(- freqs .* 2 .* shift) + ϕ = cispi.(- freqs .* 2 .* myshift) else - ϕ = soft_shift(freqs, shift, soft_fraction) + ϕ = soft_shift(freqs, myshift, soft_fraction) end # ϕ = exp_ikx_sep(complex_arr_type(TA), size(arr), dims=(d,), shift_by = shift)[1] # in even case, set one value to real if iseven(size(arr, d)) s = size(arr, d) ÷ 2 + 1 - ϕ[s] = take_real ? real(ϕ[s]) : ϕ[s] - invr = 1 / ϕ[s] - invr = isinf(invr) ? 0 : invr - ϕ[s] = fix_nyquist_frequency ? invr : ϕ[s] + ϕ_val = Array(ϕ[s:s])[1] # to work with CuArray without @allowscalar + ϕ_val = take_real ? real(ϕ_val) : ϕ_val + invr = 1 / ϕ_val + invr = isinf.(invr) ? 0 : invr + ϕ_val = fix_nyquist_frequency ? invr : ϕ_val + ϕ[s:s] .= ϕ_val # to work with CuArray without @allowscalar end # go to fourier space and apply ϕ fft!(arr, d) @@ -133,8 +138,8 @@ end # rfft(x, 1) -> exp shift -> fft(x, 2) -> exp shift -> fft(x, 3) -> exp shift -> ifft(x, [2,3]) -> irfft(x, 1) # So once we did a rft to shift something we can call the routine for complex arrays to shift function shift_by_1D_RFT!(arr::TA, shifts; soft_fraction=0, fix_nyquist_frequency=false, take_real=true) where {N, TA<:AbstractArray{<:Real, N}} - for (d, shift) in pairs(shifts) - if iszero(shift) + for (d, myshift) in pairs(shifts) + if iszero(myshift) continue end @@ -151,16 +156,19 @@ function shift_by_1D_RFT!(arr::TA, shifts; soft_fraction=0, fix_nyquist_frequenc # freqs = TR(reorient(fftfreq(size(arr, d))[1:s], d, Val(N))) freqs .= reorient(rfftfreq(size(arr, d)), d, Val(N)) if iszero(soft_fraction) - ϕ = cispi.(-freqs .* 2 .* shift) + ϕ = cispi.(-freqs .* 2 .* myshift) else - ϕ = soft_shift(freqs, shift, soft_fraction, corner=true) + ϕ = soft_shift(freqs, myshift, soft_fraction, corner=true) end if iseven(size(arr, d)) # take real and maybe fix nyquist frequency - ϕ[s] = take_real ? real(ϕ[s]) : ϕ[s] - invr = 1 / ϕ[s] - invr = isinf(invr) ? 0 : invr - ϕ[s] = fix_nyquist_frequency ? invr : ϕ[s] + s = size(arr, d) ÷ 2 + 1 + ϕ_val = Array(ϕ[s:s])[1] # to work with CuArray without @allowscalar + ϕ_val = take_real ? real(ϕ_val) : ϕ_val + invr = 1 / ϕ_val + invr = isinf.(invr) ? 0 : invr + ϕ_val = fix_nyquist_frequency ? invr : ϕ_val + ϕ[s:s] .= ϕ_val # to work with CuArray without @allowscalar end arr_ft .*= ϕ # since we now did a single rfft dim, we can switch to the complex routine diff --git a/src/resampling.jl b/src/resampling.jl index 3be1e8b..66abd83 100644 --- a/src/resampling.jl +++ b/src/resampling.jl @@ -114,7 +114,8 @@ function upsample2_1D(mat::AbstractArray{T, N}, dim=1, fix_center=false, keep_si return mat end newsize = Tuple((d==dim) ? 2*size(mat,d) : size(mat,d) for d in 1:N) - res = zeros(eltype(mat), newsize) + res = similar_zeros(mat, newsize) + # res = zeros(eltype(mat), newsize) if fix_center && isodd(size(mat,dim)) selectdim(res,dim,2:2:size(res,dim)) .= mat shifts = Tuple((d==dim) ? 0.5 : 0.0 for d in 1:N) diff --git a/src/utils.jl b/src/utils.jl index d444e9d..f3fd155 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -3,6 +3,54 @@ export expanddims, fourierspace_pixelsize, realspace_pixelsize export δ export fourier_reverse! +""" + bc_size(arr1, arr2) + +Calculates the size of the broadcasted array of `arr1` and `arr2`. + +# Arguments +- `arr1`: first array +- `arr2`: second array + +# Examples +```jldoctest +julia> FourierTools.bc_size(rand(5, 2, 3), rand(1, 2)) +(5, 2, 3) +``` +""" +function bc_size(arr1, arr2) + md = max(ndims(arr1), ndims(arr2)) + return ntuple((d) -> max(size(arr1, d), size(arr2, d)), md) +end + +""" + similar_zeros(arr::AbstractArray, sz::NTuple) + +Creates a similar array to `arr` with zeros. This is useful to also support CuArrays. +There are specializations for `Array` and `CuArray` which use the original `zeros` function. + +# parameters +- `arr`: array to copy the type and size from +- `sz`: size of the new array. Default is the size of `arr`. + +# Examples +```jldoctest +julia> FourierTools.similar_zeros([1, 2, 3], (3,)) +3-element Vector{Int64}: + 0 + 0 + 0 +``` +""" +function similar_zeros(arr::AbstractArray, sz::NTuple=size(arr)) + res = similar(arr, sz) + fill!(res, zero(eltype(res))) + return res +end + +function similar_zeros(arr::Array, sz::NTuple=size(arr)) + zeros(eltype(arr), sz) +end #get_RFT_scale(real_size) = 0.5 ./ (max.(real_size ./ 2, 1)) # The same as the FFT scale but for the full array in real space! @@ -221,15 +269,12 @@ function rfft_size(size, dims) Base.setindex(size, size[dim] ÷ 2 + 1, dim) end - - - """ get_indices_around_center(i_in, i_out) A function which provides two output indices `i1` and `i2` where `i2 - i1 = i_out` -The indices are chosen in a way that the set `i1:i2` +The indices are chosen such that the set `i1:i2` cuts the interval `1:i_in` in a way that the center frequency stays at the center position. Works for both odd and even indices @@ -248,6 +293,22 @@ function get_indices_around_center(i_in, i_out) end end +""" + get_indexrange_around_center(arr_1, arr_2) + +A function which provides a range of output indices `i1:i2` +where `i2 - i1 = i_out` +The indices are chosen in a way that the set `i1:i2` +cuts the interval `1:i_in` such that the center frequency +stays at the center position. +Works for both odd and even indices +""" +function get_indexrange_around_center(arr_1, arr_2) + sz1 = size(arr_1) + sz2 = size(arr_2) + all_rng = ntuple((d) -> begin a,b = get_indices_around_center(sz1[d], sz2[d]); a:b end, ndims(arr_1)) + return all_rng +end """ center_extract(arr, new_size_array) @@ -311,14 +372,7 @@ julia> FourierTools.center_set!([1, 1, 1, 1, 1, 1], [5, 5, 5]) ``` """ function center_set!(arr_large, arr_small) - out_is = [] - for i = 1:ndims(arr_large) - a, b = get_indices_around_center(size(arr_large)[i], size(arr_small)[i]) - push!(out_is, a:b) - end - - #rest = ones(Int, ndims(arr_large) - 3) - arr_large[out_is...] = arr_small + arr_large[get_indexrange_around_center(arr_large, arr_small)...] = arr_small return arr_large end diff --git a/test/circ_shifted_arrays.jl b/test/circ_shifted_arrays.jl new file mode 100644 index 0000000..81567d3 --- /dev/null +++ b/test/circ_shifted_arrays.jl @@ -0,0 +1,41 @@ +@testset "Convolution methods" begin + # a = reshape(1:1000000,(1000,1000)) .+ 0 + # CUDA.allowscalar(false); + sz = (15,12) + myshift = (4,3) + a = reshape(1:prod(sz),sz) .+ 0 + c = CircShiftedArray(a,myshift); + b = copy(a) + d = c .+ c; + + @test (c == c .+0) + + ca = circshift(a, myshift) + # they are not the same but numerically the same: + @test (c != ca) + @test (collect(c) == ca) + + # adding a constant does not change the type + @test typeof(c) == typeof(c .+ 0) + # adding another CSA does not change the type + b .= c + @test b == collect(c) + cc = CircShiftedArray(c,.-myshift) + @test a == collect(cc) + + # assignment into a CSA + d .= a + @test d[1,1] == a[1,1] + @test collect(d) == a + + + # try a complicated broadcast expression + @test ca.+ 2 .* sin.(ca) == collect(c.+2 .*sin.(c)) + + #@run foo(c) + @test sum(a, dims=1) != collect(sum(c,dims=1)) + @test sum(ca,dims=1) == collect(sum(c,dims=1)) + @test sum(a, dims=2) != collect(sum(c,dims=2)) + @test sum(ca,dims=2) == collect(sum(c,dims=2)) + +end \ No newline at end of file diff --git a/test/convolutions.jl b/test/convolutions.jl index 1018eb3..ab89167 100644 --- a/test/convolutions.jl +++ b/test/convolutions.jl @@ -5,11 +5,12 @@ function conv_test(psf, img, img_out, dims, s) otf = fft(psf, dims) otf_r = rfft(psf, dims) - otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE) + # otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE) + otf_p, conv_p = plan_conv(img, psf, dims) otf_p2, conv_p2 = plan_conv(img .+ 0.0im, 0.0im .+ psf, dims) otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims) - otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE) - otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE) + # otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE) + otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims) # , flags=FFTW.MEASURE @testset "$s" begin @test img_out ≈ conv(0.0im .+ img, psf, dims) @test img_out ≈ conv(img, psf, dims) @@ -27,45 +28,51 @@ N = 5 psf = zeros((N, N)) psf[1, 1] = 1 - img = randn((N, N)) + psf = opt_cu(psf, use_cuda) + img = opt_cu(randn((N, N)), use_cuda) conv_test(psf, img, img, [1,2], "Convolution random image with delta peak") N = 5 psf = zeros((N, N)) psf[1, 1] = 1 - img = randn((N, N, N)) + psf = opt_cu(psf, use_cuda) + img = opt_cu(randn((N, N, N)), use_cuda) conv_test(psf, img, img, [1,2], "Convolution with different dimensions psf, img delta") N = 5 - psf = abs.(randn((N, N, 2))) - img = randn((N, N, 2)) + psf = opt_cu(abs.(randn((N, N, 2))), use_cuda) + img = opt_cu(randn((N, N, 2)), use_cuda) dims = [1, 2] img_out = conv_gen(img, psf, dims) conv_test(psf, img, img_out, dims, "Convolution with random 3D PSF and random 3D image over 2D dimensions") - - N = 5 - psf = abs.(randn((N, N, N, N, N))) - img = randn((N, N, N, N, N)) - dims = [1, 2, 3, 4] - img_out = conv_gen(img, psf, dims) - conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions") - N = 5 - psf = abs.(zeros((N, N, N, N, N))) - for i = 1:N - psf[1,1,1,1, i] = 1 + # Cuda has problems with >3D FFTs + if (!use_cuda) + N = 5 + psf = opt_cu(abs.(randn((N, N, N, N, N))), use_cuda) + img = opt_cu(randn((N, N, N, N, N)), use_cuda) + dims = [1, 2, 3, 4] + img_out = conv_gen(img, psf, dims) + conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions") + + N = 5 + psf = abs.(zeros((N, N, N, N, N))) + for i = 1:N + psf[1,1,1,1, i] = 1 + end + opt_cu(psf, use_cuda) + img = opt_cu(randn((N, N, N, N, N)), use_cuda) + dims = [1, 2, 3, 4] + img_out = conv_gen(img, psf, dims) + conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions") end - img = randn((N, N, N, N, N)) - dims = [1, 2, 3, 4] - img_out = conv_gen(img, psf, dims) - conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions") @testset "Check broadcasting convolution" begin - img = randn((5,6,7)) - psf = randn((5,6,7, 2, 3)) + img = opt_cu(randn((5,6,7)), use_cuda) + psf = opt_cu(randn((5,6,7, 2, 3)), use_cuda) _, p = plan_conv_buffer(img, psf) @test conv(img, psf) ≈ p(img) end @@ -73,8 +80,8 @@ @testset "Check types" begin N = 10 - img = randn(Float32, (N, N)) - psf = abs.(randn(Float32, (N, N))) + img = opt_cu(randn(Float32, (N, N)), use_cuda) + psf = opt_cu(abs.(randn(Float32, (N, N))), use_cuda) dims = [1, 2] @test typeof(conv_gen(img, psf, dims)) == typeof(conv(img, psf)) @test typeof(conv_gen(img, psf, dims)) != typeof(conv(img .+ 0f0im, psf)) @@ -89,21 +96,23 @@ @testset "dims argument nothing" begin N = 5 - psf = abs.(randn((N, N, N, N, N))) - img = randn((N, N, N, N, N)) - dims = [1,2,3,4,5] + psf = opt_cu(abs.(randn((N, N, N))), use_cuda) + img = opt_cu(randn((N, N, N)), use_cuda) + dims = [1,2,3] @test conv(psf, img) ≈ conv(img, psf, dims) @test conv(psf, img) ≈ conv(psf, img, dims) @test conv(img, psf) ≈ conv(img, psf, dims) end - @testset "adjoint convolution" begin - x = randn(ComplexF32, (5,6)) - y = randn(ComplexF32, (5,6)) + if (!use_cuda) + @testset "adjoint convolution" begin + x = opt_cu(randn(ComplexF32, (5,6)), use_cuda) + y = opt_cu( randn(ComplexF32, (5,6)), use_cuda) - y_ft, p = plan_conv(x, y) - @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4) - @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) + y_ft, p = plan_conv(x, y) + @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4) + @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) + end end end diff --git a/test/correlations.jl b/test/correlations.jl index 609b439..9f26935 100644 --- a/test/correlations.jl +++ b/test/correlations.jl @@ -1,14 +1,12 @@ - - @testset "Correlations methods" begin - @test ccorr([1, 0], [1, 0], centered = true) == [0.0, 1.0] - @test ccorr([1, 0], [1, 0]) == [1.0, 0.0] + @test ccorr(opt_cu([1, 0], use_cuda), opt_cu([1, 0], use_cuda), centered = true) == opt_cu([0.0, 1.0], use_cuda) + @test ccorr(opt_cu([1, 0], use_cuda), opt_cu([1, 0], use_cuda)) == opt_cu([1.0, 0.0], use_cuda) - x = [1,2,3,4,5] - y = [1,2,3,4,5] - @test ccorr(x,y) ≈ [55, 45, 40, 40, 45] - @test ccorr(x,y, centered=true) ≈ [40, 45, 55, 45, 40] + x = opt_cu([1,2,3,4,5], use_cuda) + y = opt_cu([1,2,3,4,5], use_cuda) + @test ccorr(x,y) ≈ opt_cu([55, 45, 40, 40, 45], use_cuda) + @test ccorr(x,y, centered=true) ≈ opt_cu([40, 45, 55, 45, 40], use_cuda) - @test ccorr(x, x .* (1im)) == ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im] + @test ccorr(x, x .* (1im)) ≈ opt_cu(ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im], use_cuda) end diff --git a/test/custom_fourier_types.jl b/test/custom_fourier_types.jl index d735c27..59c6121 100644 --- a/test/custom_fourier_types.jl +++ b/test/custom_fourier_types.jl @@ -1,19 +1,30 @@ @testset "Custom Fourier Types" begin N = 5 - x = randn((N, N)) - fs = FourierTools.FourierSplit(x, 2, 2, 4, true) + x = opt_cu(randn((N, N)), use_cuda) + fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true) @test FourierTools.parenttype(fs) == typeof(x) - fs = FourierTools.FourierSplit(x, 2, 2, 4, false) + @test fs[1,1] == fs[1] + @test fs[1,2] == fs[6] + fs = FourierTools.FourierSplit(x, Val(2), 2, 4, false) @test FourierTools.parenttype(fs) == typeof(x) - fj = FourierTools.FourierJoin(x, 2, 2, 4, true) + fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true) @test FourierTools.parenttype(fj) == typeof(x) + @test fj[1,1] == fj[1] + @test fj[1,2] == fj[6] - fj = FourierTools.FourierJoin(x, 2, 2, 4, false) + fj = FourierTools.FourierJoin(x, Val(2), 2, 4, false) @test FourierTools.parenttype(fj) == typeof(x) @test FourierTools.parenttype(typeof(fj)) == typeof(x) @test FourierTools.IndexStyle(typeof(fj)) == IndexStyle(typeof(fj)) + + x = opt_cu(ones((4, 7)), use_cuda) + fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true) + fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true) + @test all(fs[:,2] .== 0.5) + @test all(fj[:,2] .== 2) + end diff --git a/test/czt.jl b/test/czt.jl index 3ec04bf..23a157a 100644 --- a/test/czt.jl +++ b/test/czt.jl @@ -2,16 +2,18 @@ using NDTools # this is needed for the select_region! function below. @testset "chirp z-transformation" begin @testset "czt" begin - x = randn(ComplexF32, (5,6,7)) + x = opt_cu(randn(ComplexF32, (5,6,7)), use_cuda) @test eltype(czt(x, (2.0,2.0,2.0))) == ComplexF32 @test eltype(czt(x, (2f0,2f0,2f0))) == ComplexF32 - @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5) + # @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5) + @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), atol=1e-4) @test ≈(czt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), fft(x, (1,3)), rtol=1e-5) @test ≈(iczt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), ifft(x, (1,3)), rtol=1e-5) y = randn(ComplexF32, (5,6)) zoom = (1.0,1.0,1.0) - @test ≈(czt(x, zoom), ft(x), rtol=1e-4) + # @test ≈(czt(x, zoom), ft(x), rtol=1e-4) + @test ≈(czt(x, zoom), ft(x), atol=1e-4) @test ≈(czt(y, (1.0,1.0)), ft(y), rtol=1e-5) @test ≈(iczt(czt(y, (1.0,1.0)), (1.0,1.0)), y, rtol=1e-5) diff --git a/test/fft_helpers.jl b/test/fft_helpers.jl index badff06..1b06152 100644 --- a/test/fft_helpers.jl +++ b/test/fft_helpers.jl @@ -1,8 +1,8 @@ @testset "test fft_helpers" begin @testset "Optional collect" begin - y = [1,2,3] - x = fftshift_view(y, (1)) + y = opt_cu([1,2,3],use_cuda) + x = fftshift_view(y, (1)); @test fftshift(y) == FourierTools.optional_collect(x) end @@ -14,10 +14,11 @@ testiffts(arr, dims) = @test(iffts(arr, dims) ≈ ifft(ifftshift(arr, dims), dims)) testrft(arr, dims) = @test(rffts(arr, dims) ≈ fftshift(rfft(arr, dims), dims[2:end])) testirft(arr, dims, d) = @test(irffts(arr, d, dims) ≈ irfft(ifftshift(arr, dims[2:end]), d, dims)) - for dim = 1:4 + maxdim = ifelse(use_cuda, 3, 4) + for dim = 1:maxdim for _ in 1:3 s = ntuple(_ -> rand(1:13), dim) - arr = randn(ComplexF32, s) + arr = opt_cu(randn(ComplexF32, s), use_cuda) dims = 1:dim testft(arr, dims) testift(arr, dims) @@ -26,14 +27,12 @@ testift(arr, dims) testffts(arr, dims) testiffts(arr, dims) - end end end - @testset "Test 2d fft helpers" begin - arr = randn((6,7,8)) + arr = opt_cu(randn((6,7,8)), use_cuda) dims = [1,2] d = 6 @test(ft2d(arr) == fftshift(fft(ifftshift(arr, (1,2)), (1,2)), dims)) @@ -50,34 +49,36 @@ @test(fftshift2d_view(arr) == fftshift_view(arr, (1,2))) @test(ifftshift2d_view(arr) == ifftshift_view(arr, (1,2))) - arr = randn(ComplexF32, (4,7,8)) + arr = opt_cu(randn(ComplexF32, (4,7,8)), use_cuda) @test(irffts2d(arr, d) == irfft(ifftshift(arr, dims[2:2]), d, (1,2))) @test(irft2d(arr, d) == irft(arr, d, (1,2))) @test(irfft2d(arr, d) == irfft(arr, d, (1,2))) end - @testset "Test ft, ift, rft and irft real space centering" begin + atol = 1e-6 szs = ((10,10),(11,10),(100,101),(101,101)) for sz in szs - @test ft(ones(sz)) ≈ prod(sz) .* delta(sz) - @test ft(delta(sz)) ≈ ones(sz) - @test rft(ones(sz)) ≈ prod(sz) .* delta(rft_size(sz), offset=CtrRFT) - @test rft(delta(sz)) ≈ ones(rft_size(sz)) - @test ift(ones(sz)) ≈ delta(sz) - @test ift(delta(sz)) ≈ ones(sz) ./ prod(sz) - @test irft(ones(rft_size(sz)),sz[1]) ≈ delta(sz) - @test irft(delta(rft_size(sz),offset=CtrRFT),sz[1]) ≈ ones(sz) ./ prod(sz) + my_ones = opt_cu(ones(sz), use_cuda) + my_delta = opt_cu(collect(delta(sz)), use_cuda) + @test isapprox(ft(my_ones), prod(sz) .* my_delta, atol=atol) + @test isapprox(ft(my_delta), my_ones, atol=atol) + @test isapprox(rft(my_ones), prod(sz) .* opt_cu(delta(rft_size(sz), offset=CtrRFT), use_cuda), atol=atol) + @test isapprox(rft(my_delta), opt_cu(ones(rft_size(sz)), use_cuda), atol=atol) + @test isapprox(ift(my_ones), my_delta, atol=atol) + @test isapprox(ift(my_delta), my_ones ./ prod(sz), atol=atol) + # needing to specify Complex datatype. Is a CUDA bug for irfft (!!!) + @test isapprox(irft(opt_cu(ones(ComplexF64, rft_size(sz)), use_cuda), sz[1]), opt_cu(my_delta, use_cuda), atol=atol) + @test isapprox(irft(opt_cu(collect(delta(ComplexF64, rft_size(sz), offset=CtrRFT)), use_cuda), sz[1]), opt_cu(my_ones ./ prod(sz), use_cuda), atol=atol) end end - @testset "Test in place methods" begin - x = randn(ComplexF32, (5,3,10)) + atol = 1e-6 + x = opt_cu(randn(ComplexF32, (5,3,10)), use_cuda) dims = (1,2) - @test fftshift(fft(x, dims), dims) ≈ ffts!(copy(x), dims) - @test ffts2d!(copy(x)) ≈ ffts!(copy(x), (1,2)) + @test isapprox(fftshift(fft(x, dims), dims), ffts!(copy(x), dims), atol=atol) + @test isapprox(ffts2d!(copy(x)), ffts!(copy(x), (1,2)), atol=atol) end - end diff --git a/test/fftshift_alternatives.jl b/test/fftshift_alternatives.jl index e5f0f5f..4d37450 100644 --- a/test/fftshift_alternatives.jl +++ b/test/fftshift_alternatives.jl @@ -1,7 +1,7 @@ @testset "fftshift alternatives" begin @testset "Test fftshift_view and ifftshift_view" begin Random.seed!(42) - x = randn((2,1,4,1,6,7,4,7)) + x = opt_cu(randn((2,1,4,1,6,7,4,7)), use_cuda); dims = (4,6,7) @test fftshift(x,dims) == FourierTools.fftshift_view(x, dims) @test ifftshift(x,dims) == FourierTools.ifftshift_view(x, dims) @@ -10,18 +10,18 @@ @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x, dims), dims)) @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x, dims), dims)) - x = randn((13, 13, 14)) + x = opt_cu(randn((13, 13, 14)), use_cuda); @test fftshift(x) == FourierTools.fftshift_view(x) @test ifftshift(x) == FourierTools.ifftshift_view(x) @test fftshift(x, (2,3)) == FourierTools.fftshift_view(x, (2,3)) @test ifftshift(x, (2,3) ) == FourierTools.ifftshift_view(x, (2,3)) - end end @testset "fftshift and ifftshift in-place" begin function f(arr, dims) + arr = opt_cu(arr, use_cuda) arr3 = copy(arr) @test fftshift(arr, dims) == FourierTools._fftshift!(copy(arr), arr, dims) @test arr3 == arr diff --git a/test/fourier_rotate.jl b/test/fourier_rotate.jl index a431e9f..c75d819 100644 --- a/test/fourier_rotate.jl +++ b/test/fourier_rotate.jl @@ -3,7 +3,7 @@ @testset "Compare with ImageTransformations" begin function f(θ) - x = 1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256) + x = opt_cu(1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256), use_cuda) f(x) = sin(x * 20) + tan(1.2 * x) + sin(x) + cos(1.1323 * x) * x^3 + x^3 + 0.23 * x^4 + sin(1/(x+0.1)) img = 5 .+ abs.(f.(x)) img ./= maximum(img) @@ -13,25 +13,32 @@ m = sum(img) / length(img) - img_1 = parent(ImageTransformations.imrotate(img, θ; fillvalue = m)) - z = ones(Float32, size(img_1)) + img = opt_cu(img, use_cuda) + img_1 = opt_cu(parent(ImageTransformations.imrotate(collect(img), θ; fillvalue = m)), use_cuda) + z = opt_cu(ones(Float32, size(img_1)), use_cuda) z .*= m FourierTools.center_set!(z, img) - img_2 = FourierTools.rotate(z, θ, pad_value=img_1[1,1]) - img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=img_1[1,1], keep_new_size=true), size(img_2)) - img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=img_1[1,1])) + pad_val = collect(img_1[1:1,1:1])[1] + img_2 = FourierTools.rotate(z, θ, pad_value=pad_val) + img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=pad_val, keep_new_size=true), size(img_2)) + img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=pad_val)) img_4 = FourierTools.rotate!(z, θ) - @test all(.≈(img_1, img_2, rtol=0.6)) - @test ≈(img_1, img_2, rtol=0.03) - @test ≈(img_3, img_2, rtol=0.01) + @test maximum(abs.(img_1 .- img_2)) .< 0.65 + # @test all(.≈(img_1, img_2, rtol=0.65)) # 0.6 + + # There is an issue here! Im-Rotate has a shift wrt. our center of rotation. This leads to 0.5 absolute error!! + # @test ≈(img_1, img_2, rtol=0.05) # 0.03 + + @test ≈(img_3, img_2, atol=0.0001) @test ==(img_4, z) @test ==(img_2, img_2b) img_1c = FourierTools.center_extract(img_1, (100, 100)) img_2c = FourierTools.center_extract(img_2, (100, 100)) - @test all(.≈(img_1c, img_2c, rtol=0.3)) - @test ≈(img_1c, img_2c, rtol=0.05) + # @test all(.≈(img_1c, img_2c, rtol=0.3)) + @test maximum(abs.(img_1c .- img_2c)) .< 0.25 + # @test ≈(img_1c, img_2c, rtol=0.05) # 0.05 end f(deg2rad(-54.31)) diff --git a/test/fourier_shear.jl b/test/fourier_shear.jl index e46dbdd..0567172 100644 --- a/test/fourier_shear.jl +++ b/test/fourier_shear.jl @@ -3,9 +3,9 @@ @testset "Complex and real shear produce similar results" begin function f(a, b, Δ) - x = randn((30, 24, 13)) - xc = 0im .+ x - xc2 = 1im .* x + x = opt_cu(randn((30, 24, 13)), use_cuda); + xc = 0im .+ x; + xc2 = 1im .* x; @test shear(x, Δ, a, b) ≈ real(shear(xc, Δ, a, b)) @test shear(x, Δ, a, b) ≈ imag(shear(xc2, Δ, a, b)) end @@ -18,9 +18,9 @@ @testset "Test that in-place works in-place" begin function f(a, b, Δ) - x = randn((30, 24, 13)) - xc = randn(ComplexF32, (30, 24, 13)) - xc2 = 1im .* x + x = opt_cu(randn((30, 24, 13)), use_cuda); + xc = opt_cu(randn(ComplexF32, (30, 24, 13)), use_cuda); + xc2 = 1im .* x; @test shear!(x, Δ, a, b) ≈ x @test shear!(xc, Δ, a, b) ≈ xc @test shear!(xc2, Δ, a, b) ≈ xc2 @@ -34,13 +34,15 @@ @testset "Fix Nyquist" begin - @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = true) == [1.0 2.0; 3.0 4.0] - @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = false) != [1.0 2.0; 3.0 4.0] + dat = opt_cu([1 2; 3 4.0], use_cuda) + res = opt_cu([1.0 2.0; 3.0 4.0], use_cuda) + @test shear(shear(dat, 0.123), -0.123, fix_nyquist = true) == res + @test shear(shear(dat, 0.123), -0.123, fix_nyquist = false) != res end @testset "assign_shear_wrap!" begin - q = ones((10,11)) + q = opt_cu(ones((10,11)), use_cuda); assign_shear_wrap!(q, 10) - @test q[:,1] == [0,0,0,0,0,1,1,1,1,1] + @test q[:,1] == opt_cu([0,0,0,0,0,1,1,1,1,1], use_cuda) end end diff --git a/test/fourier_shifting.jl b/test/fourier_shifting.jl index 12f109f..e033433 100644 --- a/test/fourier_shifting.jl +++ b/test/fourier_shifting.jl @@ -1,20 +1,18 @@ Random.seed!(42) @testset "Fourier shifting methods" begin - # Int error - @test_throws ArgumentError FourierTools.shift([1,2,3], (1,)) - + @test_throws ArgumentError FourierTools.shift(opt_cu([1,2,3], use_cuda), (1,)) @testset "Empty shifts" begin - x = randn(ComplexF32, (11, 12, 13)) + x = opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda); @test FourierTools.shift(x, []) == x - x = randn(Float32, (11, 12, 13)) + x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda); @test FourierTools.shift(x, []) == x end @testset "Integer shifts for complex and real arrays" begin - x = randn(ComplexF32, (11, 12, 13)) + x =opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda); s = (2,2,2) @test FourierTools.shift(x, s) ≈ circshift(x, s) @@ -22,7 +20,7 @@ Random.seed!(42) @test FourierTools.shift(x, s) ≈ circshift(x, s) @test FourierTools.shift(x, (0,0,0)) == x - x = randn(Float32, (11, 12, 13)) + x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda); s = (2,2,2) @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) @@ -35,7 +33,7 @@ Random.seed!(42) @testset "Half integer shifts" begin - x = [0.0, 1.0, 0.0, 1.0] + x = opt_cu([0.0, 1.0, 0.0, 1.0], use_cuda) xc = ComplexF32.(x) s = [0.5] @@ -47,18 +45,19 @@ Random.seed!(42) end @testset "Check shifts with soft_fraction" begin - a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.1); + del = opt_cu(delta((255,255)), use_cuda) + a = shift(del, (1.5,1.25), soft_fraction=0.1); @test abs(sum(a[real(a).<0])) < 3.0 - a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.0); + a = shift(del, (1.5,1.25), soft_fraction=0.0); @test abs(sum(a[real(a).<0])) > 5.0 end @testset "Random shifts consistency between both methods" begin - x = randn((11, 12, 13)) + x = opt_cu(randn((11, 12, 13)), use_cuda) s = randn((3,)) .* 10 @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s)) @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s)) - x = randn((11, 12, 13)) + x = opt_cu(randn((11, 12, 13)), use_cuda) s = randn((3,)) .* 10 @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s)) @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s)) @@ -67,12 +66,12 @@ Random.seed!(42) @testset "Check revertibility for complex and real data" begin @testset "Complex data" begin - x = randn(ComplexF32, (11, 12, 13)) + x = opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda) s = (-1.1, 12.123, 0.21) @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) end @testset "Real data" begin - x = randn(Float32, (11, 12, 13)) + x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda) s = (-1.1, 12.123, 0.21) @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) end diff --git a/test/performance_tests.jl b/test/performance_tests.jl new file mode 100644 index 0000000..5222bc0 --- /dev/null +++ b/test/performance_tests.jl @@ -0,0 +1,43 @@ +using BenchmarkTools +using CUDA +using Test + +function test_fft(img) + img = fft(img) + img = ifft(img) + return img +end + +function test_ft(img) + img = ft(img) + img = ift(img) + return img +end + +use_cuda = false +sz = (1024, 1024) +dat = rand(ComplexF32, sz...) +img = opt_cu(dat, use_cuda) +display(@benchmark test_fft($img)) # 33 ms +display(@benchmark test_ft($img)) # 38 ms + +use_cuda = true +img = opt_cu(dat, use_cuda) +display(@benchmark CUDA.@sync test_fft($img)) # 834 µs +display(@benchmark CUDA.@sync test_ft($img)) # 1086 ms + +function test_nfft() + J, N = 8, 16 + k = range(-0.4, stop=0.4, length=J) # nodes at which the NFFT is evaluated + f = cu(randn(ComplexF64, J)) # data to be transformed + p = plan_nfft(k, N, reltol=1e-9) # create plan + fHat = adjoint(p) * f # calculate adjoint NFFT + y = p * fHat # calculate forward NFFT +end + +# using CUDA, NFFT, CuNFFT +# Ny, Nx = 1024, 2048 +# x = CUDA.randn(Ny, Nx); +# knots = CUDA.rand(2, Ny*Nx) .- 0.5f0; +# plan = NFFT.plan_nfft(CuArray{Float32}, knots, size(x)); +# CUDA.@allowscalar [(adjoint(plan) * complex(x[:]))[1] for i=1:10] diff --git a/test/resampling_tests.jl b/test/resampling_tests.jl index 929a85b..df4e80f 100644 --- a/test/resampling_tests.jl +++ b/test/resampling_tests.jl @@ -4,15 +4,14 @@ for _ in 1:5 s_small = ntuple(_ -> rand(1:13), dim) s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim) - - - x = randn(Float32, (s_small)) + + x = opt_cu(randn(Float32, (s_small)), use_cuda) @test x == resample(x, s_small) @test Float32.(x) ≈ Float32.(resample(resample(x, s_large), s_small)) @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small) @test Float32.(x) ≈ Float32.(resample_by_RFFT(resample_by_RFFT(x, s_large), s_small)) @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small) - x = randn(ComplexF32, (s_small)) + x = opt_cu(randn(ComplexF32, (s_small)), use_cuda) @test x ≈ resample(resample(x, s_large), s_small) @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small) @test x ≈ resample_by_FFT(resample_by_FFT(real(x), s_large), s_small) + 1im .* resample_by_FFT(resample_by_FFT(imag(x), s_large), s_small) @@ -27,7 +26,7 @@ s_small = ntuple(_ -> rand(1:13), dim) s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim) - x = randn(Float32, (s_small)) + x = opt_cu(randn(Float32, (s_small)), use_cuda) @test ≈(FourierTools.resample(x, s_large), FourierTools.resample_by_1D(x, s_large)) end end @@ -39,17 +38,16 @@ s_small = ntuple(_ -> rand(1:13), dim) s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim) - x = randn(Float32, (s_small)) + x = opt_cu(randn(Float32, (s_small)), use_cuda) @test Float32.(resample(x, s_large)) ≈ Float32.(real(resample(ComplexF32.(x), s_large))) @test FourierTools.resample_by_1D(x, s_large) ≈ real(FourierTools.resample_by_1D(ComplexF32.(x), s_large)) end end end - @testset "Tests that resample_by_FFT is purely real" begin function test_real(s_1, s_2) - x = randn(Float32, (s_1)) + x = opt_cu(randn(Float32, (s_1)), use_cuda) y = resample_by_FFT(x, s_2) @test all(( imag.(y) .+ 1 .≈ 1)) y = FourierTools.resample_by_1D(x, s_2) @@ -85,8 +83,8 @@ x_min = 0.0 x_max = 16π - xs_low = range(x_min, x_max, length=N_low+1)[1:N_low] - xs_high = range(x_min, x_max, length=N)[1:end-1] + xs_low = opt_cu(range(x_min, x_max, length=N_low+1)[1:N_low], use_cuda) + xs_high = opt_cu(range(x_min, x_max, length=N)[1:end-1], use_cuda) f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x) arr_low = f.(xs_low) arr_high = f.(xs_high) @@ -108,10 +106,10 @@ @testset "Upsample2 compared to resample" begin for sz in ((10,10),(5,8,9),(20,5,4)) - a = rand(sz...) - @test ≈(upsample2(a),resample(a,sz.*2)) + a = opt_cu(rand(sz...), use_cuda) + @test ≈(upsample2(a), resample(a,sz.*2)) @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2))) - a = rand(ComplexF32, sz...) + a = opt_cu(rand(ComplexF32, sz...), use_cuda) @test ≈(upsample2(a),resample(a,sz.*2)) @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2))) s2 = (d == 2 ? sz[d]*2 : sz[d] for d in 1:length(sz)) @@ -127,7 +125,7 @@ x_min = 0.0 x_max = 16π - xs_low = range(x_min, x_max, length=N_low+1)[1:N_low] + xs_low = opt_cu(range(x_min, x_max, length=N_low+1)[1:N_low], use_cuda) f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x) arr_low = f.(xs_low) @@ -148,15 +146,12 @@ test_resample(253, 254) test_resample(253, 1001) test_resample(99, 100101) - end - @testset "FFT resample in 2D" begin - - + @testset "FFT resample in 2D" begin function test_2D(in_s, out_s) - x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1] - y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]' + x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda) + y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda) arr = abs.(x) .+ abs.(y) .+ sinc.(sqrt.(x .^2 .+ y .^2)) arr_interp = resample(arr[1:end, 1:end], out_s); arr_ds = resample(arr_interp, in_s) @@ -174,9 +169,9 @@ test_2D((129, 128), (129, 153)) - x = range(-10.0, 10.0, length=129)[1:end-1] - x2 = range(-10.0, 10.0, length=130)[1:end-1] - x_exact = range(-10.0, 10.0, length=2049)[1:end-1] + x = opt_cu(range(-10.0, 10.0, length=129)[1:end-1], use_cuda) + x2 = opt_cu(range(-10.0, 10.0, length=130)[1:end-1], use_cuda) + x_exact = opt_cu(range(-10.0, 10.0, length=2049)[1:end-1], use_cuda) y = x' y2 = x2' y_exact = x_exact' @@ -202,8 +197,8 @@ @testset "FFT resample 2D for a complex signal" begin function test_2D(in_s, out_s) - x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1] - y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]' + x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda) + y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda) f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2))) f2(x, y) = abs(x) + abs(y) + sinc(sqrt((x - 5) ^2 + (y - 5)^2)) @@ -231,8 +226,8 @@ @testset "FFT resample in 2D for a purely imaginary signal" begin function test_2D(in_s, out_s) - x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1] - y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]' + x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda) + y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda) f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2))) arr = f.(x, y) @@ -256,17 +251,19 @@ end @testset "test select_region_ft" begin - x = [1,2,3,4] - @test select_region_ft(ffts(x), (5,)) == ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im] - x = [3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533] - @test select_region_ft(ffts(x), (7, 7)) == ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im] + x = opt_cu([1,2,3,4], use_cuda) + res = select_region_ft(ffts(x), (5,)) + @test res == opt_cu(ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im], use_cuda) + x = opt_cu([3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533], use_cuda) + res = select_region_ft(ffts(x), (7, 7)) + @test collect(res) ≈ opt_cu(ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im], use_cuda) end @testset "test resample_czt" begin dim =2 s_small = (12,16) # ntuple(_ -> rand(1:13), dim) s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim) - dat = select_region(randn(Float32, (5,6)), new_size= s_small) + dat = select_region(opt_cu(randn(Float32, (5,6)), use_cuda), new_size= s_small) rs1 = FourierTools.resample(dat, s_large) rs1b = select_region(rs1, new_size=size(dat)) rs2 = FourierTools.resample_czt(dat, s_large./s_small, do_damp=false) @@ -282,11 +279,12 @@ @test rs2b ≈ rs2 end - @testset "test resample_nfft" begin + if (! use_cuda) + @testset "test resample_nfft" begin dim =2 s_small = (12,16) # ntuple(_ -> rand(1:13), dim) s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim) - dat = select_region(randn(Float32, (5,6)), new_size= s_small) + dat = select_region(opt_cu(randn(Float32, (5,6)), use_cuda), new_size= s_small) rs1 = FourierTools.resample(dat, s_large) rs1b = select_region(rs1, new_size=size(dat)) mymap = (t) -> t .* s_small ./ s_large @@ -320,6 +318,9 @@ rs6 = FourierTools.resample_nfft(1im .* dat , t->t .* 2.0, s_small.÷2, is_src_coords=false, is_in_pixels=false, pad_value=0.0) rs7 = FourierTools.resample_nfft(1im .* dat, t->t .* 0.5, s_small.÷2, is_src_coords=true, is_in_pixels=false, pad_value=0.0) @test rs6.*4 ≈ rs7 + end + else + @warn "Skipping test for CuArray, since nfft does not support CuArray" end end diff --git a/test/runtests.jl b/test/runtests.jl index 0c8ec32..00520a3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,3 @@ -using Random, Test, FFTW using FourierTools using ImageTransformations using IndexFunArrays @@ -7,24 +6,52 @@ using NDTools using LinearAlgebra # for the assigned nfft function LinearAlgebra.mul! using FractionalTransforms using TestImages +using Random, Test, FFTW +using CUDA Random.seed!(42) -include("fft_helpers.jl") -include("fftshift_alternatives.jl") -include("utils.jl") -include("fourier_shifting.jl") -include("fourier_shear.jl") -include("fourier_rotate.jl") -include("resampling_tests.jl") -include("convolutions.jl") -include("correlations.jl") -include("custom_fourier_types.jl") -include("damping.jl") -include("czt.jl") -include("nfft_tests.jl") -include("fractional_fourier_transform.jl") -include("fourier_filtering.jl") -include("sdft.jl") +use_cuda = false +function opt_cu(img, use_cuda=false) + if (use_cuda) + CuArray(img) + else + img + end +end + +function run_all_tests() + include("fft_helpers.jl"); + include("fftshift_alternatives.jl"); + include("utils.jl"); + include("fourier_shifting.jl"); + include("fourier_shear.jl"); + include("fourier_rotate.jl"); + include("resampling_tests.jl"); ### nfft does not work with CUDA + include("convolutions.jl"); # spurious buffer problem in conv_p4 in CUDA? + include("correlations.jl"); + include("custom_fourier_types.jl"); + include("damping.jl"); + include("czt.jl"); + include("nfft_tests.jl"); + include("fractional_fourier_transform.jl"); + include("fourier_filtering.jl"); + include("sdft.jl"); +end + +use_cuda=false +run_all_tests() + +if CUDA.functional() + @testset "all in CUDA" begin + CUDA.allowscalar(false); + use_cuda=true + run_all_tests() + end +else + @testset "no CUDA available!" begin + @test true == true + end +end return diff --git a/test/utils.jl b/test/utils.jl index 5fdf23a..7daa22a 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -23,21 +23,24 @@ @testset "Test rfft_size" begin s = (11, 20, 10) - @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2)) - @test FourierTools.rft_size(randn(s), 2) == size(rfft(randn(s),2)) - - s = (11, 21, 10) - @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2)) + dat = opt_cu(randn(s), use_cuda); + if !use_cuda + @test FourierTools.rfft_size(s, 2) == size(rfft(dat,2)) + @test FourierTools.rft_size(randn(s), 2) == size(rfft(dat,2)) + s = (11, 21, 10) + @test FourierTools.rfft_size(s, 2) == size(rfft(dat,2)) + end s = (11, 21, 10) - @test FourierTools.rfft_size(s, 1) == size(rfft(randn(s),(1,2,3))) + dat = opt_cu(randn(s), use_cuda); + @test FourierTools.rfft_size(s, 1) == size(rfft(dat,(1,2,3))) end function center_test(x1, x2, x3, y1, y2, y3) - arr1 = randn((x1, x2, x3)) - arr2 = zeros((y1, y2, y3)) + arr1 = opt_cu(randn((x1, x2, x3)), use_cuda); + arr2 = opt_cu(zeros((y1, y2, y3)), use_cuda); FourierTools.center_set!(arr2, arr1) arr3 = FourierTools.center_extract(arr2, (x1, x2, x3)) @@ -107,7 +110,6 @@ @test all(fourierspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256)) @test realspace_pixelsize(1, 512) ≈ 1 / 512 @test all(realspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256)) - end @@ -117,25 +119,34 @@ end @testset "odd_view, fourier_reverse!" begin - a = [1 2 3;4 5 6;7 8 9;10 11 12] - @test FourierTools.odd_view(a) == [4 5 6;7 8 9; 10 11 12] + a = opt_cu([1 2 3;4 5 6;7 8 9;10 11 12], use_cuda) + @test FourierTools.odd_view(a) == opt_cu([4 5 6;7 8 9; 10 11 12], use_cuda) fourier_reverse!(a) - @test a == [3 2 1;12 11 10;9 8 7;6 5 4] - a = [1 2 3;4 5 6;7 8 9;10 11 12] + @test a == opt_cu([3 2 1;12 11 10;9 8 7;6 5 4], use_cuda) + a = opt_cu([1 2 3;4 5 6;7 8 9;10 11 12], use_cuda) b = copy(a); fourier_reverse!(a,dims=1); @test a[2:end,:] == b[end:-1:2,:] - a = [1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16] + a = opt_cu([1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16], use_cuda) b = copy(a); fourier_reverse!(a); - @test a[2,2] == b[4,4] - @test a[2,3] == b[4,3] + # the ranges are used to avoid error in single element acces with CuArray + @test a[2:2,2:2] == b[4:4,4:4] + @test a[2:2,3:3] == b[4:4,3:3] fourier_reverse!(a); @test a == b fourier_reverse!(a;dims=1); @test a[2:end,:] == b[end:-1:2,:] - @test sum(abs.(imag.(ift(fourier_reverse!(ft(rand(5,6,7))))))) < 1e-10 + rd = opt_cu(rand(5,6,7), use_cuda) + if !use_cuda + @test sum(abs.(imag.(ift(fourier_reverse!(ft(rd)))))) < 1e-10 + end + # @test sum(abs.(imag.(ift(fourier_reverse(ft(rd)))))) < 1e-10 sz = (10,9,6) - @test sum(abs.(real.(ift(fourier_reverse!(ft(box((sz)))))) .- box(sz))) < 1e-10 + bb = opt_cu(box((sz)), use_cuda) + if !use_cuda + @test sum(abs.(real.(ift(fourier_reverse!(ft(bb)))) .- bb)) < 1e-10 + end + # @test sum(abs.(real.(ift(fourier_reverse(ft(bb)))) .- bb)) < 1e-10 end end