diff --git a/Project.toml b/Project.toml index e9239c2..c38978b 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,7 @@ version = "0.3.4" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" -GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" @@ -21,7 +21,7 @@ AcceleratedKernelsoneAPIExt = "oneAPI" [compat] ArgCheck = "2" -GPUArrays = "10, 11" +GPUArraysCore = "0.2.0" KernelAbstractions = "0.9.34" Markdown = "1" Metal = "1" diff --git a/src/AcceleratedKernels.jl b/src/AcceleratedKernels.jl index babef42..69928de 100644 --- a/src/AcceleratedKernels.jl +++ b/src/AcceleratedKernels.jl @@ -12,7 +12,7 @@ module AcceleratedKernels # Internal dependencies using ArgCheck: @argcheck -using GPUArrays: GPUArrays, AbstractGPUVector, AbstractGPUArray, @allowscalar +using GPUArraysCore: AbstractGPUVector, AbstractGPUArray, @allowscalar using KernelAbstractions using Polyester: @batch import OhMyThreads as OMT @@ -21,7 +21,6 @@ import OhMyThreads as OMT # Exposed functions from upstream packages const synchronize = KernelAbstractions.synchronize const get_backend = KernelAbstractions.get_backend -const neutral_element = GPUArrays.neutral_element # Include code from other files diff --git a/src/accumulate/accumulate.jl b/src/accumulate/accumulate.jl index e88fd18..1e532b9 100644 --- a/src/accumulate/accumulate.jl +++ b/src/accumulate/accumulate.jl @@ -31,7 +31,7 @@ include("accumulate_cpu.jl") accumulate!( op, v::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -47,7 +47,7 @@ include("accumulate_cpu.jl") accumulate!( op, dst::AbstractArray, src::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(dst)), + neutral=neutral_element(op, eltype(dst)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -117,7 +117,7 @@ AK.accumulate!(+, v, alg=AK.ScanPrefixes()) function accumulate!( op, v::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -141,7 +141,7 @@ end function accumulate!( op, dst::AbstractArray, src::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(dst)), + neutral=neutral_element(op, eltype(dst)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -166,7 +166,7 @@ end function _accumulate_impl!( op, v::AbstractArray, backend::Backend; init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -211,7 +211,7 @@ end accumulate( op, v::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, @@ -229,7 +229,7 @@ Out-of-place version of [`accumulate!`](@ref). function accumulate( op, v::AbstractArray, backend::Backend=get_backend(v); init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Union{Nothing, Int}=nothing, inclusive::Bool=true, diff --git a/src/accumulate/accumulate_nd.jl b/src/accumulate/accumulate_nd.jl index 52d0fa8..605e043 100644 --- a/src/accumulate/accumulate_nd.jl +++ b/src/accumulate/accumulate_nd.jl @@ -254,7 +254,7 @@ end function accumulate_nd!( op, v::AbstractArray, backend::GPU; init, - neutral=GPUArrays.neutral_element(op, eltype(v)), + neutral=neutral_element(op, eltype(v)), dims::Int, inclusive::Bool=true, diff --git a/src/reduce/mapreduce_1d.jl b/src/reduce/mapreduce_1d.jl index 93323f3..564b06a 100644 --- a/src/reduce/mapreduce_1d.jl +++ b/src/reduce/mapreduce_1d.jl @@ -102,7 +102,7 @@ end function mapreduce_1d( f, op, src::AbstractArray, backend::GPU; init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), block_size::Int=256, temp::Union{Nothing, AbstractArray}=nothing, diff --git a/src/reduce/mapreduce_nd.jl b/src/reduce/mapreduce_nd.jl index b240379..9c828d1 100644 --- a/src/reduce/mapreduce_nd.jl +++ b/src/reduce/mapreduce_nd.jl @@ -188,7 +188,7 @@ end function mapreduce_nd( f, op, src::AbstractArray, backend::GPU; init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Int, block_size::Int=256, temp::Union{Nothing, AbstractArray}=nothing, @@ -324,7 +324,7 @@ end function _mapreduce_nd_apply_init!(f, op, dst, src, backend, init, block_size) foreachindex( dst, backend, - block_size=block_size, + block_size=block_size, ) do i dst[i] = op(init, f(src[i])) end diff --git a/src/reduce/reduce.jl b/src/reduce/reduce.jl index 06d775b..b9af75e 100644 --- a/src/reduce/reduce.jl +++ b/src/reduce/reduce.jl @@ -1,3 +1,18 @@ +# neutral_element moved over from GPUArrays.jl +neutral_element(op, T) = + error("""AcceleratedKernels.jl needs to know the neutral element for your operator `$op`. + Please pass it as an explicit keyword argument `neutral`.""") +neutral_element(::typeof(Base.:(|)), T) = zero(T) +neutral_element(::typeof(Base.:(+)), T) = zero(T) +neutral_element(::typeof(Base.add_sum), T) = zero(T) +neutral_element(::typeof(Base.:(&)), T) = one(T) +neutral_element(::typeof(Base.:(*)), T) = one(T) +neutral_element(::typeof(Base.mul_prod), T) = one(T) +neutral_element(::typeof(Base.min), T) = typemax(T) +neutral_element(::typeof(Base.max), T) = typemin(T) +neutral_element(::typeof(Base._extrema_rf), ::Type{<:NTuple{2,T}}) where {T} = typemax(T), typemin(T) + + include("mapreduce_1d.jl") include("mapreduce_nd.jl") @@ -6,7 +21,7 @@ include("mapreduce_nd.jl") reduce( op, src::AbstractArray, backend::Backend=get_backend(src); init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings @@ -72,7 +87,7 @@ mcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2) function reduce( op, src::AbstractArray, backend::Backend=get_backend(src); init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings @@ -103,7 +118,7 @@ end function _reduce_impl( op, src::AbstractArray, backend; init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings @@ -137,7 +152,7 @@ end mapreduce( f, op, src::AbstractArray, backend::Backend=get_backend(src); init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings @@ -203,7 +218,7 @@ mcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2) function mapreduce( f, op, src::AbstractArray, backend::Backend=get_backend(src); init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings @@ -234,7 +249,7 @@ end function _mapreduce_impl( f, op, src::AbstractArray, backend::Backend; init, - neutral=GPUArrays.neutral_element(op, eltype(src)), + neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, # CPU settings