Skip to content

[0.9] Unified memory allocations #632

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 22, 2025
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ allocate

```@docs
KernelAbstractions.zeros
KernelAbstractions.supports_unified
```

## Internal
Expand Down
53 changes: 40 additions & 13 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -532,40 +532,67 @@ get_backend(::Array) = CPU()
Adapt.adapt_storage(::CPU, a::Array) = a

"""
allocate(::Backend, Type, dims...)::AbstractArray
allocate(::Backend, Type, dims...; unified=false)::AbstractArray

Allocate a storage array appropriate for the computational backend.
Allocate a storage array appropriate for the computational backend. `unified=true`
allocates an array using unified memory if the backend supports it and throws otherwise.
Use [`supports_unified`](@ref) to determine whether it is supported by a backend.

!!! note
Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)`
"""
allocate(backend::Backend, T::Type, dims...) = allocate(backend, T, dims)
allocate(backend::Backend, T::Type, dims::Tuple) = throw(MethodError(allocate, (backend, T, dims)))
Backend implementations **should** implement `allocate(::NewBackend, T, dims::Tuple; unified::Bool=false)`
"""
function allocate(backend::Backend, T::Type, dims...; kwargs...)
    # Collect the splatted dimensions into a tuple and forward every keyword untouched,
    # so backends only ever need to implement the `dims::Tuple` method.
    return allocate(backend, T, dims; kwargs...)
end
function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing)
    # Generic fallback for the tuple form.
    # `unified === nothing` means the caller did not pass the keyword: report the
    # positional call as having no method.
    unified === nothing && throw(MethodError(allocate, (backend, T, dims)))

    # Unified memory was requested explicitly, but this generic method cannot provide it.
    unified && throw(ArgumentError("`$(typeof(backend))` does not support unified memory. If you believe it does, please open a github issue."))

    # `unified = false` was passed explicitly: retry without the keyword.
    return allocate(backend, T, dims)
end


"""
zeros(::Backend, Type, dims...)::AbstractArray
zeros(::Backend, Type, dims...; unified=false)::AbstractArray

Allocate a storage array appropriate for the computational backend filled with zeros.
`unified=true` allocates an array using unified memory if the backend supports it and
throws otherwise.
"""
zeros(backend::Backend, T::Type, dims...) = zeros(backend, T, dims)
function zeros(backend::Backend, ::Type{T}, dims::Tuple) where {T}
data = allocate(backend, T, dims...)
function zeros(backend::Backend, T::Type, dims...; kwargs...)
    # Varargs convenience form: pack the dimensions into a tuple and pass keywords through.
    return zeros(backend, T, dims; kwargs...)
end
function zeros(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T}
    # Pass the dimension tuple straight to the tuple-based `allocate` method (as `ones`
    # does) instead of splatting it and bouncing through the varargs forwarder.
    # Keywords such as `unified` are forwarded unchanged.
    data = allocate(backend, T, dims; kwargs...)
    # Overwrite every element with the additive identity of `T`.
    fill!(data, zero(T))
    return data
end

"""
ones(::Backend, Type, dims...)::AbstractArray
ones(::Backend, Type, dims...; unified=false)::AbstractArray

Allocate a storage array appropriate for the computational backend filled with ones.
`unified=true` allocates an array using unified memory if the backend supports it and
throws otherwise.
"""
ones(backend::Backend, T::Type, dims...) = ones(backend, T, dims)
function ones(backend::Backend, ::Type{T}, dims::Tuple) where {T}
data = allocate(backend, T, dims)
function ones(backend::Backend, T::Type, dims...; kwargs...)
    # Varargs convenience form: pack the dimensions into a tuple and pass keywords through.
    return ones(backend, T, dims; kwargs...)
end
function ones(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T}
    # Obtain storage from the backend, forwarding keywords such as `unified` as given.
    buffer = allocate(backend, T, dims; kwargs...)
    # Overwrite every element with the multiplicative identity of `T`.
    fill!(buffer, one(T))
    return buffer
end

"""
supports_unified(::Backend)::Bool

Returns whether unified memory arrays are supported by the backend.

!!! note
Backend implementations **should** implement this function
only if they **do** support unified memory.
"""
supports_unified(::Backend) = false  # default for every backend; override to return `true`

"""
supports_atomics(::Backend)::Bool

Expand Down
11 changes: 6 additions & 5 deletions src/cpu.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# Fallback for any `AbstractArray`: does nothing and returns `nothing`.
unsafe_free!(::AbstractArray) = return
# Synchronizing the CPU backend returns `nothing` immediately.
synchronize(::CPU) = nothing

allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)
function allocate(::CPU, ::Type{T}, dims::Tuple; unified::Bool = false) where {T}
    # `unified` is accepted so callers can pass it uniformly; it is not used here —
    # an uninitialized `Array{T}` of the requested size is returned either way.
    return Array{T}(undef, dims)
end

function zeros(backend::CPU, ::Type{T}, dims::Tuple) where {T}
arr = allocate(backend, T, dims)
function zeros(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T}
    # CPU-specific method: allocate, then initialise through a kernel rather than `fill!`.
    out = allocate(backend, T, dims; kwargs...)
    # Launch the init kernel with `ndrange` equal to the element count, handing it `zero`
    # and `T`. NOTE(review): presumably it stores `zero(T)` in each element — confirm
    # against `init_kernel`'s definition.
    init_kernel(backend)(out, zero, T; ndrange = length(out))
    return out
end
function ones(backend::CPU, ::Type{T}, dims::Tuple) where {T}
arr = allocate(backend, T, dims)
function ones(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T}
    # CPU-specific method: allocate, then initialise through a kernel rather than `fill!`.
    out = allocate(backend, T, dims; kwargs...)
    # Launch the init kernel with `ndrange` equal to the element count, handing it `one`
    # and `T`. NOTE(review): presumably it stores `one(T)` in each element — confirm
    # against `init_kernel`'s definition.
    init_kernel(backend)(out, one, T; ndrange = length(out))
    return out
end
Expand All @@ -34,6 +34,7 @@ end

# Capability queries and hooks specialised for the CPU backend.
functional(::CPU) = true
# Page-locking on the CPU backend does nothing and returns `nothing`.
pagelock!(::CPU, x) = nothing
# The CPU backend reports unified-memory support.
supports_unified(::CPU) = true

function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize)
Expand Down
7 changes: 7 additions & 0 deletions test/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
backendT = typeof(backend).name.wrapper # To look through CUDABackend{true, false}
@test backend isa backendT

unified = KernelAbstractions.supports_unified(backend)
@test unified isa Bool
U = allocate(backend, Float32, 5; unified)
if unified
@test U[3] isa Float32
end

x = allocate(backend, Float32, 5)
A = allocate(backend, Float32, 5, 5)
@test @inferred(KernelAbstractions.get_backend(A)) isa backendT
Expand Down
Loading