From a73cf9705d54f9867f769d46977e09ecaf58b3ff Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sun, 20 Jul 2025 17:51:29 -0300 Subject: [PATCH 01/18] Typos --- src/KernelAbstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 15757e3a..7a050932 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -564,7 +564,7 @@ end Returns whether `@atomic` operations are supported by the backend. !!! note - Backend implementations **must** implement this function, + Backend implementations **must** implement this function only if they **do not** support atomic operations with Atomix. """ supports_atomics(::Backend) = true @@ -575,7 +575,7 @@ supports_atomics(::Backend) = true Returns whether `Float64` values are supported by the backend. !!! note - Backend implementations **must** implement this function, + Backend implementations **must** implement this function only if they **do not** support `Float64`. """ supports_float64(::Backend) = true From 002de2d7934d925a5a9856c66a5897824423ac29 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:09:30 -0300 Subject: [PATCH 02/18] POC `supports_unified` --- src/KernelAbstractions.jl | 39 +++++++++++++++++++++++++++------------ test/test.jl | 9 +++++++++ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 7a050932..70cf4aa5 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -524,40 +524,55 @@ end # adapt_storage(::Backend, a::BackendArray) = a """ - allocate(::Backend, Type, dims...)::AbstractArray + allocate(::Backend, Type, dims...; unified=false)::AbstractArray -Allocate a storage array appropriate for the computational backend. +Allocate a storage array appropriate for the computational backend. 
`unified` +allocates an array using unified memory if the backend supports it. Use +[`supports_unified`](@ref) to determine whether it is supported by a backend. !!! note Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` """ -allocate(backend::Backend, T::Type, dims...) = allocate(backend, T, dims) -allocate(backend::Backend, T::Type, dims::Tuple) = throw(MethodError(allocate, (backend, T, dims))) +allocate(backend::Backend, T::Type, dims...; unified=false) = allocate(backend, T, dims; unified) +allocate(backend::Backend, T::Type, dims::Tuple; unified=false) = throw(MethodError(allocate, (backend, T, dims))) """ - zeros(::Backend, Type, dims...)::AbstractArray + zeros(::Backend, Type, dims...; unified=false)::AbstractArray Allocate a storage array appropriate for the computational backend filled with zeros. +`unified` allocates an array using unified memory if the backend supports it. """ -zeros(backend::Backend, T::Type, dims...) = zeros(backend, T, dims) -function zeros(backend::Backend, ::Type{T}, dims::Tuple) where {T} - data = allocate(backend, T, dims...) +zeros(backend::Backend, T::Type, dims...; kwargs...) = zeros(backend, T, dims; kwargs...) +function zeros(backend::Backend, ::Type{T}, dims::Tuple; unified=false) where {T} + data = allocate(backend, T, dims...; unified) fill!(data, zero(T)) return data end """ - ones(::Backend, Type, dims...)::AbstractArray + ones(::Backend, Type, dims...; unified=false)::AbstractArray Allocate a storage array appropriate for the computational backend filled with ones. +`unified` allocates an array using unified memory if the backend supports it. """ -ones(backend::Backend, T::Type, dims...) = ones(backend, T, dims) -function ones(backend::Backend, ::Type{T}, dims::Tuple) where {T} - data = allocate(backend, T, dims) +ones(backend::Backend, T::Type, dims...; kwargs...) = ones(backend, T, dims; kwargs...) 
+function ones(backend::Backend, ::Type{T}, dims::Tuple; unified=false) where {T} + data = allocate(backend, T, dims; unified) fill!(data, one(T)) return data end +""" + supports_unified(::Backend)::Bool + +Returns whether unified memory arrays are supported by the backend. + +!!! note + Backend implementations **must** implement this function + only if they **do not** support unified memory. +""" +supports_unified(::Backend) = true + """ supports_atomics(::Backend)::Bool diff --git a/test/test.jl b/test/test.jl index 53126e88..be2bb4b3 100644 --- a/test/test.jl +++ b/test/test.jl @@ -77,6 +77,15 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk backendT = typeof(backend).name.wrapper # To look through CUDABackend{true, false} @test backend isa backendT + unified = supports_unified(backend) + @test unified isa Bool + U = allocate(backend, Float32, 5; unified) + if unified + @test U[3] isa Float32 + else + @test_throws U[3] + end + x = allocate(backend, Float32, 5) A = allocate(backend, Float32, 5, 5) @test @inferred(KernelAbstractions.get_backend(A)) isa backendT From 6c03b08c9b91f659e669768a57cc272098acddca Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:17:31 -0300 Subject: [PATCH 03/18] Formatting --- src/KernelAbstractions.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 70cf4aa5..0ccd564a 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -533,8 +533,8 @@ allocates an array using unified memory if the backend supports it. Use !!! 
note Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` """ -allocate(backend::Backend, T::Type, dims...; unified=false) = allocate(backend, T, dims; unified) -allocate(backend::Backend, T::Type, dims::Tuple; unified=false) = throw(MethodError(allocate, (backend, T, dims))) +allocate(backend::Backend, T::Type, dims...; unified = false) = allocate(backend, T, dims; unified) +allocate(backend::Backend, T::Type, dims::Tuple; unified = false) = throw(MethodError(allocate, (backend, T, dims))) """ zeros(::Backend, Type, dims...; unified=false)::AbstractArray @@ -543,7 +543,7 @@ Allocate a storage array appropriate for the computational backend filled with z `unified` allocates an array using unified memory if the backend supports it. """ zeros(backend::Backend, T::Type, dims...; kwargs...) = zeros(backend, T, dims; kwargs...) -function zeros(backend::Backend, ::Type{T}, dims::Tuple; unified=false) where {T} +function zeros(backend::Backend, ::Type{T}, dims::Tuple; unified = false) where {T} data = allocate(backend, T, dims...; unified) fill!(data, zero(T)) return data @@ -556,7 +556,7 @@ Allocate a storage array appropriate for the computational backend filled with o `unified` allocates an array using unified memory if the backend supports it. """ ones(backend::Backend, T::Type, dims...; kwargs...) = ones(backend, T, dims; kwargs...) 
-function ones(backend::Backend, ::Type{T}, dims::Tuple; unified=false) where {T} +function ones(backend::Backend, ::Type{T}, dims::Tuple; unified = false) where {T} data = allocate(backend, T, dims; unified) fill!(data, one(T)) return data From bdb7ad9ec0afc71e7c79533bc3ff24f34441bab0 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 21 Jul 2025 14:58:26 -0300 Subject: [PATCH 04/18] Address feedback --- src/KernelAbstractions.jl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 0ccd564a..dd0918a3 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -533,8 +533,17 @@ allocates an array using unified memory if the backend supports it. Use !!! note Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` """ -allocate(backend::Backend, T::Type, dims...; unified = false) = allocate(backend, T, dims; unified) -allocate(backend::Backend, T::Type, dims::Tuple; unified = false) = throw(MethodError(allocate, (backend, T, dims))) +allocate(backend::Backend, T::Type, dims...; kwargs...) = allocate(backend, T, dims; kwargs...) +function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing) + if isnothing(unified) + throw(MethodError(allocate, (backend, T, dims))) + elseif unified + throw(ArgumentError("`$(typeof(backend))` either does not support unified memory or it has not yet defined `allocate(backend::$backend, T::Type, dims::Tuple; unified::Bool)`")) + else + allocate(backend, T, dims) + end +end + """ zeros(::Backend, Type, dims...; unified=false)::AbstractArray @@ -543,8 +552,8 @@ Allocate a storage array appropriate for the computational backend filled with z `unified` allocates an array using unified memory if the backend supports it. """ zeros(backend::Backend, T::Type, dims...; kwargs...) 
= zeros(backend, T, dims; kwargs...) -function zeros(backend::Backend, ::Type{T}, dims::Tuple; unified = false) where {T} - data = allocate(backend, T, dims...; unified) +function zeros(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T} + data = allocate(backend, T, dims...; kwargs...) fill!(data, zero(T)) return data end @@ -556,8 +565,8 @@ Allocate a storage array appropriate for the computational backend filled with o `unified` allocates an array using unified memory if the backend supports it. """ ones(backend::Backend, T::Type, dims...; kwargs...) = ones(backend, T, dims; kwargs...) -function ones(backend::Backend, ::Type{T}, dims::Tuple; unified = false) where {T} - data = allocate(backend, T, dims; unified) +function ones(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T} + data = allocate(backend, T, dims; kwargs...) fill!(data, one(T)) return data end @@ -569,9 +578,9 @@ Returns whether unified memory arrays are supported by the backend. !!! note Backend implementations **must** implement this function - only if they **do not** support unified memory. + only if they **do** support unified memory. """ -supports_unified(::Backend) = true +supports_unified(::Backend) = false """ supports_atomics(::Backend)::Bool From 4c0c6da5c59432601d57d461211c4d8a7b1579ce Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 22 Jul 2025 11:25:00 +0200 Subject: [PATCH 05/18] add CPU definition. 
--- src/cpu.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cpu.jl b/src/cpu.jl index e383386f..dcfd4f9f 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -1,15 +1,15 @@ synchronize(::CPU) = nothing -allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims) +allocate(::CPU, ::Type{T}, dims::Tupl; unified::Bool=false) where {T} = Array{T}(undef, dims) -function zeros(backend::CPU, ::Type{T}, dims::Tuple) where {T} - arr = allocate(backend, T, dims) +function zeros(backend::CPU, ::Type{T}, dims::Tuple, kwargs...) where {T} + arr = allocate(backend, T, dims; kwargs...) kernel = init_kernel(backend) kernel(arr, zero, T, ndrange = length(arr)) return arr end -function ones(backend::CPU, ::Type{T}, dims::Tuple) where {T} - arr = allocate(backend, T, dims) +function ones(backend::CPU, ::Type{T}, dims::Tuple, kwargs...) where {T} + arr = allocate(backend, T, dims; kwargs...) kernel = init_kernel(backend) kernel(arr, one, T; ndrange = length(arr)) return arr @@ -33,6 +33,7 @@ end functional(::CPU) = true pagelock!(::CPU, x) = nothing +supports_unified(::CPU) = true function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing) ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize) From c4aeb1048c400fb28fa6ecc82c1f1cbecd9095fd Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:16:07 -0300 Subject: [PATCH 06/18] Finish CPU backend --- src/cpu.jl | 4 ++-- src/pocl/backend.jl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cpu.jl b/src/cpu.jl index dcfd4f9f..41185527 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -2,13 +2,13 @@ synchronize(::CPU) = nothing allocate(::CPU, ::Type{T}, dims::Tupl; unified::Bool=false) where {T} = Array{T}(undef, dims) -function zeros(backend::CPU, ::Type{T}, dims::Tuple, kwargs...) 
where {T} +function zeros(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T} arr = allocate(backend, T, dims; kwargs...) kernel = init_kernel(backend) kernel(arr, zero, T, ndrange = length(arr)) return arr end -function ones(backend::CPU, ::Type{T}, dims::Tuple, kwargs...) where {T} +function ones(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T} arr = allocate(backend, T, dims; kwargs...) kernel = init_kernel(backend) kernel(arr, one, T; ndrange = length(arr)) diff --git a/src/pocl/backend.jl b/src/pocl/backend.jl index 8e7fcc08..ffa24746 100644 --- a/src/pocl/backend.jl +++ b/src/pocl/backend.jl @@ -21,16 +21,16 @@ end ## Memory Operations -KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims) +KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple; unified::Bool=false) where {T} = Array{T}(undef, dims) -function KA.zeros(backend::POCLBackend, ::Type{T}, dims::Tuple) where {T} - arr = KA.allocate(backend, T, dims) +function KA.zeros(backend::POCLBackend, ::Type{T}, dims::Tuple; kwargs...) where {T} + arr = KA.allocate(backend, T, dims; kwargs...) kernel = KA.init_kernel(backend) kernel(arr, zero, T, ndrange = length(arr)) return arr end -function KA.ones(backend::POCLBackend, ::Type{T}, dims::Tuple) where {T} - arr = KA.allocate(backend, T, dims) +function KA.ones(backend::POCLBackend, ::Type{T}, dims::Tuple; kwargs...) where {T} + arr = KA.allocate(backend, T, dims; kwargs...) 
kernel = KA.init_kernel(backend) kernel(arr, one, T; ndrange = length(arr)) return arr From 4eec3c6dc83fe086e55ac2d6614cba78311a28d1 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:05:20 -0300 Subject: [PATCH 07/18] Format & fix --- src/KernelAbstractions.jl | 2 +- test/test.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index dd0918a3..620b31bc 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -535,7 +535,7 @@ allocates an array using unified memory if the backend supports it. Use """ allocate(backend::Backend, T::Type, dims...; kwargs...) = allocate(backend, T, dims; kwargs...) function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing) - if isnothing(unified) + return if isnothing(unified) throw(MethodError(allocate, (backend, T, dims))) elseif unified throw(ArgumentError("`$(typeof(backend))` either does not support unified memory or it has not yet defined `allocate(backend::$backend, T::Type, dims::Tuple; unified::Bool)`")) diff --git a/test/test.jl b/test/test.jl index be2bb4b3..4784d97b 100644 --- a/test/test.jl +++ b/test/test.jl @@ -83,7 +83,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk if unified @test U[3] isa Float32 else - @test_throws U[3] + @test_throws ErrorException U[3] end x = allocate(backend, Float32, 5) From b892c4a5794dd10040ba5dee51755a2270d60562 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:18:50 -0300 Subject: [PATCH 08/18] Fix test --- test/test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.jl b/test/test.jl index 4784d97b..4f7f9c7a 100644 --- a/test/test.jl +++ b/test/test.jl @@ -77,7 +77,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk 
backendT = typeof(backend).name.wrapper # To look through CUDABackend{true, false} @test backend isa backendT - unified = supports_unified(backend) + unified = KernelAbstractions.supports_unified(backend) @test unified isa Bool U = allocate(backend, Float32, 5; unified) if unified From be66b7128d2b3ed6f81a5ef27ebf0d23911933ce Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:12:32 -0300 Subject: [PATCH 09/18] Format feedback --- src/KernelAbstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 620b31bc..dc373f8c 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -535,12 +535,12 @@ allocates an array using unified memory if the backend supports it. Use """ allocate(backend::Backend, T::Type, dims...; kwargs...) = allocate(backend, T, dims; kwargs...) function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing) - return if isnothing(unified) + if isnothing(unified) throw(MethodError(allocate, (backend, T, dims))) elseif unified throw(ArgumentError("`$(typeof(backend))` either does not support unified memory or it has not yet defined `allocate(backend::$backend, T::Type, dims::Tuple; unified::Bool)`")) else - allocate(backend, T, dims) + return allocate(backend, T, dims) end end From 9566220df713cc70ab493cb76a746fa490d8507e Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:43:45 -0300 Subject: [PATCH 10/18] Update docstring and shorten error --- src/KernelAbstractions.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index dc373f8c..e9b65d17 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -532,13 +532,14 @@ allocates an array using unified memory if the backend supports it. 
Use !!! note Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` + Backend implementations **should** implement `allocate(::NewBackend, T, dims::Tuple; unified::Bool=false)` """ allocate(backend::Backend, T::Type, dims...; kwargs...) = allocate(backend, T, dims; kwargs...) function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing) if isnothing(unified) throw(MethodError(allocate, (backend, T, dims))) elseif unified - throw(ArgumentError("`$(typeof(backend))` either does not support unified memory or it has not yet defined `allocate(backend::$backend, T::Type, dims::Tuple; unified::Bool)`")) + throw(ArgumentError("`$(typeof(backend))` does not support unified memory. If you believe it does, please open a github issue.")) else return allocate(backend, T, dims) end From 9ff6f529cdaddd680fe07de383ea10b0f2074f41 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:27:55 -0300 Subject: [PATCH 11/18] Enforcing unified support would be breaking --- src/KernelAbstractions.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index e9b65d17..5d43b04e 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -578,8 +578,9 @@ end Returns whether unified memory arrays are supported by the backend. !!! note - Backend implementations **must** implement this function - only if they **do** support unified memory. + Backend implementations **should** implement this function + only if they **do** support unified memory. It will be required + in KernelAbstractions 0.10. 
""" supports_unified(::Backend) = false From 2ae1075ced2ce7735bead25e4c9a62d89678ef36 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:32:44 -0300 Subject: [PATCH 12/18] Feedback --- src/KernelAbstractions.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 5d43b04e..12b9e05b 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -579,8 +579,7 @@ Returns whether unified memory arrays are supported by the backend. !!! note Backend implementations **should** implement this function - only if they **do** support unified memory. It will be required - in KernelAbstractions 0.10. + only if they **do** support unified memory. """ supports_unified(::Backend) = false From 441702171ad1d3bc01f2601549ca937c49e99849 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:39:58 -0300 Subject: [PATCH 13/18] Fixes --- src/KernelAbstractions.jl | 2 +- src/cpu.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 12b9e05b..e9b65d17 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -578,7 +578,7 @@ end Returns whether unified memory arrays are supported by the backend. !!! note - Backend implementations **should** implement this function + Backend implementations **must** implement this function only if they **do** support unified memory. 
""" supports_unified(::Backend) = false diff --git a/src/cpu.jl b/src/cpu.jl index 41185527..2e9d7bd0 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -1,6 +1,6 @@ synchronize(::CPU) = nothing -allocate(::CPU, ::Type{T}, dims::Tupl; unified::Bool=false) where {T} = Array{T}(undef, dims) +allocate(::CPU, ::Type{T}, dims::Tuple; unified::Bool=false) where {T} = Array{T}(undef, dims) function zeros(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T} arr = allocate(backend, T, dims; kwargs...) From 5151302de56f399153a1bf78243917661e7b628c Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:50:57 -0300 Subject: [PATCH 14/18] Remove unused file --- src/cpu.jl | 225 ----------------------------------------------------- 1 file changed, 225 deletions(-) delete mode 100644 src/cpu.jl diff --git a/src/cpu.jl b/src/cpu.jl deleted file mode 100644 index 2e9d7bd0..00000000 --- a/src/cpu.jl +++ /dev/null @@ -1,225 +0,0 @@ -synchronize(::CPU) = nothing - -allocate(::CPU, ::Type{T}, dims::Tuple; unified::Bool=false) where {T} = Array{T}(undef, dims) - -function zeros(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T} - arr = allocate(backend, T, dims; kwargs...) - kernel = init_kernel(backend) - kernel(arr, zero, T, ndrange = length(arr)) - return arr -end -function ones(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T} - arr = allocate(backend, T, dims; kwargs...) 
- kernel = init_kernel(backend) - kernel(arr, one, T; ndrange = length(arr)) - return arr -end - -function copyto!(backend::CPU, A, B) - if get_backend(A) == get_backend(B) && get_backend(A) isa CPU - if length(A) != length(B) - error("Arrays must match in length") - end - if Base.mightalias(A, B) - error("Arrays may not alias") - end - kernel = copy_kernel(backend) - kernel(A, B, ndrange = length(A)) - return A - else - return Base.copyto!(A, B) - end -end - -functional(::CPU) = true -pagelock!(::CPU, x) = nothing -supports_unified(::CPU) = true - -function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing) - ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize) - - if length(blocks(iterspace)) == 0 - return nothing - end - - __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static) - return nothing -end - -const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size -function default_cpu_workgroupsize(ndrange) - # if the total kernel is small, don't launch multiple tasks - n = prod(ndrange) - if iszero(n) - # If the ndrange is zero return a workgroupsize of (1, 1,...) 
- return map(one, ndrange) - elseif n <= CPU_GRAINSIZE - return ndrange - else - available = Ref(CPU_GRAINSIZE) - return ntuple(length(ndrange)) do i - dim = ndrange[i] - remaining = available[] - if remaining == 0 - return 1 - elseif remaining <= dim - available[] = 0 - return remaining - else - available[] = remaining ÷ dim - return dim - end - end - end -end - -@inline function launch_config(kernel::Kernel{CPU}, ndrange, workgroupsize) - if ndrange isa Integer - ndrange = (ndrange,) - end - if workgroupsize isa Integer - workgroupsize = (workgroupsize,) - end - - if KernelAbstractions.workgroupsize(kernel) <: DynamicSize && workgroupsize === nothing - workgroupsize = default_cpu_workgroupsize(ndrange) - end - iterspace, dynamic = partition(kernel, ndrange, workgroupsize) - # partition checked that the ndrange's agreed - if KernelAbstractions.ndrange(kernel) <: StaticSize - ndrange = nothing - end - - return ndrange, workgroupsize, iterspace, dynamic -end - -# Inference barriers -function __run(obj, ndrange, iterspace, args, dynamic, static_threads) - N = length(iterspace) - Nthreads = Threads.nthreads() - if Nthreads == 1 - len, rem = N, 0 - else - len, rem = divrem(N, Nthreads) - end - # not enough iterations for all the threads? 
- if len == 0 - Nthreads = N - len, rem = 1, 0 - end - if Nthreads == 1 - __thread_run(1, len, rem, obj, ndrange, iterspace, args, dynamic) - else - if static_threads - Threads.@threads :static for tid in 1:Nthreads - __thread_run(tid, len, rem, obj, ndrange, iterspace, args, dynamic) - end - else - @sync for tid in 1:Nthreads - Threads.@spawn __thread_run(tid, len, rem, obj, ndrange, iterspace, args, dynamic) - end - end - end - return nothing -end - -function __thread_run(tid, len, rem, obj, ndrange, iterspace, args, dynamic) - # compute this thread's iterations - f = 1 + ((tid - 1) * len) - l = f + len - 1 - # distribute remaining iterations evenly - if rem > 0 - if tid <= rem - f = f + (tid - 1) - l = l + tid - else - f = f + rem - l = l + rem - end - end - # run this thread's iterations - for i in f:l - block = @inbounds blocks(iterspace)[i] - ctx = mkcontext(obj, block, ndrange, iterspace, dynamic) - obj.f(ctx, args...) - end - return nothing -end - -function mkcontext(kernel::Kernel{CPU}, I, _ndrange, iterspace, ::Dynamic) where {Dynamic} - return CompilerMetadata{ndrange(kernel), Dynamic}(I, _ndrange, iterspace) -end - -@inline function __index_Local_Linear(ctx, idx::CartesianIndex) - indices = workitems(__iterspace(ctx)) - return @inbounds LinearIndices(indices)[idx] -end - -@inline function __index_Group_Linear(ctx, idx::CartesianIndex) - indices = blocks(__iterspace(ctx)) - return @inbounds LinearIndices(indices)[__groupindex(ctx)] -end - -@inline function __index_Global_Linear(ctx, idx::CartesianIndex) - I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx) - return @inbounds LinearIndices(__ndrange(ctx))[I] -end - -@inline function __index_Local_Cartesian(_, idx::CartesianIndex) - return idx -end - -@inline function __index_Group_Cartesian(ctx, ::CartesianIndex) - return __groupindex(ctx) -end - -@inline function __index_Global_Cartesian(ctx, idx::CartesianIndex) - return @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx) -end - 
-@inline function __validindex(ctx, idx::CartesianIndex) - # Turns this into a noop for code where we can turn of checkbounds of - if __dynamic_checkbounds(ctx) - I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx) - return I in __ndrange(ctx) - else - return true - end -end - -### -# CPU implementation of shared memory -### -@inline function SharedMemory(::Type{T}, ::Val{Dims}, ::Val) where {T, Dims} - return MArray{__size(Dims), T}(undef) -end - -### -# CPU implementation of scratch memory -# - memory allocated as a MArray with size `Dims` -### - -struct ScratchArray{N, D} - data::D - ScratchArray{N}(data::D) where {N, D} = new{N, D}(data) -end - -@inline function Scratchpad(ctx, ::Type{T}, ::Val{Dims}) where {T, Dims} - return ScratchArray{length(Dims)}(MArray{__size((Dims..., prod(__groupsize(ctx)))), T}(undef)) -end - -# Base.view creates a boundscheck which captures A -# https://github.com/JuliaLang/julia/issues/39308 -@inline function aview(A, I::Vararg{Any, N}) where {N} - J = Base.to_indices(A, I) - return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...) 
-end - -@inline function Base.getindex(A::ScratchArray{N}, idx) where {N} - return @inbounds aview(A.data, ntuple(_ -> :, Val(N))..., idx) -end - -# Argument conversion -argconvert(k::Kernel{CPU}, arg) = arg - -supports_enzyme(::CPU) = true From afe5818291ebd087b4b964bfa8a87a5233754d6e Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:56:30 -0300 Subject: [PATCH 15/18] Fix cpu backend support --- src/pocl/backend.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pocl/backend.jl b/src/pocl/backend.jl index ffa24746..f12a4e9b 100644 --- a/src/pocl/backend.jl +++ b/src/pocl/backend.jl @@ -58,6 +58,7 @@ KA.pagelock!(::POCLBackend, x) = nothing KA.get_backend(::Array) = POCLBackend() KA.synchronize(::POCLBackend) = nothing KA.supports_float64(::POCLBackend) = true +KA.supports_unified(::POCLBackend) = true ## Kernel Launch From 2ad1d50b0a3c1d81e3b7473d9581da88ceb58e0d Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:56:55 -0300 Subject: [PATCH 16/18] Update docstrings --- src/KernelAbstractions.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index e9b65d17..64e213a5 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -526,9 +526,9 @@ end """ allocate(::Backend, Type, dims...; unified=false)::AbstractArray -Allocate a storage array appropriate for the computational backend. `unified` -allocates an array using unified memory if the backend supports it. Use -[`supports_unified`](@ref) to determine whether it is supported by a backend. +Allocate a storage array appropriate for the computational backend. `unified=true` +allocates an array using unified memory if the backend supports it and throws otherwise. +Use [`supports_unified`](@ref) to determine whether it is supported by a backend. !!! 
note Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)` @@ -550,7 +550,8 @@ end zeros(::Backend, Type, dims...; unified=false)::AbstractArray Allocate a storage array appropriate for the computational backend filled with zeros. -`unified` allocates an array using unified memory if the backend supports it. +`unified=true` allocates an array using unified memory if the backend supports it and +throws otherwise. """ zeros(backend::Backend, T::Type, dims...; kwargs...) = zeros(backend, T, dims; kwargs...) function zeros(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T} @@ -563,7 +564,8 @@ end ones(::Backend, Type, dims...; unified=false)::AbstractArray Allocate a storage array appropriate for the computational backend filled with ones. -`unified` allocates an array using unified memory if the backend supports it. +`unified=true` allocates an array using unified memory if the backend supports it and +throws otherwise. """ ones(backend::Backend, T::Type, dims...; kwargs...) = ones(backend, T, dims; kwargs...) function ones(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T} From fa116c452407cda98675b61e15b5b92e84cf0fed Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 11:01:05 -0300 Subject: [PATCH 17/18] Format --- src/pocl/backend.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pocl/backend.jl b/src/pocl/backend.jl index f12a4e9b..e733cae3 100644 --- a/src/pocl/backend.jl +++ b/src/pocl/backend.jl @@ -21,7 +21,7 @@ end ## Memory Operations -KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple; unified::Bool=false) where {T} = Array{T}(undef, dims) +KA.allocate(::POCLBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} = Array{T}(undef, dims) function KA.zeros(backend::POCLBackend, ::Type{T}, dims::Tuple; kwargs...) where {T} arr = KA.allocate(backend, T, dims; kwargs...) 
From 98ba8903b7fc2272b8727b0acd48a81fe3cd6547 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 22 Jul 2025 11:44:06 -0300 Subject: [PATCH 18/18] Add `supports_unified` to docs --- docs/src/api.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/api.md b/docs/src/api.md index 9373d231..4e107075 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -21,6 +21,7 @@ allocate ```@docs KernelAbstractions.zeros +KernelAbstractions.supports_unified ``` ## Internal