From c37d0fadb786091038863df19ef6b800d9e18765 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 13 Jan 2025 14:49:43 +0100 Subject: [PATCH 1/5] Document the semantics of copyto! and add pagelock! --- src/KernelAbstractions.jl | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 55d5d465e..e34a0c3a0 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -110,13 +110,45 @@ macro Const end """ copyto!(::Backend, dest::AbstractArray, src::AbstractArray) -Perform a `copyto!` operation that execution ordered with respect to the backend. +Perform a `copyto!` operation that is execution ordered with respect to the backend. + +!!! note + On some backends it may be necessary to first call [`pagelock!`](@ref) on host memory, + to enable fully asynchronous behaviour w.r.t to the host. + +!!! warning + While this function is always asynchronous w.r.t. to the device, it may be synchronous w.r.t to the host. + Additionally if the function is asynchronous w.r.t to the host, the user is required to gurantuee, the lifetime + of the host buffer. Otherwise the user may cause a use-after-free, because the GC was able to prove that the host + buffer can be freed. + + ```julia + arr = zeros(64) + GC.@preserve arr begin + copyto!(backend, arr, ...) + # other operations + synchronize(backend) + end + ``` !!! note Backend implementations **must** implement this function. """ function copyto! end +""" + pagelock!(::Backend, dest::AbstractArray) + +Pagelock (pin) a host memory buffer for a backend device. This may be necessary for [`copyto!`](@ref) +to perform asynchronously w.r.t to the host/ + +!!! note + Backends **may** implement this function. 
+""" +function pagelock!(::Backend, x) + return nothing +end + """ synchronize(::Backend) From d5c67f686497c5a2e44fcd64789080dff422d4f6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 13 Jan 2025 14:53:20 +0100 Subject: [PATCH 2/5] fixup! Document the semantics of copyto! and add pagelock! --- src/KernelAbstractions.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index e34a0c3a0..d5c72a8fa 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -111,14 +111,14 @@ macro Const end copyto!(::Backend, dest::AbstractArray, src::AbstractArray) Perform a `copyto!` operation that is execution ordered with respect to the backend. +For most uses `Base.copyto!` provides a fully synchronous interface. !!! note On some backends it may be necessary to first call [`pagelock!`](@ref) on host memory, to enable fully asynchronous behaviour w.r.t to the host. !!! warning - While this function is always asynchronous w.r.t. to the device, it may be synchronous w.r.t to the host. - Additionally if the function is asynchronous w.r.t to the host, the user is required to gurantuee, the lifetime + If the function is asynchronous w.r.t to the host, the user is required to gurantuee, the lifetime of the host buffer. Otherwise the user may cause a use-after-free, because the GC was able to prove that the host buffer can be freed. 
From e86376935a7ea729b7d7348f9a2b3f889aa5a55e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 21 Jan 2025 12:08:11 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Tim Besard --- src/KernelAbstractions.jl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index d5c72a8fa..1bb70d34e 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -110,17 +110,19 @@ macro Const end """ copyto!(::Backend, dest::AbstractArray, src::AbstractArray) -Perform a `copyto!` operation that is execution ordered with respect to the backend. -For most uses `Base.copyto!` provides a fully synchronous interface. +Perform an asynchronous `copyto!` operation that is execution ordered with respect to the back-end. -!!! note - On some backends it may be necessary to first call [`pagelock!`](@ref) on host memory, - to enable fully asynchronous behaviour w.r.t to the host. +For most users, `Base.copyto!` should suffice, performing a simple, synchronous copy. +Only when you know you need asynchronicity w.r.t. the host, you should consider using +this asynchronous version, which requires additional lifetime guarantees as documented below. !!! warning - If the function is asynchronous w.r.t to the host, the user is required to gurantuee, the lifetime - of the host buffer. Otherwise the user may cause a use-after-free, because the GC was able to prove that the host - buffer can be freed. + + Because of the asynchronous nature of this operation, the user is required to guarantee that the lifetime + of the source extends past the *completion* of the copy operation so as to avoid a use-after-free. It is not + sufficient to simply use `GC.@preserve` around the call to `copyto!`, because that only extends the + lifetime past the operation getting queued. 
Instead, it may be required to `synchronize()`, + or otherwise guarantee that the source will still be around when the copy is executed: ```julia arr = zeros(64) @@ -131,6 +133,11 @@ For most uses `Base.copyto!` provides a fully synchronous interface. end ``` +!!! note + + On some back-ends it may be necessary to first call [`pagelock!`](@ref) on host memory + to enable fully asynchronous behavior w.r.t. the host. + !!! note Backend implementations **must** implement this function. """ From 45c12af54220714adf82548a20521394f7b9a6f0 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 21 Jan 2025 12:08:24 +0100 Subject: [PATCH 4/5] Update src/KernelAbstractions.jl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ludovic Räss <61313342+luraess@users.noreply.github.com> --- src/KernelAbstractions.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 1bb70d34e..be1b2cfcd 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -139,7 +139,7 @@ this asynchronous version, which requires additional lifetime guarantees as docu to enable fully asynchronous behavior w.r.t. the host. !!! note - Backend implementations **must** implement this function. + Backends **must** implement this function. """ function copyto! end From aa5ca4f1794a02f6a64e2380fdb845b8a869681d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 21 Jan 2025 12:14:33 +0100 Subject: [PATCH 5/5] move default pagelock implementation --- src/KernelAbstractions.jl | 11 ++++++++--- src/cpu.jl | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index be1b2cfcd..a9dc1ef2a 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -149,12 +149,13 @@ function copyto! end Pagelock (pin) a host memory buffer for a backend device. 
This may be necessary for [`copyto!`](@ref) to perform asynchronously w.r.t to the host/ +This function should return `nothing`; or `missing` if not implemented. + + !!! note Backends **may** implement this function. """ -function pagelock!(::Backend, x) - return nothing -end +function pagelock! end """ synchronize(::Backend) @@ -601,6 +602,10 @@ function functional(::Backend) return missing end +function pagelock!(::Backend, x) + return missing +end + include("nditeration.jl") using .NDIteration import .NDIteration: get diff --git a/src/cpu.jl b/src/cpu.jl index 79252a26b..513648f5d 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -33,6 +33,7 @@ function copyto!(backend::CPU, A, B) end functional(::CPU) = true +pagelock!(::CPU, x) = nothing function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing) ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize)