From 02f16948c6847d5d968cc1ec80c998d42eb7255a Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 4 Dec 2025 13:38:29 -0600 Subject: [PATCH 1/8] Update KA API --- src/oneAPIKernels.jl | 100 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 6 deletions(-) diff --git a/src/oneAPIKernels.jl b/src/oneAPIKernels.jl index 2fd144ad..61f2b2d7 100644 --- a/src/oneAPIKernels.jl +++ b/src/oneAPIKernels.jl @@ -15,18 +15,24 @@ import Adapt export oneAPIBackend struct oneAPIBackend <: KA.GPU + prefer_blocks::Bool + always_inline::Bool end -KA.allocate(::oneAPIBackend, ::Type{T}, dims::Tuple) where T = oneArray{T}(undef, dims) -KA.zeros(::oneAPIBackend, ::Type{T}, dims::Tuple) where T = oneAPI.zeros(T, dims) -KA.ones(::oneAPIBackend, ::Type{T}, dims::Tuple) where T = oneAPI.ones(T, dims) +oneAPIBackend(; prefer_blocks=false, always_inline=false) = oneAPIBackend(prefer_blocks, always_inline) + +@inline KA.allocate(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims) +@inline KA.zeros(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), zero(T)) +@inline KA.ones(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), one(T)) KA.get_backend(::oneArray) = oneAPIBackend() # TODO should be non-blocking -KA.synchronize(::oneAPIBackend) = oneL0.synchronize() +KA.synchronize(::oneAPIBackend) = oneAPI.oneL0.synchronize() KA.supports_float64(::oneAPIBackend) = false # TODO: Check if this is device dependent -Adapt.adapt_storage(::oneAPIBackend, a::Array) = Adapt.adapt(oneArray, a) +KA.functional(::oneAPIBackend) = oneAPI.functional() + +Adapt.adapt_storage(::oneAPIBackend, a::AbstractArray) = Adapt.adapt(oneArray, a) Adapt.adapt_storage(::oneAPIBackend, a::oneArray) = a Adapt.adapt_storage(::KA.CPU, a::oneArray) = convert(Array, a) @@ -39,6 +45,24 @@ function KA.copyto!(::oneAPIBackend, A, B) end +## Device Operations + +function KA.ndevices(::oneAPIBackend) + return length(oneAPI.devices()) +end + +function KA.device(::oneAPIBackend)::Int + dev = oneAPI.device() + devs = oneAPI.devices() + idx = findfirst(==(dev), devs) + return idx === nothing ? 1 : idx +end + +function KA.device!(backend::oneAPIBackend, id::Int) + oneAPI.device!(id) +end + + ## Kernel Launch function KA.mkcontext(kernel::KA.Kernel{oneAPIBackend}, _ndrange, iterspace) @@ -83,14 +107,42 @@ function threads_to_workgroupsize(threads, ndrange) end function (obj::KA.Kernel{oneAPIBackend})(args...; ndrange=nothing, workgroupsize=nothing) + backend = KA.backend(obj) + ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange, workgroupsize) # this might not be the final context, since we may tune the workgroupsize ctx = KA.mkcontext(obj, ndrange, iterspace) - kernel = @oneapi launch=false obj.f(ctx, args...) + + # If the kernel is statically sized we can tell the compiler about that + if KA.workgroupsize(obj) <: KA.StaticSize + # TODO: maxthreads + # maxthreads = prod(KA.get(KA.workgroupsize(obj))) + else + # maxthreads = nothing + end + + kernel = @oneapi launch=false always_inline=backend.always_inline obj.f(ctx, args...) # figure out the optimal workgroupsize automatically if KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing items = oneAPI.launch_configuration(kernel) + + if backend.prefer_blocks + # Prefer blocks over threads: + # Reducing the workgroup size (items) increases the number of workgroups (blocks). + # We use a simple heuristic here since we lack full occupancy info (max_blocks) from launch_configuration. + + # If the total range is large enough, full workgroups are fine. + # If the range is small, we might want to reduce 'items' to create more blocks to fill the GPU. + # (Simplified logic compared to CUDA.jl which uses explicit occupancy calculators) + total_items = prod(ndrange) + if total_items < items * 16 # Heuristic factor + # Force at least a few blocks if possible by reducing items per block + target_blocks = 16 # Target at least 16 blocks + items = max(1, min(items, cld(total_items, target_blocks))) + end + end + workgroupsize = threads_to_workgroupsize(items, ndrange) iterspace, dynamic = KA.partition(obj, ndrange, workgroupsize) ctx = KA.mkcontext(obj, ndrange, iterspace) @@ -171,6 +223,42 @@ end ## Other +Adapt.adapt_storage(to::KA.ConstAdaptor, a::oneDeviceArray) = Base.Experimental.Const(a) + KA.argconvert(::KA.Kernel{oneAPIBackend}, arg) = kernel_convert(arg) +function KA.priority!(::oneAPIBackend, prio::Symbol) + if !(prio in (:high, :normal, :low)) + error("priority must be one of :high, :normal, :low") + end + + priority_enum = if prio == :high + oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH + elseif prio == :low + oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW + else + oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_NORMAL + end + + ctx = oneAPI.context() + dev = oneAPI.device() + + # Update the cached queue + # We synchronize the current queue first to ensure safety + current_queue = oneAPI.global_queue(ctx, dev) + oneAPI.oneL0.synchronize(current_queue) + + # Replace the queue in task_local_storage + # The key used by global_queue is (:ZeCommandQueue, ctx, dev) + + new_queue = oneAPI.oneL0.ZeCommandQueue(ctx, dev; + flags = oneAPI.oneL0.ZE_COMMAND_QUEUE_FLAG_IN_ORDER, + priority = priority_enum + ) + + task_local_storage((:ZeCommandQueue, ctx, dev), new_queue) + + return nothing +end + end From 324114a48581d8c0da72c5335f6114f622493ef8 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 12 Dec 2025 10:15:35 -0600 Subject: [PATCH 2/8] Support keys() and unsafe_cached_load() --- lib/level-zero/device.jl | 2 ++ src/context.jl | 8 +++++++- src/device/array.jl | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/level-zero/device.jl b/lib/level-zero/device.jl index 07a72e17..58d5b2da 100644 --- a/lib/level-zero/device.jl +++ b/lib/level-zero/device.jl @@ -204,6 +204,8 @@ Base.length(iter::ZeDevices) = length(iter.handles) Base.IteratorSize(::ZeDevices) = Base.HasLength() +Base.keys(iter::ZeDevices) = 1:length(iter) + function Base.show(io::IO, ::MIME"text/plain", iter::ZeDevices) print(io, "ZeDevice iterator for $(length(iter)) devices") if !isempty(iter) diff --git a/src/context.jl b/src/context.jl index b0f9ff10..89a2f219 100644 --- a/src/context.jl +++ b/src/context.jl @@ -103,7 +103,13 @@ See also: [`device`](@ref), [`devices`](@ref) function device!(drv::ZeDevice) task_local_storage(:ZeDevice, drv) end -device!(i::Int) = device!(devices(driver())[i]) +function device!(i::Int) + devs = devices(driver()) + if i < 1 || i > length(devs) + throw(ArgumentError("Invalid device index $i (must be between 1 and $(length(devs)))")) + end + device!(devs[i]) +end const global_contexts = Dict{ZeDriver,ZeContext}() diff --git a/src/device/array.jl b/src/device/array.jl index ae339110..95fc911f 100644 --- a/src/device/array.jl +++ b/src/device/array.jl @@ -195,6 +195,13 @@ end end end +@device_function @inline function unsafe_cached_load(ptr::LLVMPtr{T,A}, i::Integer, align::Val) where {T,A} + # For SPIR-V/Level Zero, we don't have explicit cache control intrinsics like CUDA's __ldg + # So we fall back to a regular unsafe_load. The SPIR-V compiler may still apply + # appropriate optimizations based on context. + unsafe_load(ptr, i, align) +end + @device_function @inline function const_arrayref(A::oneDeviceArray{T}, index::Integer) where {T} # simplified bounds check (see `arrayset`) #@boundscheck checkbounds(A, index) From e723fdf0bae7b8e668a18e51fc61816bdbfbbf69 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 12 Dec 2025 12:15:30 -0600 Subject: [PATCH 3/8] Runic, KA 0.9.39, and support_unified() --- Project.toml | 2 +- src/context.jl | 2 +- src/device/array.jl | 2 +- src/oneAPIKernels.jl | 22 ++++++++++++---------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index d9925aae..abd8d7e0 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,7 @@ ExprTools = "0.1" GPUArrays = "11.2.1" GPUCompiler = "1.6" GPUToolbox = "0.1, 0.2, 0.3, 1" -KernelAbstractions = "0.9.1" +KernelAbstractions = "0.9.39" LLVM = "6, 7, 8, 9" NEO_jll = "=25.44.36015" Preferences = "1" diff --git a/src/context.jl b/src/context.jl index 89a2f219..710bf720 100644 --- a/src/context.jl +++ b/src/context.jl @@ -108,7 +108,7 @@ function device!(i::Int) if i < 1 || i > length(devs) throw(ArgumentError("Invalid device index $i (must be between 1 and $(length(devs)))")) end - device!(devs[i]) + return device!(devs[i]) end const global_contexts = Dict{ZeDriver,ZeContext}() diff --git a/src/device/array.jl b/src/device/array.jl index 95fc911f..24822656 100644 --- a/src/device/array.jl +++ b/src/device/array.jl @@ -195,7 +195,7 @@ end end end -@device_function @inline function unsafe_cached_load(ptr::LLVMPtr{T,A}, i::Integer, align::Val) where {T,A} +@device_function @inline function unsafe_cached_load(ptr::LLVMPtr{T, A}, i::Integer, align::Val) where {T, A} # For SPIR-V/Level Zero, we don't have explicit cache control intrinsics like CUDA's __ldg # So we fall back to a regular unsafe_load. The SPIR-V compiler may still apply # appropriate optimizations based on context. diff --git a/src/oneAPIKernels.jl b/src/oneAPIKernels.jl index 61f2b2d7..6e092397 100644 --- a/src/oneAPIKernels.jl +++ b/src/oneAPIKernels.jl @@ -19,16 +19,17 @@ struct oneAPIBackend <: KA.GPU always_inline::Bool end -oneAPIBackend(; prefer_blocks=false, always_inline=false) = oneAPIBackend(prefer_blocks, always_inline) +oneAPIBackend(; prefer_blocks = false, always_inline = false) = oneAPIBackend(prefer_blocks, always_inline) -@inline KA.allocate(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims) -@inline KA.zeros(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), zero(T)) -@inline KA.ones(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where T = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), one(T)) +@inline KA.allocate(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} = oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims) +@inline KA.zeros(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), zero(T)) +@inline KA.ones(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} = fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), one(T)) KA.get_backend(::oneArray) = oneAPIBackend() # TODO should be non-blocking KA.synchronize(::oneAPIBackend) = oneAPI.oneL0.synchronize() KA.supports_float64(::oneAPIBackend) = false # TODO: Check if this is device dependent +KA.supports_unified(::oneAPIBackend) = true KA.functional(::oneAPIBackend) = oneAPI.functional() @@ -59,7 +60,7 @@ function KA.device(::oneAPIBackend)::Int end function KA.device!(backend::oneAPIBackend, id::Int) - oneAPI.device!(id) + return oneAPI.device!(id) end @@ -121,7 +122,7 @@ function (obj::KA.Kernel{oneAPIBackend})(args...; ndrange=nothing, workgroupsize # maxthreads = nothing end - kernel = @oneapi launch=false always_inline=backend.always_inline obj.f(ctx, args...) + kernel = @oneapi launch = false always_inline = backend.always_inline obj.f(ctx, args...) # figure out the optimal workgroupsize automatically if KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing @@ -137,9 +138,9 @@ function (obj::KA.Kernel{oneAPIBackend})(args...; ndrange=nothing, workgroupsize # (Simplified logic compared to CUDA.jl which uses explicit occupancy calculators) total_items = prod(ndrange) if total_items < items * 16 # Heuristic factor - # Force at least a few blocks if possible by reducing items per block - target_blocks = 16 # Target at least 16 blocks - items = max(1, min(items, cld(total_items, target_blocks))) + # Force at least a few blocks if possible by reducing items per block + target_blocks = 16 # Target at least 16 blocks + items = max(1, min(items, cld(total_items, target_blocks))) end end @@ -251,7 +252,8 @@ function KA.priority!(::oneAPIBackend, prio::Symbol) # Replace the queue in task_local_storage # The key used by global_queue is (:ZeCommandQueue, ctx, dev) - new_queue = oneAPI.oneL0.ZeCommandQueue(ctx, dev; + new_queue = oneAPI.oneL0.ZeCommandQueue( + ctx, dev; flags = oneAPI.oneL0.ZE_COMMAND_QUEUE_FLAG_IN_ORDER, priority = priority_enum ) From 6bf7cde6c404898c385293542e3ea7fb27ef2bd7 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 12 Dec 2025 14:01:57 -0600 Subject: [PATCH 4/8] Circular dependency in 1.10 --- test/Project.toml | 1 - test/setup.jl | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index cb603629..c214ed96 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,5 @@ [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" -AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" diff --git a/test/setup.jl b/test/setup.jl index a3b0f1a4..269d5b9c 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -1,4 +1,4 @@ -using Distributed, Test, oneAPI, AcceleratedKernels +using Distributed, Test, oneAPI oneAPI.functional() || error("oneAPI.jl is not functional on this system") From 16784d7a58cef12d9e772ea58e5b82d70a9b3bae Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 12 Dec 2025 14:21:21 -0600 Subject: [PATCH 5/8] Again deps --- Project.toml | 3 ++- ext/oneAPIAcceleratedKernelsExt.jl => src/accumulate.jl | 4 ---- src/oneAPI.jl | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) rename ext/oneAPIAcceleratedKernelsExt.jl => src/accumulate.jl (92%) diff --git a/Project.toml b/Project.toml index abd8d7e0..f43159b8 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ authors = ["Tim Besard ", "Alexis Montoison", "Michel Scha [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" +AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82" ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -37,7 +38,7 @@ oneAPIAcceleratedKernelsExt = "AcceleratedKernels" [compat] AbstractFFTs = "1.5.0" -AcceleratedKernels = "0.4.3" +AcceleratedKernels = "0.3.1, 0.4" Adapt = "4" CEnum = "0.4, 0.5" ExprTools = "0.1" diff --git a/ext/oneAPIAcceleratedKernelsExt.jl b/src/accumulate.jl similarity index 92% rename from ext/oneAPIAcceleratedKernelsExt.jl rename to src/accumulate.jl index 6bb96180..f893dc96 100644 --- a/ext/oneAPIAcceleratedKernelsExt.jl +++ b/src/accumulate.jl @@ -1,5 +1,3 @@ -module oneAPIAcceleratedKernelsExt - import oneAPI import oneAPI: oneArray, oneAPIBackend import AcceleratedKernels as AK @@ -13,5 +11,3 @@ Base.accumulate(op, A::oneArray; init = zero(eltype(A)), kwargs...) = Base.cumsum(src::oneArray; kwargs...) = AK.cumsum(src, oneAPIBackend(); kwargs...) Base.cumprod(src::oneArray; kwargs...) = AK.cumprod(src, oneAPIBackend(); kwargs...) - -end # module diff --git a/src/oneAPI.jl b/src/oneAPI.jl index b9caa398..e10dd0d3 100644 --- a/src/oneAPI.jl +++ b/src/oneAPI.jl @@ -69,6 +69,7 @@ include("utils.jl") include("oneAPIKernels.jl") import .oneAPIKernels: oneAPIBackend +include("accumulate.jl") include("indexing.jl") export oneAPIBackend From eff363ded81ddba94aa248e9239fdb08894302ac Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 12 Dec 2025 14:31:25 -0600 Subject: [PATCH 6/8] Again deps --- Project.toml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index f43159b8..bd0179c2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "oneAPI" uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" -version = "2.6.0" authors = ["Tim Besard ", "Alexis Montoison", "Michel Schanen "] +version = "2.6.0" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" @@ -30,12 +30,6 @@ oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" oneAPI_Level_Zero_Loader_jll = "13eca655-d68d-5b81-8367-6d99d727ab01" oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36" -[weakdeps] -AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" - -[extensions] -oneAPIAcceleratedKernelsExt = "AcceleratedKernels" - [compat] AbstractFFTs = "1.5.0" AcceleratedKernels = "0.3.1, 0.4" From a1436e14bb6f6ce81e4d3a21a1e2542b3f2ad587 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Mon, 15 Dec 2025 10:26:03 -0600 Subject: [PATCH 7/8] scalar indexing for unified arrays --- src/array.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/array.jl b/src/array.jl index 4d621cf2..7a1e14cb 100644 --- a/src/array.jl +++ b/src/array.jl @@ -345,6 +345,19 @@ function Base.unsafe_convert(::Type{ZePtr{T}}, x::oneArray{T}) where {T} end +## indexing + +# Host-accessible arrays can be indexed from CPU, bypassing GPUArrays restrictions +function Base.getindex(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer,oneL0.SharedBuffer}}, I::Int) + @boundscheck checkbounds(x, I) + unsafe_load(pointer(x, I; type=oneL0.HostBuffer)) +end + +function Base.setindex!(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer,oneL0.SharedBuffer}}, v, I::Int) + @boundscheck checkbounds(x, I) + unsafe_store!(pointer(x, I; type=oneL0.HostBuffer), v) +end + ## interop with GPU arrays From 493e06d749916d2731c55b923abbcfe2c98a3caf Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Mon, 15 Dec 2025 11:49:12 -0600 Subject: [PATCH 8/8] Runic --- src/array.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/array.jl b/src/array.jl index 7a1e14cb..f219ad5f 100644 --- a/src/array.jl +++ b/src/array.jl @@ -348,14 +348,14 @@ end ## indexing # Host-accessible arrays can be indexed from CPU, bypassing GPUArrays restrictions -function Base.getindex(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer,oneL0.SharedBuffer}}, I::Int) - @boundscheck checkbounds(x, I) - unsafe_load(pointer(x, I; type=oneL0.HostBuffer)) +function Base.getindex(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer, oneL0.SharedBuffer}}, I::Int) + @boundscheck checkbounds(x, I) + return unsafe_load(pointer(x, I; type = oneL0.HostBuffer)) end -function Base.setindex!(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer,oneL0.SharedBuffer}}, v, I::Int) - @boundscheck checkbounds(x, I) - unsafe_store!(pointer(x, I; type=oneL0.HostBuffer), v) +function Base.setindex!(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer, oneL0.SharedBuffer}}, v, I::Int) + @boundscheck checkbounds(x, I) + return unsafe_store!(pointer(x, I; type = oneL0.HostBuffer), v) end