Skip to content

Commit deb8ecb

Browse files
committed
Add a name kwarg to gpu_call for better debug info.
1 parent 4463977 commit deb8ecb

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

src/device/execution.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ host to influence how the kernel is executed. The following keyword arguments ar
3333
no other keyword arguments that influence the launch configuration are specified.
3434
- `threads::Int` and `blocks::Int`: configure exactly how many threads and blocks are
3535
launched. This cannot be used in combination with the `total_threads` argument.
36+
- `name::String`: inform the back end about the name of the kernel to be executed.
37+
This can be used to emit better diagnostics, and is useful with anonymous kernels.
3638
"""
3739
function gpu_call(kernel::Base.Callable, args...;
3840
target::AbstractArray=first(args),
3941
total_threads::Union{Int,Nothing}=nothing,
4042
threads::Union{Int,Nothing}=nothing,
4143
blocks::Union{Int,Nothing}=nothing,
42-
kwargs...)
44+
name::Union{String,Nothing}=nothing)
4345
# determine how many threads/blocks to launch
4446
if total_threads===nothing && threads===nothing && blocks===nothing
4547
total_threads = length(target)
@@ -58,7 +60,7 @@ function gpu_call(kernel::Base.Callable, args...;
5860
end
5961
end
6062

61-
gpu_call(backend(target), kernel, args...; threads=threads, blocks=blocks, kwargs...)
63+
gpu_call(backend(target), kernel, args...; threads=threads, blocks=blocks, name=name)
6264
end
6365

6466
# Generic fallback for any `AbstractGPUBackend`: accepts the kernel, its arguments and
# arbitrary keyword arguments, and unconditionally raises `error("Not implemented")`.
# Back ends are expected to supply their own more specific method (the diff for
# src/reference.jl shows one for `JLBackend`) -- this presumes such back-end types
# subtype `AbstractGPUBackend`; TODO confirm against the type definitions.
# NOTE(review): the trailing COV_EXCL_LINE marker appears to be a coverage-exclusion
# annotation for this single line; keep the definition on one line so it still applies.
gpu_call(backend::AbstractGPUBackend, kernel, args...; kwargs...) = error("Not implemented") # COV_EXCL_LINE

src/host/broadcast.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ end
6060
@inline function Base.copyto!(dest::GPUDestArray, bc::Broadcasted{Nothing})
6161
axes(dest) == axes(bc) || Broadcast.throwdm(axes(dest), axes(bc))
6262
bc′ = Broadcast.preprocess(dest, bc)
63-
gpu_call(dest, bc′) do ctx, dest, bc′
63+
gpu_call(dest, bc′; name="broadcast") do ctx, dest, bc′
6464
let I = CartesianIndex(@cartesianidx(dest))
6565
#@inbounds dest[I] = bc′[I]
6666
@inbounds let

src/reference.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ end
6464
Base.getindex(r::JlRefValue) = r.x
6565
Adapt.adapt_structure(to::Adaptor, r::Base.RefValue) = JlRefValue(adapt(to, r[]))
6666

67-
function GPUArrays.gpu_call(::JLBackend, f, args...; blocks::Int, threads::Int)
67+
function GPUArrays.gpu_call(::JLBackend, f, args...; blocks::Int, threads::Int,
68+
name::Union{String,Nothing})
6869
ctx = JLKernelContext(threads, blocks)
6970
device_args = jlconvert.(args)
7071
tasks = Array{Task}(undef, threads)

0 commit comments

Comments
 (0)