Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
78043ba
Add a cache of CUDA kernel names created from the stack trace
petebachant Oct 7, 2025
687365e
Create kernel name keys with arg types
petebachant Oct 7, 2025
26e4ad5
Update kernel naming
petebachant Oct 7, 2025
3a94f7c
Update naming
petebachant Oct 7, 2025
f3542d4
Disable default kernel naming
petebachant Oct 7, 2025
cf830bf
Put kernel renaming option into DebugOnly
petebachant Oct 7, 2025
e1285ae
Switch back to using object ID alone for kernel name
petebachant Oct 20, 2025
06c44e2
Use methodinstance for kernel name key
petebachant Oct 20, 2025
7411a8d
Improve readability of kernel names
petebachant Oct 24, 2025
9dd3116
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/kernel-n…
petebachant Oct 24, 2025
93fcf68
Handle packages without Clima in their name
petebachant Oct 24, 2025
25f23ee
Handle packages without Clima in their name
petebachant Oct 24, 2025
9f0fa87
Use src for file as fallback
petebachant Oct 24, 2025
31ec263
Add GPUCompiler as a weakdep
petebachant Oct 27, 2025
eafbe54
Use GPUCompiler method directly
petebachant Oct 27, 2025
52ff458
Switch to pure env var based stack trace naming
petebachant Oct 27, 2025
46c1f6c
Use a constant set at compile time
petebachant Oct 27, 2025
fca1ad1
Remove GPUCompiler weakdep
petebachant Oct 27, 2025
05a5d5b
Use methodinstance from CUDA
petebachant Nov 3, 2025
e7bca7d
Merge branch 'main' of https://github.com/CliMA/ClimaCore.jl into pb/…
petebachant Nov 3, 2025
a541746
Use splitpath to split path
petebachant Nov 4, 2025
7af8d16
Make kernel naming purely dynamic based on env var reading
petebachant Nov 5, 2025
cec2ef7
Pass args into get_kernel_name
petebachant Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions ext/cuda/cuda_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ import CUDA
import ClimaCore.Fields
import ClimaCore.DataLayouts
import ClimaCore.DataLayouts: empty_kernel_stats
import ClimaCore.DebugOnly: name_kernels_from_stack_trace

# Store emptied by `empty_kernel_stats`. Its key/value types are not visible
# from this part of the file, so it is left untyped.
const reported_stats = Dict()
# In-memory cache of kernel names, keyed by `string(objectid(f!))` in
# `auto_launch!`. The value is `nothing` when no relevant stack frame was found
# for a kernel; the value type must admit `nothing`, otherwise caching that
# result throws a `MethodError` from `convert(AbstractString, nothing)`.
const kernel_names = Dict{String, Union{Nothing, AbstractString}}()
# Clears every entry of `reported_stats` and returns the emptied Dict.
# Call via ClimaCore.DataLayouts.empty_kernel_stats()
function empty_kernel_stats(::ClimaComms.CUDADevice)
    return empty!(reported_stats)
end
# Switch consulted by `auto_launch!` after launching a kernel (development use
# only). Returns `false`.
function collect_kernel_stats()
    return false
end
Expand Down Expand Up @@ -39,19 +41,60 @@ function auto_launch!(
always_inline = true,
caller = :unknown,
) where {F!}
# If enabled, compute a kernel name from the stack trace and store it in the
# global `kernel_names` Dict, which serves as an in-memory cache
kernel_name = nothing
if name_kernels_from_stack_trace()
# Key the cache on the identity of the kernel function only (argument types are not part of the key)
key = string(objectid(f!))
kernel_name_exists = key in keys(kernel_names)
if !kernel_name_exists
Comment on lines +38 to +39
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These syntax changes may be less clear, so feel free to ignore

        kernel_name_exists =  haskey(kernel_names, key)
        if !kernel_name_exists

or

kernel_name = get!(kernel_names, key) do
#calculate_name_here
end

# Construct the kernel name, ignoring modules we don't care about
uninteresting_modules = [
:Base,
:Core,
:GPUCompiler,
:CUDA,
:NVTX,
:ClimaCoreCUDAExt,
:ClimaCore,
]
stack = stacktrace()
first_relevant_index = findfirst(stack) do frame
frame.linfo isa Core.MethodInstance && (
fullname(frame.linfo.def.module)[1] ∉ uninteresting_modules
)
end
if !isnothing(first_relevant_index)
frame = stack[first_relevant_index]
name_str =
string(frame.func) *
"_" *
string(frame.linfo.def.file) *
"_" *
string(frame.line)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a good idea to either handle the case where frame.linfo.def.file is inside NVTX.jl specially, or to leave a note here that filenames and line numbers of kernels will be incorrect if inside an NVTX annotation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about the format below? If it's an NVTX annotation, it's easy to see that. Likewise, it's easy to tell if the name is a function defined in a file:

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this a lot. If a lot of kernels are getting caught in the NVTX annotations, it might make sense to reduce the number of NVTX annotations in downstream packages

kernel_name = replace(name_str, r"[^A-Za-z0-9]" => "_")
end
@debug "Using kernel name: $kernel_name"
kernel_names[key] = kernel_name
end
kernel_name = kernel_names[key]
end

if auto
@assert !isnothing(nitems)
if nitems ≥ 0
kernel = CUDA.@cuda always_inline = true launch = false f!(args...)
kernel = CUDA.@cuda name = kernel_name always_inline = true launch =
false f!(args...)
config = CUDA.launch_configuration(kernel.fun)
threads = min(nitems, config.threads)
blocks = cld(nitems, threads)
kernel(args...; threads, blocks) # This knows to use always_inline from above.
end
else
kernel =
CUDA.@cuda always_inline = always_inline threads = threads_s blocks =
blocks_s f!(args...)
CUDA.@cuda name = kernel_name always_inline = always_inline threads =
threads_s blocks = blocks_s f!(args...)
end

if collect_kernel_stats() # only for development use
Expand Down
2 changes: 2 additions & 0 deletions src/DebugOnly/DebugOnly.jl
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,6 @@ function allow_mismatched_spaces_unsafe()
return false
end

"""
    name_kernels_from_stack_trace()

Return whether CUDA kernels should be given a name derived from the stack
trace at launch time. Returns `false` by default.
"""
function name_kernels_from_stack_trace()
    return false
end

end
Loading