Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
78043ba
Add a cache of CUDA kernel names created from the stack trace
petebachant Oct 7, 2025
687365e
Create kernel name keys with arg types
petebachant Oct 7, 2025
26e4ad5
Update kernel naming
petebachant Oct 7, 2025
3a94f7c
Update naming
petebachant Oct 7, 2025
f3542d4
Disable default kernel naming
petebachant Oct 7, 2025
cf830bf
Put kernel renaming option into DebugOnly
petebachant Oct 7, 2025
e1285ae
Switch back to using object ID alone for kernel name
petebachant Oct 20, 2025
06c44e2
Use methodinstance for kernel name key
petebachant Oct 20, 2025
7411a8d
Improve readability of kernel names
petebachant Oct 24, 2025
9dd3116
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/kernel-n…
petebachant Oct 24, 2025
93fcf68
Handle packages without Clima in their name
petebachant Oct 24, 2025
25f23ee
Handle packages without Clima in their name
petebachant Oct 24, 2025
9f0fa87
Use src for file as fallback
petebachant Oct 24, 2025
31ec263
Add GPUCompiler as a weakdep
petebachant Oct 27, 2025
eafbe54
Use GPUCompiler method directly
petebachant Oct 27, 2025
52ff458
Switch to pure env var based stack trace naming
petebachant Oct 27, 2025
46c1f6c
Use a constant set at compile time
petebachant Oct 27, 2025
fca1ad1
Remove GPUCompiler weakdep
petebachant Oct 27, 2025
05a5d5b
Use methodinstance from CUDA
petebachant Nov 3, 2025
e7bca7d
Merge branch 'main' of https://github.com/CliMA/ClimaCore.jl into pb/…
petebachant Nov 3, 2025
a541746
Use splitpath to split path
petebachant Nov 4, 2025
7af8d16
Make kernel naming purely dynamic based on env var reading
petebachant Nov 5, 2025
cec2ef7
Pass args into get_kernel_name
petebachant Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 72 additions & 3 deletions ext/cuda/cuda_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ import CUDA
import ClimaCore.Fields
import ClimaCore.DataLayouts
import ClimaCore.DataLayouts: empty_kernel_stats
import ClimaCore.DebugOnly: name_kernels_from_stack_trace
import CUDA.GPUCompiler: methodinstance
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if importing a package through another package is recommended or not, and I can't find anything when I look online.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does feel wrong. Would it be smarter to make GPUCompiler an explicit dependency of ClimaCore? Is there a nice way to make it optional for just the CUDA extension?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading a little more about this, I guess it would go in weakdeps?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ClimaLand is a good example of using extensions/weak deps. I'm not sure if there is a way to manage extensions with the package manager though (I never found a way for Julia 1.10), so you might need to edit the Project.toml by hand.

Copy link
Member Author

@petebachant petebachant Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was able to add the weak dep to ClimaCore with add --weak in Julia v1.11, but I don't seem to be able to add it to the extension.


# Module-level container that `empty_kernel_stats` clears.
# NOTE(review): presumably records which kernels have already had their stats
# reported; the code that fills it is not visible here, so confirm.
const reported_stats = Dict()
# In-memory cache of kernel names. `auto_launch!` looks entries up and stores them
# under a key computed as `objectid(methodinstance(typeof(f!), typeof(args)))`.
# A stored value may be `nothing` when no name could be derived from the stack trace.
const kernel_names = IdDict()
# Clear every entry from `reported_stats` and return the emptied container.
# Call via ClimaCore.DataLayouts.empty_kernel_stats()
function empty_kernel_stats(::ClimaComms.CUDADevice)
    return empty!(reported_stats)
end
# Switch consulted by `auto_launch!` after launching a kernel; intended for
# development use only. Returns `false`, so the stats branch is skipped.
function collect_kernel_stats()
    return false
end
Expand Down Expand Up @@ -39,19 +42,85 @@ function auto_launch!(
always_inline = true,
caller = :unknown,
) where {F!}
# If desired, compute a kernel name from the stack trace and store in
# a global Dict, which serves as an in memory cache
kernel_name = nothing
if name_kernels_from_stack_trace()
# Create a key from the method instance and types of the args
key = objectid(methodinstance(typeof(f!), typeof(args)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
key = objectid(methodinstance(typeof(f!), typeof(args)))
key = objectid(methodinstance(F!, typeof(args)))

The function signature could also be changed to

function auto_launch!(
    f!::F!,
    args::ARGS,
    nitems::Union{Integer, Nothing} = nothing;
    auto = false,
    threads_s = nothing,
    blocks_s = nothing,
    always_inline = true,
    caller = :unknown,
) where {F!, ARGS}
.
.
.

 key = objectid(methodinstance(F!, ARGS))

but I think that forces specialization of the method (I'm not sure if that is desirable or not here).

kernel_name_exists = key in keys(kernel_names)
if !kernel_name_exists
Comment on lines +38 to +39
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These syntax changes may be less clear, so feel free to ignore

        kernel_name_exists =  haskey(kernel_names, key)
        if !kernel_name_exists

or

kernel_name = get!(kernel_names, key) do
#calculate_name_here
end

# Construct the kernel name, ignoring modules we don't care about
ignore_modules = [
:Base,
:Core,
:GPUCompiler,
:CUDA,
:NVTX,
:ClimaCoreCUDAExt,
:ClimaCore,
]
stack = stacktrace()
first_relevant_index = findfirst(stack) do frame
frame.linfo isa Core.MethodInstance && (
fullname(frame.linfo.def.module)[1] ∉ ignore_modules
)
end
if !isnothing(first_relevant_index)
# Don't include file if this is inside an NVTX annotation
frame = stack[first_relevant_index]
func_name = string(frame.func)
if contains(func_name, "#")
func_name = split(func_name, "#")[1]
end
file_path = frame.linfo.def.file
fp_split = split(string(file_path), "/")
if "NVTX" in fp_split
fp_string = "_NVTX"
line_string = ""
else
# Trim base directory off of file path to shorten
package_index = findfirst(fp_split) do part
startswith(part, "Clima")
end
if isnothing(package_index)
package_index = findfirst(p -> p == ".julia", fp_split)
end
if isnothing(package_index)
package_index = findfirst(p -> p == "src", fp_split)
end
if isnothing(package_index)
package_index = 1
end
fp_string =
"_FILE_" *
string(joinpath(fp_split[package_index:end]...))
line_string = "_L" * string(frame.line)
end
name_str = string(func_name) * fp_string * line_string
kernel_name = replace(name_str, r"[^A-Za-z0-9]" => "_")
end
@debug "Using kernel name: $kernel_name"
kernel_names[key] = kernel_name
end
kernel_name = kernel_names[key]
end

if auto
@assert !isnothing(nitems)
if nitems ≥ 0
kernel = CUDA.@cuda always_inline = true launch = false f!(args...)
# Note: `name = nothing` here will revert to default behavior
kernel = CUDA.@cuda name = kernel_name always_inline = true launch =
false f!(args...)
config = CUDA.launch_configuration(kernel.fun)
threads = min(nitems, config.threads)
blocks = cld(nitems, threads)
kernel(args...; threads, blocks) # This knows to use always_inline from above.
end
else
kernel =
CUDA.@cuda always_inline = always_inline threads = threads_s blocks =
blocks_s f!(args...)
CUDA.@cuda name = kernel_name always_inline = always_inline threads =
threads_s blocks = blocks_s f!(args...)
end

if collect_kernel_stats() # only for development use
Expand Down
2 changes: 2 additions & 0 deletions src/DebugOnly/DebugOnly.jl
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,6 @@ function allow_mismatched_spaces_unsafe()
return false
end

# Debug switch read by the CUDA extension's `auto_launch!`: when it returns `true`,
# a kernel name is built from the current stack trace and cached; when `false`
# (the value returned here), no name is computed and `name = nothing` is passed on.
function name_kernels_from_stack_trace()
    return false
end

end
Loading