-
Notifications
You must be signed in to change notification settings - Fork 87
Open
Description
julia> using CUDA, Statistics
julia> a_gpu = CUDA.rand(Int32, 100, 100)
julia> m_gpu = mean(Float32, a_gpu, dims=1)

This resulted in the following error:
GPU compilation of MethodInstance for CUDA.partial_mapreduce_grid(::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, ::typeof(Base.add_sum), ::Float64, ::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, ::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, ::Val{true}, ::CuDeviceMatrix{Float64, 1}, ::CuDeviceMatrix{Int32, 1}) failed
KernelError: passing non-bitstype argument
Argument 2 to your kernel function is of type ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, which is not a bitstype:
.inner is of type Type{Float32} which is not isbits.
Only bitstypes, which are "plain data" types that are immutable
and contain no references to other values, can be used in GPU kernels.
For more information, see the `Base.isbitstype` function.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/validation.jl:108
[2] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:87 [inlined]
[3] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/Tracy/slmNc/src/tracepoint.jl:163 [inlined]
[4] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:85
[5] compile_unhooked
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:80 [inlined]
[6] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:67
[7] compile
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:55 [inlined]
[8] #1182
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/compilation.jl:250 [inlined]
[9] JuliaContext(f::CUDA.var"#1182#1185"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:34
[10] JuliaContext(f::Function)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:25
[11] compile(job::GPUCompiler.CompilerJob)
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/compilation.jl:249
[12] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/execution.jl:245
[13] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/execution.jl:159
[14] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:373 [inlined]
[15] macro expansion
@ ./lock.jl:267 [inlined]
[16] cufunction(f::typeof(CUDA.partial_mapreduce_grid), tt::Type{Tuple{ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, typeof(Base.add_sum), Float64, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float64, 1}, CuDeviceMatrix{Int32, 1}}}; kwargs::@Kwargs{})
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:368
[17] cufunction(f::typeof(CUDA.partial_mapreduce_grid), tt::Type{Tuple{ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, typeof(Base.add_sum), Float64, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float64, 1}, CuDeviceMatrix{Int32, 1}}})
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:365
[18] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:112 [inlined]
[19] mapreducedim!(f::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, op::typeof(Base.add_sum), R::CuArray{Float64, 2, CUDA.DeviceMemory}, A::CuArray{Int32, 2, CUDA.DeviceMemory}; init::Float64)
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/mapreduce.jl:229
[20] mapreducedim!
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/mapreduce.jl:169 [inlined]
[21] _mapreduce(f::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, op::typeof(Base.add_sum), As::CuArray{Int32, 2, CUDA.DeviceMemory}; dims::Int64, init::Nothing)
@ GPUArrays /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:76
[22] mapreduce(::Function, ::Function, ::CuArray{Int32, 2, CUDA.DeviceMemory}; dims::Int64, init::Nothing)
@ GPUArrays /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:28
[23] mapreduce
@ /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:28 [inlined]
[24] _sum
@ ./reducedim.jl:1041 [inlined]
[25] sum
@ ./reducedim.jl:1013 [inlined]
[26] _mean
@ /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/statistics.jl:37 [inlined]
[27] #mean#1
@ /global/cfs/cdirs/m2676/users/yuanru/.juliaup/juliaup/julia-1.10.10+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:104 [inlined]
[28] top-level scope
@ REPL[5]:1
[29] top-level scope
@ none:1
Note that it works if I remove either `Float32` or `dims=1`:
julia> m = mean(Float32, a_gpu)
2.6236948f6
julia> m = mean(a_gpu, dims=1)
1×100 CuArray{Float64, 2, CUDA.DeviceMemory}:
6.44418e7 1.63643e8 5.03201e7 4.19325e7 -9.81884e7 -1.30971e8 -1.48728e8 … 1.86605e7 -5.30929e6 9.33825e7 -6.25027e7 8.92984e7 5.62057e7 6.36258e7

The CUDA version is v5.8.2.
Version Info
Julia Version 1.10.10
Commit 95f30e51f41 (2025-06-27 09:51 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 256 × AMD EPYC 7713 64-Core Processor
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-15.0.7 (ORCJIT, znver3)
Threads: 1 default, 0 interactive, 1 GC (on 256 virtual cores)
Environment:
JULIA_PROJECT = @work
JULIA_DEPOT_PATH = /pscratch/sd/y/yuanru/.julia
Metadata
Metadata
Assignees
Labels
No labels