MWE:

using CUDA
using AcceleratedKernels

v = CUDA.ones(Int32, 2^15)
AcceleratedKernels.accumulate(+, v; init=Int32(0), alg=AcceleratedKernels.DecoupledLookback())

This gives me a crash log:
julia> AcceleratedKernels.accumulate(+, v; init=Int32(0), alg=AcceleratedKernels.DecoupledLookback())
ERROR: LLVM error: Cannot select: 0x2c2518f0: ch = AtomicFence 0x2aac16f0, TargetConstant:i64<4>, TargetConstant:i64<1>, /home/reinha57/.julia/packages/UnsafeAtomics/vpyYB/src/core.jl:248 @[ /home/reinha57/.julia/packages/UnsafeAtomics/vpyYB/src/core.jl:12 @[ /home/reinha57/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate_1d_gpu.jl:174 @[ none:0 ] ] ]
0x2ded3310: i64 = TargetConstant<4>
0x2c2511f0: i64 = TargetConstant<1>
In function: _Z25gpu__accumulate_previous_16CompilerMetadataI11DynamicSize12DynamicCheckv16CartesianIndicesILi1E5TupleI5OneToI5Int64EEE7NDRangeILi1ES0_10StaticSizeI6_256__ES8_vEE1_13CuDeviceArrayI5Int32Li1ELi1EESF_I5UInt8Li1ELi1EESH_
Stacktrace:
[1] handle_error(reason::Cstring)
@ LLVM ~/.julia/packages/LLVM/iza6e/src/core/context.jl:194
[2] LLVMTargetMachineEmitToMemoryBuffer
@ ~/.julia/packages/LLVM/iza6e/lib/18/libLLVM.jl:11531 [inlined]
[3] emit(tm::LLVM.TargetMachine, mod::LLVM.Module, filetype::LLVM.API.LLVMCodeGenFileType)
@ LLVM ~/.julia/packages/LLVM/iza6e/src/targetmachine.jl:118
[4] mcgen
@ ~/.julia/packages/GPUCompiler/j4HFa/src/mcgen.jl:75 [inlined]
[5] mcgen(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, mod::LLVM.Module, format::LLVM.API.LLVMCodeGenFileType)
@ CUDA ~/.julia/packages/CUDA/x8d2s/src/compiler/compilation.jl:127
[6] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:438
[7] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:115
[8] compile_unhooked
@ ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:80 [inlined]
[9] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:67
[10] compile
@ ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:55 [inlined]
[11] #compile##0
@ ~/.julia/packages/CUDA/x8d2s/src/compiler/compilation.jl:250 [inlined]
[12] JuliaContext(f::CUDA.var"#compile##0#compile##1"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:34
[13] JuliaContext(f::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/driver.jl:25
[14] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/x8d2s/src/compiler/compilation.jl:249
[15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/execution.jl:245
[16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/j4HFa/src/execution.jl:159
[17] macro expansion
@ ~/.julia/packages/CUDA/x8d2s/src/compiler/execution.jl:373 [inlined]
[18] macro expansion
@ ./lock.jl:376 [inlined]
[19] cufunction(f::typeof(AcceleratedKernels.gpu__accumulate_previous!), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{…}, typeof(+), CuDeviceVector{…}, CuDeviceVector{…}, CuDeviceVector{…}}}; kwargs::@Kwargs{always_inline::Bool, maxthreads::Int64})
@ CUDA ~/.julia/packages/CUDA/x8d2s/src/compiler/execution.jl:368
[20] macro expansion
@ ~/.julia/packages/CUDA/x8d2s/src/compiler/execution.jl:112 [inlined]
[21] (::KernelAbstractions.Kernel{CUDABackend, KernelAbstractions.NDIteration.StaticSize{(256,)}, KernelAbstractions.NDIteration.DynamicSize, typeof(AcceleratedKernels.gpu__accumulate_previous!)})(::Function, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
@ CUDA.CUDAKernels ~/.julia/packages/CUDA/x8d2s/src/CUDAKernels.jl:127
[22] accumulate_1d_gpu!(op::typeof(+), v::CuArray{Int32, 1, CUDA.DeviceMemory}, backend::CUDABackend, ::AcceleratedKernels.DecoupledLookback; init::Int32, neutral::Int32, inclusive::Bool, max_tasks::Int64, min_elems::Int64, block_size::Int64, temp::Nothing, temp_flags::Nothing)
@ AcceleratedKernels ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate_1d_gpu.jl:307
[23] accumulate_1d_gpu!
@ ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate_1d_gpu.jl:257 [inlined]
[24] _accumulate_impl!(op::typeof(+), v::CuArray{Int32, 1, CUDA.DeviceMemory}, backend::CUDABackend; init::Int32, neutral::Int32, dims::Nothing, inclusive::Bool, alg::AcceleratedKernels.DecoupledLookback, max_tasks::Int64, min_elems::Int64, prefer_threads::Bool, block_size::Int64, temp::Nothing, temp_flags::Nothing)
@ AcceleratedKernels ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate.jl:171
[25] #accumulate!#99
@ ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate.jl:128 [inlined]
[26] accumulate(op::Function, v::CuArray{Int32, 1, CUDA.DeviceMemory}, backend::CUDABackend; init::Int32, kwargs::@Kwargs{alg::AcceleratedKernels.DecoupledLookback})
@ AcceleratedKernels ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate.jl:227
[27] accumulate
@ ~/.julia/packages/AcceleratedKernels/AdYRJ/src/accumulate/accumulate.jl:219 [inlined]
[28] top-level scope
@ REPL[4]:1
Some type information was truncated. Use `show(err)` to see complete types.
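The unselectable node is an AtomicFence coming from UnsafeAtomics (core.jl:248), reached through accumulate_1d_gpu.jl:174, so the memory fence used by the DecoupledLookback scan is apparently what the PTX backend cannot lower here. A possible workaround, assuming ScanPrefixes() is still the alternative accumulate algorithm in AcceleratedKernels v0.4.3 (untested on this setup), would be to avoid the fence-based path entirely:

using CUDA
using AcceleratedKernels

v = CUDA.ones(Int32, 2^15)

# Assumed workaround: ScanPrefixes() performs a multi-pass scan and, as far
# as I can tell, should not emit the AtomicFence that fails to select above.
AcceleratedKernels.accumulate(+, v; init=Int32(0), alg=AcceleratedKernels.ScanPrefixes())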
julia> versioninfo()
Julia Version 1.12.3
Commit 966d0af0fdf (2025-12-15 11:20 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 64 × AMD EPYC 7452 32-Core Processor
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, znver2)
GC: Built with stock GC
Threads: 1 default, 1 interactive, 1 GC (on 64 virtual cores)
julia> CUDA.versioninfo()
CUDA toolchain:
- runtime 13.0, artifact installation
- driver 580.95.5 for 13.0
- compiler 13.0
CUDA libraries:
- CUBLAS: 13.1.0
- CURAND: 10.4.0
- CUFFT: 12.0.0
- CUSOLVER: 12.0.4
- CUSPARSE: 12.6.3
- CUPTI: 2025.3.1 (API 13.0.1)
- NVML: 13.0.0+580.95.5
Julia packages:
- CUDA: 5.9.4
- CUDA_Driver_jll: 13.0.2+0
- CUDA_Compiler_jll: 0.3.0+0
- CUDA_Runtime_jll: 0.19.2+0
Toolchain:
- Julia: 1.12.3
- LLVM: 18.1.7
1 device:
0: NVIDIA A30 (sm_80, 10.925 GiB / 24.000 GiB available)
Relevant package versions:
(@v1.12) pkg> status
Status `~/.julia/environments/v1.12/Project.toml`
[21141c5a] AMDGPU v2.1.4
[6a4ca0a5] AcceleratedKernels v0.4.3
[052768ef] CUDA v5.9.5
[63c18a36] KernelAbstractions v0.9.39
[295af30f] Revise v3.12.3