diff --git a/src/macros.jl b/src/macros.jl index 86d5b0bc..69600872 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -87,10 +87,14 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) push!(let_constargs, :($arg = $constify($arg))) end end + has_constargs = !isempty(let_constargs) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) - push!(new_stmts, Expr(:aliasscope)) + # On 1.11 and later having this aliasscope causes issues + # even with kernels that don't use `@Const` on arguments + # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + has_constargs && push!(new_stmts, Expr(:aliasscope)) if !unsafe_indices push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__))) end @@ -105,7 +109,7 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - push!(new_stmts, Expr(:popaliasscope)) + has_constargs && push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let, @@ -129,10 +133,14 @@ function transform_cpu!(def, constargs, force_inbounds) push!(let_constargs, :($arg = $constify($arg))) end end + has_constargs = !isempty(let_constargs) pushfirst!(def[:args], :__ctx__) new_stmts = Expr[] body = MacroTools.flatten(def[:body]) - push!(new_stmts, Expr(:aliasscope)) + # On 1.11 and later having this aliasscope causes issues + # even with kernels that don't use `@Const` on arguments + # See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + has_constargs && push!(new_stmts, Expr(:aliasscope)) if force_inbounds push!(new_stmts, Expr(:inbounds, true)) end @@ -140,7 +148,7 @@ function transform_cpu!(def, constargs, force_inbounds) if force_inbounds push!(new_stmts, Expr(:inbounds, :pop)) end - push!(new_stmts, Expr(:popaliasscope)) + has_constargs && push!(new_stmts, Expr(:popaliasscope)) push!(new_stmts, :(return nothing)) def[:body] = Expr( :let, diff --git a/test/test.jl b/test/test.jl index 241deb5c..8e3bd713 100644 --- a/test/test.jl +++ b/test/test.jl @@ -371,5 +371,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk end end + # from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652 + @kernel function unaliased_accumulate!(output, input, n) + i, j = @index(Global, NTuple) + + for k in j:n + output[i, j] += input[i, k] + end + end + + @kernel function unaliased_accumulate_local!(output, input, n) + i, j = @index(Global, NTuple) + + # Use local accumulator + sum_val = zero(eltype(output)) + for k in j:n + sum_val += input[i, k] + end + output[i, j] = sum_val + end + + @testset "unaliased accumulate" begin + backend = Backend() + N = 8 + M = 5 + + input = Float32[i + k for i in 1:M, k in 1:N] + + reference = zeros(Float32, M, N) + for i in 1:M + for j in 1:N + for k in j:N + reference[i, j] += input[i, k] + end + end + end + + # Allocate device arrays + input = adapt(backend, input) + output = KernelAbstractions.zeros(backend, Float32, M, N) + + # Perform accumulation + unaliased_accumulate!(backend)(output, input, N; ndrange = size(output)) + @test adapt(Array, output) == reference + + fill!(output, 0) + unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output)) + @test adapt(Array, output) == reference + end + return end