Skip to content

Commit 76a35d8

Browse files
authored
Merge pull request #653 from JuliaGPU/vc/accumulate
Aliasscope miscompile for 1.11+
2 parents b525d6b + 218abb9 commit 76a35d8

File tree

2 files changed

+61
-4
lines changed

2 files changed

+61
-4
lines changed

src/macros.jl

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,14 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices)
8787
push!(let_constargs, :($arg = $constify($arg)))
8888
end
8989
end
90+
has_constargs = !isempty(let_constargs)
9091
pushfirst!(def[:args], :__ctx__)
9192
new_stmts = Expr[]
9293
body = MacroTools.flatten(def[:body])
93-
push!(new_stmts, Expr(:aliasscope))
94+
# On 1.11 and later having this aliasscope causes issues
95+
# even with kernels that don't use `@Const` on arguments
96+
# See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
97+
has_constargs && push!(new_stmts, Expr(:aliasscope))
9498
if !unsafe_indices
9599
push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__)))
96100
end
@@ -105,7 +109,7 @@ function transform_gpu!(def, constargs, force_inbounds, unsafe_indices)
105109
if force_inbounds
106110
push!(new_stmts, Expr(:inbounds, :pop))
107111
end
108-
push!(new_stmts, Expr(:popaliasscope))
112+
has_constargs && push!(new_stmts, Expr(:popaliasscope))
109113
push!(new_stmts, :(return nothing))
110114
def[:body] = Expr(
111115
:let,
@@ -129,18 +133,22 @@ function transform_cpu!(def, constargs, force_inbounds)
129133
push!(let_constargs, :($arg = $constify($arg)))
130134
end
131135
end
136+
has_constargs = !isempty(let_constargs)
132137
pushfirst!(def[:args], :__ctx__)
133138
new_stmts = Expr[]
134139
body = MacroTools.flatten(def[:body])
135-
push!(new_stmts, Expr(:aliasscope))
140+
# On 1.11 and later having this aliasscope causes issues
141+
# even with kernels that don't use `@Const` on arguments
142+
# See https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
143+
has_constargs && push!(new_stmts, Expr(:aliasscope))
136144
if force_inbounds
137145
push!(new_stmts, Expr(:inbounds, true))
138146
end
139147
append!(new_stmts, split(emit_cpu, body.args))
140148
if force_inbounds
141149
push!(new_stmts, Expr(:inbounds, :pop))
142150
end
143-
push!(new_stmts, Expr(:popaliasscope))
151+
has_constargs && push!(new_stmts, Expr(:popaliasscope))
144152
push!(new_stmts, :(return nothing))
145153
def[:body] = Expr(
146154
:let,

test/test.jl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,5 +371,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
371371
end
372372
end
373373

374+
# from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
375+
@kernel function unaliased_accumulate!(output, input, n)
376+
i, j = @index(Global, NTuple)
377+
378+
for k in j:n
379+
output[i, j] += input[i, k]
380+
end
381+
end
382+
383+
@kernel function unaliased_accumulate_local!(output, input, n)
384+
i, j = @index(Global, NTuple)
385+
386+
# Use local accumulator
387+
sum_val = zero(eltype(output))
388+
for k in j:n
389+
sum_val += input[i, k]
390+
end
391+
output[i, j] = sum_val
392+
end
393+
394+
@testset "unaliased accumulate" begin
395+
backend = Backend()
396+
N = 8
397+
M = 5
398+
399+
input = Float32[i + k for i in 1:M, k in 1:N]
400+
401+
reference = zeros(Float32, M, N)
402+
for i in 1:M
403+
for j in 1:N
404+
for k in j:N
405+
reference[i, j] += input[i, k]
406+
end
407+
end
408+
end
409+
410+
# Allocate device arrays
411+
input = adapt(backend, input)
412+
output = KernelAbstractions.zeros(backend, Float32, M, N)
413+
414+
# Perform accumulation
415+
unaliased_accumulate!(backend)(output, input, N; ndrange = size(output))
416+
@test adapt(Array, output) == reference
417+
418+
fill!(output, 0)
419+
unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output))
420+
@test adapt(Array, output) == reference
421+
end
422+
374423
return
375424
end

0 commit comments

Comments
 (0)