Skip to content

Commit fc7a9ae

Browse files
committed
Add test for accumulate issue
(cherry picked from commit e628aa4)
1 parent 8d44613 commit fc7a9ae

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

test/test.jl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,5 +331,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
331331
end
332332
end
333333

334+
# from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
335+
# Accumulate kernel that writes straight into `output`.
# The work-item at global index (row, col) adds input[row, col], input[row, col + 1],
# ..., input[row, n] onto output[row, col], one element per loop iteration.
# `output[row, col]` is read and written on every iteration (no local temporary),
# in contrast to `unaliased_accumulate_local!`; keep it that way.
@kernel function unaliased_accumulate!(output, input, n)
    row, col = @index(Global, NTuple)

    for src in col:n
        output[row, col] += input[row, src]
    end
end
342+
343+
# Accumulate kernel that sums into a local variable and stores once.
# The work-item at global index (row, col) computes the sum of input[row, col:n]
# and assigns it to output[row, col], overwriting whatever was there.
@kernel function unaliased_accumulate_local!(output, input, n)
    row, col = @index(Global, NTuple)

    # Start from a zero of the output element type so the accumulator's type
    # does not change inside the loop.
    acc = zero(eltype(output))
    for src in col:n
        acc += input[row, src]
    end
    output[row, col] = acc
end
353+
354+
# Runs both accumulate kernels on an M x N Float32 input and compares each result
# with a reference computed on the host, where
#     reference[i, j] == input[i, j] + input[i, j + 1] + ... + input[i, N].
@testset "unaliased accumulate" begin
    backend = Backend()
    N = 8
    M = 5

    # Entries are small integers (2 .. M + N), so every partial sum is exactly
    # representable in Float32 and the results can be compared with `==`.
    host_input = Float32[i + k for i in 1:M, k in 1:N]

    # Host-side reference, accumulated in the same order as the kernels.
    reference = zeros(Float32, M, N)
    for i in 1:M, j in 1:N, k in j:N
        reference[i, j] += host_input[i, k]
    end

    # Allocate device arrays
    input = adapt(backend, host_input)
    output = KernelAbstractions.zeros(backend, Float32, M, N)

    # Accumulate directly into `output`. Wait for the launch to complete
    # before reading the result back instead of relying on the copy to the
    # host synchronizing implicitly.
    unaliased_accumulate!(backend)(output, input, N; ndrange = size(output))
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference

    # Reset the output and repeat with the local-accumulator variant.
    fill!(output, 0)
    unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output))
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference
end
382+
334383
return
335384
end

0 commit comments

Comments
 (0)