@@ -371,5 +371,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
371371 end
372372 end
373373
374+ # from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
# Kernel: for the work-item at global index (i, j), add `input[i, k]` for every
# `k in j:n` onto `output[i, j]`.
#
# Arguments:
#   output - 2-D array indexed as `output[i, j]`; accumulated into with `+=`, so
#            its prior contents contribute to the result.
#   input  - 2-D array read as `input[i, k]` for `k in j:n`.
#   n      - inclusive upper bound of the `k` range.
#
# NOTE(review): accumulating through `output` on every iteration (instead of a
# local variable) looks intentional for the regression referenced in the linked
# issue; confirm before "optimizing" it away.
@kernel function unaliased_accumulate!(output, input, n)
    i, j = @index(Global, NTuple)

    for k in j:n
        # Read-modify-write on the destination element each iteration.
        output[i, j] += input[i, k]
    end
end
382+
# Kernel: for the work-item at global index (i, j), sum `input[i, k]` over
# `k in j:n` into a local variable and store the total in `output[i, j]`.
#
# Arguments:
#   output - 2-D array; `output[i, j]` is overwritten (prior contents ignored).
#   input  - 2-D array read as `input[i, k]` for `k in j:n`.
#   n      - inclusive upper bound of the `k` range.
@kernel function unaliased_accumulate_local!(output, input, n)
    i, j = @index(Global, NTuple)

    # Start from a zero of the destination's element type so the accumulator
    # begins with the same type as the value finally stored.
    sum_val = zero(eltype(output))
    for k in j:n
        sum_val += input[i, k]
    end
    # Single write of the finished sum.
    output[i, j] = sum_val
end
393+
# Runs both accumulation kernels on an M×N Float32 input and checks each result
# against a host-side reference where
# `reference[i, j] == sum(host_input[i, j:N])`.
@testset " unaliased accumulate" begin
    backend = Backend()
    N = 8
    M = 5

    # Small integer-valued Float32 entries: every partial sum is exactly
    # representable, so the `==` comparisons below are exact, not approximate.
    host_input = Float32[i + k for i in 1:M, k in 1:N]

    # Host reference: suffix sums along the second dimension.
    reference = Float32[sum(view(host_input, i, j:N)) for i in 1:M, j in 1:N]

    # Allocate device arrays
    input = adapt(backend, host_input)
    output = KernelAbstractions.zeros(backend, Float32, M, N)

    # Accumulate directly into `output`
    unaliased_accumulate!(backend)(output, input, N; ndrange = size(output))
    # Wait for the launch explicitly instead of relying on the host copy below
    # to synchronize implicitly.
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference

    # Reset the destination, then repeat with the local-accumulator variant
    fill!(output, 0)
    unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output))
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference
end
422+
374423 return
375424end
0 commit comments