@@ -331,5 +331,54 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
331331 end
332332 end
333333
334+ # from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652
# Kernel from https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652.
#
# For the work-item at global index (row, col), adds `input[row, src]` for every
# `src in col:n` onto `output[row, col]`. The running total is deliberately kept in
# `output` itself: every loop iteration reads and rewrites `output[row, col]`.
# `output` is expected to hold the starting value (the test passes zeros).
@kernel function unaliased_accumulate!(output, input, n)
    (row, col) = @index(Global, NTuple)

    for src in col:n
        # Read-modify-write on `output` each iteration (same lowering as `+=`).
        output[row, col] = output[row, col] + input[row, src]
    end
end
342+
# Companion to `unaliased_accumulate!`: for the work-item at global index (row, col)
# it sums `input[row, src]` over `src in col:n`, but collects the total in a local
# variable and stores it into `output[row, col]` once, after the loop. Unlike the
# in-place variant, the previous contents of `output[row, col]` are overwritten.
@kernel function unaliased_accumulate_local!(output, input, n)
    (row, col) = @index(Global, NTuple)

    # Local accumulator, typed after the destination array's element type.
    acc = zero(eltype(output))
    for src in col:n
        acc += input[row, src]
    end

    # Single store of the finished sum.
    output[row, col] = acc
end
353+
# Regression test for https://github.com/JuliaGPU/KernelAbstractions.jl/issues/652.
# Both kernels must reproduce the host-computed suffix sums
#     reference[i, j] == sum(input[i, k] for k in j:N)
# whether the total is accumulated directly in `output` or in a local variable.
@testset " unaliased accumulate" begin
    backend = Backend()
    N = 8
    M = 5

    # Entries are small integers (at most M + N), so every partial sum is exactly
    # representable in Float32 and the exact `==` comparisons below are valid.
    host_input = Float32[i + k for i in 1:M, k in 1:N]

    # Host-side reference result.
    reference = zeros(Float32, M, N)
    for j in 1:N, i in 1:M
        for k in j:N
            reference[i, j] += host_input[i, k]
        end
    end

    # Allocate device arrays (kept under a separate name from the host copy so no
    # variable changes type midway through the test).
    input = adapt(backend, host_input)
    output = KernelAbstractions.zeros(backend, Float32, M, N)

    # Accumulate directly into `output`.
    unaliased_accumulate!(backend)(output, input, N; ndrange = size(output))
    # Wait for the launch to finish before reading results on the host, rather
    # than relying on the device-to-host copy to synchronize implicitly.
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference

    # Reset the destination and repeat with the local-accumulator variant.
    fill!(output, 0)
    unaliased_accumulate_local!(backend)(output, input, N; ndrange = size(output))
    KernelAbstractions.synchronize(backend)
    @test adapt(Array, output) == reference
end
382+
334383 return
335384end
0 commit comments