- 
                Notifications
    You must be signed in to change notification settings 
- Fork 79
Open
Description
Hi, I noticed that the following script produces different results depending on the backend. On my machine, the output is:
cpu: [18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0;;;]
cuda: [18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0; 18.0;;;]
amd: [6.0; 6.0; 6.0; 6.0; 6.0; 6.0; 6.0; 6.0; 6.0; 6.0;;;]
Is there a mistake in the kernel function?
using CUDA
using AMDGPU
using KernelAbstractions
"""
    compute_tensors(tensor, kernel_fun, Nx, Ny, Nz)

Instantiate `kernel_fun` on `tensor`'s own backend, launch it over every
element of `tensor` (one work-item per element), and block until the
kernel has completed. Mutates `tensor` in place; returns `nothing`.
"""
function compute_tensors(tensor, kernel_fun, Nx, Ny, Nz)
    backend = get_backend(tensor)
    # Build the backend-specific kernel with a workgroup size of 512.
    launch! = kernel_fun(backend, 512)
    launch!(tensor, Nx, Ny, Nz; ndrange=size(tensor))
    # Kernel launches are asynchronous; wait before handing results back.
    KernelAbstractions.synchronize(backend)
    return nothing
end
"""
    kernel_xx!(tensor, Nx, Ny, Nz)

KernelAbstractions kernel: for each global index `(i, j, k)` accumulate
2 once per point of the `(2Nx+1) × (2Ny+1)` loop grid and store the
total in `tensor[i, j, k]` (so every entry should equal
`2 * (2Nx + 1) * (2Ny + 1)`). `Nz` is accepted for interface symmetry
but unused by the loop.
"""
@kernel function kernel_xx!(tensor, Nx::Integer, Ny::Integer, Nz::Integer)
    # `::Integer` instead of `::Int64` so Int32 / 32-bit callers dispatch too.
    i, j, k = @index(Global, NTuple)
    # `acc` avoids shadowing Base.sum; start from the array's eltype zero.
    acc = zero(eltype(tensor))
    # Keep the increment in the accumulator's type so the loop stays
    # type-stable for non-Float64 eltypes (e.g. Float32 tensors).
    two = oftype(acc, 2)
    for p in (-Nx):Nx, q in (-Ny):Ny
        acc += two
    end
    @inbounds tensor[i, j, k] = acc
end
# Tensor extent (nx, ny, nz) and summation bounds (Nx, Ny, Nz) for the kernel.
nx, ny, nz = 10, 1, 1
Nx, Ny, Nz = 1, 1, 1
# CPU reference run: each entry should be 2 * (2Nx+1) * (2Ny+1) = 18.
tensor = zeros(Float64, nx, ny, nz)
compute_tensors(tensor, kernel_xx!, Nx, Ny, Nz)
println("cpu:", tensor)
# Same kernel on the CUDA backend; reported to match the CPU result.
tensor = CUDA.zeros(Float64, nx, ny, nz)
compute_tensors(tensor, kernel_xx!, Nx, Ny, Nz)
println("cuda:", tensor)
# Same kernel on the AMD (ROCm) backend; this run is reported to
# produce 6.0 instead of 18.0 on the reporter's machine.
tensor = AMDGPU.zeros(Float64, nx, ny, nz)
compute_tensors(tensor, kernel_xx!, Nx, Ny, Nz)
println("amd:", tensor)
Metadata
Metadata
Assignees
Labels
No labels