Skip to content

error for scatter with Metal arrays · #534

@CarloLucibello

Description

@CarloLucibello

`gather` works fine on Apple Silicon, but `scatter` fails with a compilation error.
The same scatter kernel works fine with CUDA and AMDGPU arrays.

cc @maleadt @pxl-th

julia> using Metal, NNlib, Flux

julia> Metal.versioninfo()
macOS 14.0.0, Darwin 23.0.0

Toolchain:
- Julia: 1.9.3
- LLVM: 14.0.6

Julia packages:
- Metal.jl: 0.5.1
- Metal_LLVM_Tools_jll: 0.5.1+0

1 device:
- Apple M1 Pro (384.000 KiB allocated)

julia> device = Flux.get_device("Metal")

julia> NNlib.gather([1 2 3; 4 5 6] |> device, [1,3,1,3,1] |> device)
2×5 MtlMatrix{Int64, Metal.MTL.MTLResourceStorageModePrivate}:
 1  3  1  3  1
 4  6  4  6  4

julia> NNlib.scatter(+, [1 2 3 4; 5 6 7 8] |> device, [2,1,1,5] |> device)
ERROR: Compilation to native code failed; see below for details.
If you think this is a bug, please file an issue and attach /var/folders/z_/n_d2vxmx4jj95q7hzmwngnyc0000gn/T/jl_A3uHqJIPoH.metallib.
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:35
  [2] link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}}; return_function::Bool)
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/compilation.jl:78
  [3] link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/compilation.jl:65
  [4] actual_compilation(cache::Dict{Any, Any}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Cp7sE/src/execution.jl:132
  [5] cached_compilation(cache::Dict{Any, Any}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Cp7sE/src/execution.jl:103
  [6] macro expansion
    @ ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:162 [inlined]
  [7] macro expansion
    @ ./lock.jl:267 [inlined]
  [8] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, typeof(+), MtlDeviceMatrix{Int64, 1}, MtlDeviceMatrix{Int64, 1}, MtlDeviceVector{Int64, 1}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:157
  [9] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, typeof(+), MtlDeviceMatrix{Int64, 1}, MtlDeviceMatrix{Int64, 1}, MtlDeviceVector{Int64, 1}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Int64}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:155
 [10] macro expansion
    @ ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:77 [inlined]
 [11] (::KernelAbstractions.Kernel{MetalBackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(NNlib.gpu__scatter!)})(::Function, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
    @ Metal.MetalKernels ~/.julia/packages/Metal/lnkVP/src/MetalKernels.jl:105
 [12] Kernel
    @ ~/.julia/packages/Metal/lnkVP/src/MetalKernels.jl:101 [inlined]
 [13] scatter!
    @ ~/.julia/packages/NNlib/lOntC/src/scatter.jl:104 [inlined]
 [14] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.MTL.MTLResourceStorageModePrivate}, idx::MtlVector{Int64, Metal.MTL.MTLResourceStorageModePrivate}; init::Nothing, dstsize::Nothing)
    @ NNlib ~/.julia/packages/NNlib/lOntC/src/scatter.jl:177
 [15] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.MTL.MTLResourceStorageModePrivate}, idx::MtlVector{Int64, Metal.MTL.MTLResourceStorageModePrivate})
    @ NNlib ~/.julia/packages/NNlib/lOntC/src/scatter.jl:168
 [16] top-level scope
    @ REPL[32]:1
 [17] top-level scope
    @ ~/.julia/packages/Metal/lnkVP/src/initialization.jl:57

caused by: NSError: Compiler encountered an internal error (AGXMetalG13X, code 3)
Stacktrace:
  [1] MTLComputePipelineState(dev::Metal.MTL.MTLDeviceInstance, fun::Metal.MTL.MTLFunctionInstance)
    @ Metal.MTL ~/.julia/packages/Metal/lnkVP/lib/mtl/compute_pipeline.jl:60
  [2] link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}}; return_function::Bool)
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/compilation.jl:70
  [3] link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry), Tuple{Vector{UInt8}, String}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/compilation.jl:65
  [4] actual_compilation(cache::Dict{Any, Any}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Cp7sE/src/execution.jl:132
  [5] cached_compilation(cache::Dict{Any, Any}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Cp7sE/src/execution.jl:103
  [6] macro expansion
    @ ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:162 [inlined]
  [7] macro expansion
    @ ./lock.jl:267 [inlined]
  [8] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, typeof(+), MtlDeviceMatrix{Int64, 1}, MtlDeviceMatrix{Int64, 1}, MtlDeviceVector{Int64, 1}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:157
  [9] mtlfunction(f::typeof(NNlib.gpu__scatter!), tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, typeof(+), MtlDeviceMatrix{Int64, 1}, MtlDeviceMatrix{Int64, 1}, MtlDeviceVector{Int64, 1}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Int64}})
    @ Metal ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:155
 [10] macro expansion
    @ ~/.julia/packages/Metal/lnkVP/src/compiler/execution.jl:77 [inlined]
 [11] (::KernelAbstractions.Kernel{MetalBackend, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, typeof(NNlib.gpu__scatter!)})(::Function, ::Vararg{Any}; ndrange::Int64, workgroupsize::Nothing)
    @ Metal.MetalKernels ~/.julia/packages/Metal/lnkVP/src/MetalKernels.jl:105
 [12] Kernel
    @ ~/.julia/packages/Metal/lnkVP/src/MetalKernels.jl:101 [inlined]
 [13] scatter!
    @ ~/.julia/packages/NNlib/lOntC/src/scatter.jl:104 [inlined]
 [14] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.MTL.MTLResourceStorageModePrivate}, idx::MtlVector{Int64, Metal.MTL.MTLResourceStorageModePrivate}; init::Nothing, dstsize::Nothing)
    @ NNlib ~/.julia/packages/NNlib/lOntC/src/scatter.jl:177
 [15] scatter(op::typeof(+), src::MtlMatrix{Int64, Metal.MTL.MTLResourceStorageModePrivate}, idx::MtlVector{Int64, Metal.MTL.MTLResourceStorageModePrivate})
    @ NNlib ~/.julia/packages/NNlib/lOntC/src/scatter.jl:168
 [16] top-level scope
    @ REPL[32]:1
 [17] top-level scope
    @ ~/.julia/packages/Metal/lnkVP/src/initialization.jl:57

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions