Adapt to GPUArrays.jl transition to KernelAbstractions.jl. (#461)

maleadt · leios · web-flow · commit 711758d725da · 2024-10-18T08:08:13.000+02:00
Co-authored-by: James Schloss <jrs.schloss@gmail.com>
diff --git a/Project.toml b/Project.toml
@@ -40,7 +40,7 @@ BFloat16s = "0.5"
 CEnum = "0.4, 0.5"
 CodecBzip2 = "0.8"
 ExprTools = "0.1"
-GPUArrays = "10.1"
+GPUArrays = "11"
 GPUCompiler = "0.26, 0.27, 1"
 KernelAbstractions = "0.9.1"
 LLVM = "7.2, 8, 9"
diff --git a/src/gpuarrays.jl b/src/gpuarrays.jl
@@ -1,59 +1,5 @@
 ## GPUArrays interfaces
 
-## execution
-
-struct mtlArrayBackend <: AbstractGPUBackend end
-
-struct mtlKernelContext <: AbstractKernelContext end
-
-@inline function GPUArrays.launch_heuristic(::mtlArrayBackend, f::F, args::Vararg{Any,N};
-                                             elements::Int, elements_per_thread::Int) where {F,N}
-    kernel = @metal launch=false f(mtlKernelContext(), args...)
-
-    # The pipeline state automatically computes occupancy stats
-    threads = min(elements, kernel.pipeline.maxTotalThreadsPerThreadgroup)
-    blocks  = cld(elements, threads)
-
-    return (; threads=Int(threads), blocks=Int(blocks))
-end
-
-function GPUArrays.gpu_call(::mtlArrayBackend, f, args, threads::Int, groups::Int;
-                            name::Union{String,Nothing})
-    @metal threads groups name f(mtlKernelContext(), args...)
-end
-
-
-## on-device
-
-# indexing
-GPUArrays.blockidx(ctx::mtlKernelContext)     = threadgroup_position_in_grid_1d()
-GPUArrays.blockdim(ctx::mtlKernelContext)     = threads_per_threadgroup_1d()
-GPUArrays.threadidx(ctx::mtlKernelContext)    = thread_position_in_threadgroup_1d()
-GPUArrays.griddim(ctx::mtlKernelContext)      = threadgroups_per_grid_1d()
-GPUArrays.global_index(ctx::mtlKernelContext) = thread_position_in_grid_1d()
-GPUArrays.global_size(ctx::mtlKernelContext)  = threads_per_grid_1d()
-
-# memory
-
-@inline function GPUArrays.LocalMemory(::mtlKernelContext, ::Type{T}, ::Val{dims}, ::Val{id}
-                                      ) where {T, dims, id}
-    ptr = emit_threadgroup_memory(T, Val(prod(dims)))
-    MtlDeviceArray(dims, ptr)
-end
-
-# synchronization
-
-@inline GPUArrays.synchronize_threads(::mtlKernelContext) =
-    threadgroup_barrier(MemoryFlagThreadGroup)
-
-
-
-#
-# Host abstractions
-#
-
-GPUArrays.backend(::Type{<:MtlArray}) = mtlArrayBackend()
-
 const GLOBAL_RNGs = Dict{MTLDevice,GPUArrays.RNG}()
 function GPUArrays.default_rng(::Type{<:MtlArray})
     dev = device()
diff --git a/test/random.jl b/test/random.jl
@@ -246,8 +246,7 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES];
             a = f(T, d)
             Metal.seed!(1)
             b = f(T, d)
-            # TODO: Remove broken parameter once https://github.com/JuliaGPU/GPUArrays.jl/issues/530 is fixed
-            @test Array(a) == Array(b) broken = (T == Float16 && d == (1000,1000))
+            @test Array(a) == Array(b)
         end
     end
 end # testset