Skip to content

Commit d8370f7

Browse files
committed
Move linalg wrappers out of MPS lib
1 parent e3b6210 commit d8370f7

File tree

6 files changed

+105
-85
lines changed

6 files changed

+105
-85
lines changed

lib/mps/MPS.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@ const MtlFloat = Union{Float32, Float16}
2323
const MPSShape = NSArray#{NSNumber}
2424
Base.convert(::Type{MPSShape}, tuple::Union{Vector{N},NTuple{N, <:Integer}}) where N = NSArray(NSNumber.(collect(tuple)))
2525

26+
# Valid combination of input (A and B matrices) and output (C) types
27+
const MPS_VALID_MATMUL_TYPES =
28+
[(Int8, Float16),
29+
(Int8, Float32),
30+
(Int16, Float32),
31+
(Float16, Float16),
32+
(Float16, Float32),
33+
(Float32, Float32)]
34+
35+
const MPS_VALID_MATVECMUL_TYPES =
36+
[(Float16, Float16),
37+
(Float16, Float32),
38+
(Float32, Float32)]
39+
2640
is_supported(dev::MTLDevice) = ccall(:MPSSupportsMTLDevice, Bool, (id{MTLDevice},), dev)
2741

2842
# Load in generated enums and structs
@@ -43,6 +57,5 @@ include("copy.jl")
4357

4458
# integrations
4559
include("random.jl")
46-
include("linalg.jl")
4760

4861
end

lib/mps/command_buf.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
# @objcwrapper MPSCommandBuffer <: MTLCommandBuffer
88

9+
export MPSCommandBuffer
10+
911
function MPSCommandBuffer(commandBuffer::MTLCommandBuffer)
1012
handle = @objc [MPSCommandBuffer commandBufferWithCommandBuffer:commandBuffer::id{MTLCommandBuffer}]::id{MPSCommandBuffer}
1113
MPSCommandBuffer(handle)
@@ -32,6 +34,8 @@ function MTL.commit!(f::Base.Callable, cmdbuf::MPSCommandBuffer)
3234
return ret
3335
end
3436

37+
export commitAndContinue!
38+
3539
commitAndContinue!(cmdbuf::MPSCommandBuffer) =
3640
@objc [cmdbuf::id{MPSCommandBuffer} commitAndContinue]::Nothing
3741

src/Metal.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ include("compiler/reflection.jl")
5353
include("../lib/mps/MPS.jl")
5454
export MPS
5555

56+
# LinearAlgebra
57+
include("linalg.jl")
58+
5659
# array implementation
5760
include("utilities.jl")
5861
include("broadcast.jl")

lib/mps/linalg.jl renamed to src/linalg.jl

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
11
using LinearAlgebra
22
using LinearAlgebra: MulAddMul, wrap
3-
4-
# Valid combination of input (A and B matrices) and output (C) types
5-
const MPS_VALID_MATMUL_TYPES =
6-
[(Int8, Float16),
7-
(Int8, Float32),
8-
(Int16, Float32),
9-
(Float16, Float16),
10-
(Float32, Float32)]
3+
using .MPS
4+
using .MPS: MPS_VALID_MATMUL_TYPES, MPS_VALID_MATVECMUL_TYPES, MtlFloat
115

126
LinearAlgebra.generic_matmatmul!(C::MtlMatrix, tA, tB, A::MtlMatrix, B::MtlMatrix, _add::MulAddMul) =
137
LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)
@@ -39,19 +33,14 @@ LinearAlgebra.generic_matmatmul!(C::MtlMatrix, tA, tB, A::MtlMatrix, B::MtlMatri
3933
typC = eltype(C)
4034

4135
# If possible, dispatch to performance shaders
42-
if is_supported(device()) &&
43-
typA == typB && (typA, typC) in MPS_VALID_MATMUL_TYPES
36+
if MPS.is_supported(device()) &&
37+
typA == typB && (typA, typC) in MPS_VALID_MATMUL_TYPES
4438
matmul!(C, A, B, alpha, beta, transA, transB)
4539
else
4640
GPUArrays.generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), alpha, beta)
4741
end
4842
end
4943

50-
const MPS_VALID_MATVECMUL_TYPES =
51-
[(Float16, Float16),
52-
(Float16, Float32),
53-
(Float32, Float32)]
54-
5544
LinearAlgebra.generic_matvecmul!(C::MtlVector, tA::AbstractChar, A::MtlMatrix, B::MtlVector, _add::MulAddMul) =
5645
LinearAlgebra.generic_matvecmul!(C, tA, A, B, _add.alpha, _add.beta)
5746
@autoreleasepool function LinearAlgebra.generic_matvecmul!(C::MtlVector, tA::AbstractChar,
@@ -82,24 +71,24 @@ LinearAlgebra.generic_matvecmul!(C::MtlVector, tA::AbstractChar, A::MtlMatrix, B
8271
typC = eltype(C)
8372

8473
# If possible, dispatch to performance shaders
85-
if is_supported(device()) &&
86-
typA == typB && (typA, typC) in MPS_VALID_MATVECMUL_TYPES
74+
if MPS.is_supported(device()) &&
75+
typA == typB && (typA, typC) in MPS_VALID_MATVECMUL_TYPES
8776
matvecmul!(C, A, B, alpha, beta, transA)
8877
else
8978
GPUArrays.generic_matmatmul!(C, wrap(A, tA), B, alpha, beta)
9079
end
9180
end
9281

9382
@inline checkpositivedefinite(status) =
94-
status == MPSMatrixDecompositionStatusNonPositiveDefinite || throw(PosDefException(status))
83+
status == MPS.MPSMatrixDecompositionStatusNonPositiveDefinite || throw(PosDefException(status))
9584
@inline checknonsingular(status) =
96-
status != MPSMatrixDecompositionStatusSingular || throw(SingularException(status))
85+
status != MPS.MPSMatrixDecompositionStatusSingular || throw(SingularException(status))
9786

9887
# GPU-compatible accessors of the LU decomposition properties
99-
function Base.getproperty(F::LU{T,<:MtlMatrix}, d::Symbol) where T
88+
function Base.getproperty(F::LU{T, <:MtlMatrix}, d::Symbol) where {T}
10089
m, n = size(F)
10190
if d === :L
102-
L = tril!(getfield(F, :factors)[1:m, 1:min(m,n)])
91+
L = tril!(getfield(F, :factors)[1:m, 1:min(m, n)])
10392
L[1:m+1:end] .= one(T)
10493
return L
10594
else
@@ -111,16 +100,16 @@ end
111100
# TODO: figure out a GPU-compatible way to get the permutation matrix
112101
LinearAlgebra.ipiv2perm(v::MtlVector, maxi::Integer) =
113102
LinearAlgebra.ipiv2perm(Array(v), maxi)
114-
LinearAlgebra.ipiv2perm(v::MtlVector{<:Any,MTL.CPUStorage}, maxi::Integer) =
103+
LinearAlgebra.ipiv2perm(v::MtlVector{<:Any, MTL.CPUStorage}, maxi::Integer) =
115104
LinearAlgebra.ipiv2perm(unsafe_wrap(Array, v), maxi)
116105

117106
@autoreleasepool function LinearAlgebra.lu(A::MtlMatrix{T};
118-
check::Bool=true) where {T<:MtlFloat}
119-
M,N = size(A)
107+
check::Bool = true) where {T <: MtlFloat}
108+
M, N = size(A)
120109
dev = device()
121110
queue = global_queue(dev)
122111

123-
At = MtlMatrix{T,PrivateStorage}(undef, (N, M))
112+
At = MtlMatrix{T, PrivateStorage}(undef, (N, M))
124113
mps_a = MPSMatrix(A)
125114
mps_at = MPSMatrix(At)
126115

@@ -131,7 +120,7 @@ LinearAlgebra.ipiv2perm(v::MtlVector{<:Any,MTL.CPUStorage}, maxi::Integer) =
131120
end
132121

133122
P = similar(A, UInt32, 1, min(N, M))
134-
status = MtlArray{MPSMatrixDecompositionStatus,0,SharedStorage}(undef)
123+
status = MtlArray{MPS.MPSMatrixDecompositionStatus, 0, SharedStorage}(undef)
135124

136125
commitAndContinue!(cmdbuf) do cbuf
137126
mps_p = MPSMatrix(P)
@@ -172,13 +161,13 @@ end
172161

173162
# TODO: dispatch on pivot strategy
174163
@autoreleasepool function LinearAlgebra.lu!(A::MtlMatrix{T};
175-
check::Bool=true,
176-
allowsingular::Bool=false) where {T<:MtlFloat}
177-
M,N = size(A)
164+
check::Bool = true,
165+
allowsingular::Bool = false) where {T <: MtlFloat}
166+
M, N = size(A)
178167
dev = device()
179168
queue = global_queue(dev)
180169

181-
At = MtlMatrix{T,PrivateStorage}(undef, (N, M))
170+
At = MtlMatrix{T, PrivateStorage}(undef, (N, M))
182171
mps_a = MPSMatrix(A)
183172
mps_at = MPSMatrix(At)
184173

@@ -189,7 +178,7 @@ end
189178
end
190179

191180
P = similar(A, UInt32, 1, min(N, M))
192-
status = MtlArray{MPSMatrixDecompositionStatus,0,SharedStorage}(undef)
181+
status = MtlArray{MPS.MPSMatrixDecompositionStatus, 0, SharedStorage}(undef)
193182

194183
commitAndContinue!(cmdbuf) do cbuf
195184
mps_p = MPSMatrix(P)
@@ -215,9 +204,9 @@ end
215204

216205
@autoreleasepool function LinearAlgebra.transpose!(B::MtlMatrix{T},
217206
A::MtlMatrix{T}) where {T}
218-
axes(B,2) == axes(A,1) && axes(B,1) == axes(A,2) || throw(DimensionMismatch("transpose"))
207+
axes(B, 2) == axes(A, 1) && axes(B, 1) == axes(A, 2) || throw(DimensionMismatch("transpose"))
219208

220-
M,N = size(A)
209+
M, N = size(A)
221210
dev = device()
222211
queue = global_queue(dev)
223212
cmdbuf = MTLCommandBuffer(queue)

test/linalg.jl

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
using LinearAlgebra
2+
3+
if MPS.is_supported(device())
4+
5+
6+
@testset "test matrix vector multiplication of views" begin
7+
N = 20
8+
9+
a = rand(Float32, N, N)
10+
b = rand(Float32, N)
11+
c = a * b
12+
13+
mtl_a = mtl(a)
14+
mtl_b = mtl(b)
15+
mtl_c = mtl_a * mtl_b
16+
17+
@test Array(mtl_c) ≈ c
18+
19+
view_a = @view a[:, 10:end]
20+
view_b = @view b[10:end]
21+
22+
mtl_view_a = @view mtl_a[:, 10:end]
23+
mtl_view_b = @view mtl_b[10:end]
24+
25+
mtl_view_c = mtl_view_a * mtl_view_b
26+
view_c = view_a * view_b
27+
28+
@test Array(mtl_view_c) == view_c
29+
end
30+
31+
using Metal: storagemode
32+
@testset "decompositions" begin
33+
A = MtlMatrix(rand(Float32, 1024, 1024))
34+
lua = lu(A)
35+
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
36+
37+
A = MtlMatrix(rand(Float32, 1024, 512))
38+
lua = lu(A)
39+
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
40+
41+
A = MtlMatrix(rand(Float32, 512, 1024))
42+
lua = lu(A)
43+
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
44+
45+
a = rand(Float32, 1024, 1024)
46+
A = MtlMatrix(a)
47+
B = MtlMatrix(a)
48+
lua = lu!(A)
49+
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * B
50+
51+
A = MtlMatrix{Float32}([1 2; 0 0])
52+
@test_throws SingularException lu(A)
53+
54+
altStorage = Metal.DefaultStorageMode != Metal.PrivateStorage ? Metal.PrivateStorage : Metal.SharedStorage
55+
A = MtlMatrix{Float32, altStorage}(rand(Float32, 1024, 1024))
56+
lua = lu(A)
57+
@test storagemode(lua.factors) == storagemode(lua.ipiv) == storagemode(A)
58+
lua = lu!(A)
59+
@test storagemode(lua.factors) == storagemode(lua.ipiv) == storagemode(A)
60+
end
61+
62+
end

test/mps/linalg.jl

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -69,26 +69,6 @@ end
6969
end
7070
end
7171

72-
@testset "test matrix vector multiplication of views" begin
73-
N = 20
74-
a = rand(Float32, N,N)
75-
b = rand(Float32, N)
76-
77-
mtl_a = mtl(a)
78-
mtl_b = mtl(b)
79-
80-
view_a = @view a[:,10:end]
81-
view_b = @view b[10:end]
82-
83-
mtl_view_a = @view mtl_a[:,10:end]
84-
mtl_view_b = @view mtl_b[10:end]
85-
86-
mtl_c = mtl_view_a * mtl_view_b
87-
c = view_a * view_b
88-
89-
@test Array(mtl_c) == c
90-
end
91-
9272
@testset "mixed-precision matrix vector multiplication" begin
9373
N = 10
9474
rows = N
@@ -180,37 +160,6 @@ end
180160
end
181161
end
182162

183-
using Metal: storagemode
184-
@testset "decompositions" begin
185-
A = MtlMatrix(rand(Float32, 1024, 1024))
186-
lua = lu(A)
187-
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
188-
189-
A = MtlMatrix(rand(Float32, 1024, 512))
190-
lua = lu(A)
191-
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
192-
193-
A = MtlMatrix(rand(Float32, 512, 1024))
194-
lua = lu(A)
195-
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * A
196-
197-
a = rand(Float32, 1024, 1024)
198-
A = MtlMatrix(a)
199-
B = MtlMatrix(a)
200-
lua = lu!(A)
201-
@test lua.L * lua.U ≈ MtlMatrix(lua.P) * B
202-
203-
A = MtlMatrix{Float32}([1 2; 0 0])
204-
@test_throws SingularException lu(A)
205-
206-
altStorage = Metal.DefaultStorageMode != Metal.PrivateStorage ? Metal.PrivateStorage : Metal.SharedStorage
207-
A = MtlMatrix{Float32,altStorage}(rand(Float32, 1024, 1024))
208-
lua = lu(A)
209-
@test storagemode(lua.factors) == storagemode(lua.ipiv) == storagemode(A)
210-
lua = lu!(A)
211-
@test storagemode(lua.factors) == storagemode(lua.ipiv) == storagemode(A)
212-
end
213-
214163
using .MPS: MPSMatrixSoftMax, MPSMatrixLogSoftMax
215164
@testset "MPSMatrixSoftMax" begin
216165
cols = rand(Int)

0 commit comments

Comments
 (0)