Remove copying of MPSGraph matmul results by writing directly into the output array

christiangnrd · christiangnrd · commit a4e82168a637 · 2025-03-18T20:03:38.000-03:00
diff --git a/lib/mps/MPS.jl b/lib/mps/MPS.jl
@@ -21,7 +21,7 @@ using BFloat16s
 const MtlFloat = Union{Float32, Float16}
 
 const MPSShape = NSArray#{NSNumber}
-Base.convert(::Type{MPSShape}, tuple::Union{Vector{N},NTuple{N, <:Integer}}) where N = NSArray(NSNumber.(collect(tuple)))
+Base.convert(::Type{MPSShape}, tuple::Union{Vector{T},NTuple{T, <:Integer}}) where T = NSArray(NSNumber.(collect(tuple)))
 
 # Valid combination of input (A and B matrices) and output (C) types
 const MPS_VALID_MATMUL_TYPES =
diff --git a/lib/mpsgraphs/matmul.jl b/lib/mpsgraphs/matmul.jl
@@ -1,8 +1,14 @@
-function _matmul!(c::MPSMatrix, ::Type{Tc}, a::MPSMatrix, b::MPSMatrix, ::Type{Tab}, alpha::Number, beta::Number, transpose_a, transpose_b) where {Tc, Tab}
+function _matmul!(c::MtlArray{Tc}, a::MtlArray{Tab}, b::MtlArray{Tab}, alpha::Number, beta::Number, transpose_a, transpose_b) where {Tc, Tab}
     graph = MPSGraph()
 
     placeA = placeholderTensor(graph, size(a), Tab)
     placeB = placeholderTensor(graph, size(b), Tab)
+    outputTensorData = MPSGraphTensorData(c)
+
+    feeds = Dict{MPSGraphTensor, MPSGraphTensorData}(
+        placeA => MPSGraphTensorData(a),
+        placeB => MPSGraphTensorData(b)
+    )
 
     castA, castB = if Tc != Tab
         castTensor(graph, placeA, Tc, "castA"),
@@ -32,51 +38,32 @@ function _matmul!(c::MPSMatrix, ::Type{Tc}, a::MPSMatrix, b::MPSMatrix, ::Type{T
         multiplicationWithPrimaryTensor(graph, alphatensor, matmul)
     end
 
-    feeds = Dict{MPSGraphTensor, MPSGraphTensorData}(
-        placeA => MPSGraphTensorData(a),
-        placeB => MPSGraphTensorData(b)
-    )
-
     afterbeta = if beta == 0
         afteralpha
     else
         placeC = placeholderTensor(graph, size(c), Tc)
-        feeds[placeC] = MPSGraphTensorData(c)
+        feeds[placeC] = outputTensorData
         betatensor = constantWithScalar(graph, beta, Tc)
         betaC = multiplicationWithPrimaryTensor(graph, betatensor, placeC)
         additionWithPrimaryTensor(graph, afteralpha, betaC)
     end
 
-    # Encode and commit matmul kernel
-    cmdbuf = MPSCommandBuffer(Metal.global_queue(device()))
-    resultdict = encode!(cmdbuf, graph, NSDictionary(feeds), NSArray([afterbeta]))
-    commitAndContinue!(cmdbuf)
+    resultdict = Dict{MPSGraphTensor, MPSGraphTensorData}(
+        afterbeta => outputTensorData
+    )
 
-    resultdata = MPSGraphTensorData(id{MPSGraphTensorData}(resultdict[afterbeta]))
+    cmdbuf = MPSCommandBuffer(Metal.global_queue(device()))
+    encode!(cmdbuf, graph, NSDictionary(feeds), NSDictionary(resultdict))
+    commit!(cmdbuf)
+    wait_completed(cmdbuf)
 
-    return cmdbuf, MPSNDArray(resultdata)
+    return c
 end
 
 function graph_matmul!(c::MtlArray{Tc, N}, a::MtlArray{Tab, N}, b::MtlArray{Tab, N}, alpha::Number = true, beta::Number = false, transpose_a = false, transpose_b = false) where {Tc, Tab, N}
-    cmdbuf, resultndarr = _matmul!(MPSMatrix(c), Tc, MPSMatrix(a), MPSMatrix(b), Tab, alpha, beta, transpose_a, transpose_b)
-
-    commit!(cmdbuf) do cmdBuf
-        exportDataWithCommandBuffer(resultndarr, cmdBuf, c.data[], Tc, c.offset)
-    end
-
-    wait_completed(cmdbuf)
-
-    return c
+    _matmul!(c, a, b, alpha, beta, transpose_a, transpose_b)
 end
 
 function graph_matvecmul!(c::MtlVector{Tc}, a::MtlMatrix{Tab}, b::MtlVector{Tab}, alpha::Number = true, beta::Number = false, transpose = false) where {Tc, Tab}
-    cmdbuf, resultndarr = _matmul!(MPSMatrix(c), Tc, MPSMatrix(a), MPSMatrix(b), Tab, alpha, beta, transpose, false)
-
-    commit!(cmdbuf) do cmdBuf
-        exportDataWithCommandBuffer(resultndarr, cmdBuf, c.data[], Tc, c.offset)
-    end
-
-    wait_completed(cmdbuf)
-
-    return c
+    _matmul!(c, a, b, alpha, beta, transpose, false)
 end
diff --git a/lib/mpsgraphs/tensor.jl b/lib/mpsgraphs/tensor.jl
@@ -14,7 +14,7 @@ function Base.size(td::MPSGraphTensor)
 end
 
 function placeholderTensor(graph::MPSGraph, shape::Union{Vector, Tuple}, args...)
-    mpsshape = convert(MPSShape, shape)
+    mpsshape = convert(MPSShape, reverse(shape))
     return placeholderTensor(graph, mpsshape, args...)
 end
 function placeholderTensor(graph::MPSGraph, shape::MPSShape, dataType::Type, name = "placeholder tensor")
@@ -53,9 +53,7 @@ function MPSGraphTensorData(buffer::MTLBuffer, shape::MPSShape, dataType, rowByt
                                     rowBytes:rowBytes::NSUInteger]::id{MPSGraphTensorData}
     return tensor
 end
-# MPSGraphTensorData(matrix::MtlMatrix{T}) where T = MPSGraphTensorData(matrix.data[], convert(MPSShape, reverse(size(matrix))), T)
-MPSGraphTensorData(matrix::MtlMatrix) = MPSGraphTensorData(MPSMatrix(matrix))
-MPSGraphTensorData(arr::MtlArray{<:Any, 3}) = MPSGraphTensorData(MPSMatrix(arr))
+MPSGraphTensorData(matrix::MtlArray{T}) where T = MPSGraphTensorData(matrix.data[], convert(MPSShape, reverse(size(matrix))), T)
 
 function MPSGraphTensorData(matrix::MPSMatrix)
     obj = @objc [MPSGraphTensorData alloc]::id{MPSGraphTensorData}
@@ -82,8 +80,6 @@ function MPSGraphTensorData(vector::MPSVector)
     @objc [tensor::id{MPSGraphTensorData} initWithMPSVector:vector::id{MPSVector}]::id{MPSGraphTensorData}
     return tensor
 end
-# MPSGraphTensorData(vector::MtlVector{T}) where T = MPSGraphTensorData(vector.data[], convert(MPSShape, size(vector)), T)
-MPSGraphTensorData(vector::MtlVector) = MPSGraphTensorData(MPSVector(vector))
 
 # rank must be between 1 and 16 inclusive
 function MPSGraphTensorData(vector::MPSVector, rank)