cont : use ggml_backend_buft_get_alloc_size

ggerganov · ggerganov · commit 400e0379bd4d · 2025-09-13T19:02:59.000+03:00
ggml-ci
diff --git a/ggml/src/ggml-metal/ggml-metal-common.cpp b/ggml/src/ggml-metal/ggml-metal-common.cpp
@@ -52,17 +52,15 @@ static ggml_mem_range ggml_mem_range_from_tensor(const ggml_tensor * tensor, ggm
     ggml_mem_range mrp;
 
     if (tensor->buffer) {
+        // when the tensor is allocated, use the actual memory address range in the buffer
+        //
         // take the actual allocated size
         // this can be larger than the tensor size if the buffer type allocates extra memory
         // ref: https://github.com/ggml-org/llama.cpp/pull/15966
-        ggml_backend_buffer_type_t buft = tensor->buffer->buft;
-        const size_t alloc_size = buft->iface.get_alloc_size ? buft->iface.get_alloc_size(buft, tensor) : ggml_nbytes(tensor);
-
-        // when the tensor is allocated, use the actual memory address range of the buffer
         mrp = {
             /*.pb =*/ (uint64_t) tensor->buffer,
             /*.p0 =*/ (uint64_t) tensor->data,
-            /*.p1 =*/ (uint64_t) tensor->data + alloc_size,
+            /*.p1 =*/ (uint64_t) tensor->data + ggml_backend_buft_get_alloc_size(tensor->buffer->buft, tensor),
             /*.pt =*/ pt,
         };
     } else {
@@ -336,7 +334,7 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
 
                 const bool is_empty = node1.is_empty();
 
-                // to add a concurrent node, it has to be:
+                // to reorder a node and add it to the concurrent set, it has to be:
                 //   + empty or concurrent with all nodes in the existing concurrent set (mrs0)
                 //   + concurrent with all nodes prior to it that haven't been processed yet (mrs1)
                 if ((is_empty || h_check(mrs0, node1)) && h_check(mrs1, node1)) {