@@ -52,17 +52,15 @@ static ggml_mem_range ggml_mem_range_from_tensor(const ggml_tensor * tensor, ggm
5252 ggml_mem_range mrp;
5353
5454 if (tensor->buffer ) {
55+ // when the tensor is allocated, use the actual memory address range in the buffer
56+ //
5557 // take the actual allocated size
5658 // this can be larger than the tensor size if the buffer type allocates extra memory
5759 // ref: https://github.com/ggml-org/llama.cpp/pull/15966
58- ggml_backend_buffer_type_t buft = tensor->buffer ->buft ;
59- const size_t alloc_size = buft->iface .get_alloc_size ? buft->iface .get_alloc_size (buft, tensor) : ggml_nbytes (tensor);
60-
61- // when the tensor is allocated, use the actual memory address range of the buffer
6260 mrp = {
6361 /* .pb =*/ (uint64_t ) tensor->buffer ,
6462 /* .p0 =*/ (uint64_t ) tensor->data ,
65- /* .p1 =*/ (uint64_t ) tensor->data + alloc_size ,
63+ /* .p1 =*/ (uint64_t ) tensor->data + ggml_backend_buft_get_alloc_size (tensor-> buffer -> buft , tensor) ,
6664 /* .pt =*/ pt,
6765 };
6866 } else {
@@ -336,7 +334,7 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
336334
337335 const bool is_empty = node1.is_empty ();
338336
339- // to add a concurrent node, it has to be:
337+ // to reorder a node and add it to the concurrent set, it has to be:
340338 // + empty or concurrent with all nodes in the existing concurrent set (mrs0)
341339 // + concurrent with all nodes prior to it that haven't been processed yet (mrs1)
342340 if ((is_empty || h_check (mrs0, node1)) && h_check (mrs1, node1)) {
0 commit comments