
Commit 24d78d8

checkpoint
1 parent 0c171fb

File tree

1 file changed (+35, -29 lines)


ggml/src/ggml-metal/ggml-metal.m

Lines changed: 35 additions & 29 deletions
@@ -6014,7 +6014,8 @@ static enum ggml_status ggml_backend_metal_split_buffer_init_tensor(ggml_backend
 
     // Allocate Metal buffer directly using ctx_dev->mtl_device
     GGML_LOG_DEBUG("%s: tensor '%s' allocating Metal buffer with size=%zu\n", __func__, tensor->name, size);
-    extra->data_device[id] = [ctx_dev->mtl_device newBufferWithLength:size options:MTLResourceStorageModePrivate];
+    extra->data_device[id] = [ctx_dev->mtl_device newBufferWithLength:size
+                                                              options:MTLResourceStorageModeShared];
 
     if (extra->data_device[id] == nil) {
         GGML_LOG_ERROR("%s: failed to allocate Metal buffer for tensor '%s' with size=%zu\n", __func__, tensor->name, size);
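
Note on the storage-mode change in this hunk: MTLResourceStorageModePrivate buffers live in GPU-only memory and expose no CPU pointer ([buffer contents] returns NULL for private buffers), so the memcpy-based upload in set_tensor below requires a CPU-visible mode such as MTLResourceStorageModeShared. A minimal standalone sketch of the distinction (not part of the commit; the buffer size and variable names are illustrative):

#import <Metal/Metal.h>
#include <string.h>

int main(void) {
    id<MTLDevice> dev = MTLCreateSystemDefaultDevice(); // may be nil without a Metal GPU

    // GPU-only storage: no CPU mapping, [priv contents] returns NULL
    id<MTLBuffer> priv = [dev newBufferWithLength:4096
                                          options:MTLResourceStorageModePrivate];

    // Shared storage: a single allocation visible to both CPU and GPU
    id<MTLBuffer> shrd = [dev newBufferWithLength:4096
                                          options:MTLResourceStorageModeShared];

    float row[16] = {0};
    memcpy([shrd contents], row, sizeof(row));    // valid: CPU-visible pointer
    // memcpy([priv contents], row, sizeof(row)); // invalid: [priv contents] is NULL

    return 0;
}

Uploading into a private buffer would instead require a staging buffer plus a MTLBlitCommandEncoder copy, which is presumably why this checkpoint switches to shared storage for the direct memcpy path.
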
@@ -6043,43 +6044,48 @@ static enum ggml_status ggml_backend_metal_split_buffer_init_tensor(ggml_backend
 }
 
 // Buffer set tensor function
-static void ggml_backend_metal_split_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    // Split tensors must always be set in their entirety at once
+static void ggml_backend_metal_split_buffer_set_tensor(
+        ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
+        const void * data, size_t offset, size_t size)
+{
+    // Must set entire tensor at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
-    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
-
-    struct ggml_backend_metal_split_buffer_type_context * buft_ctx = (struct ggml_backend_metal_split_buffer_type_context *)buffer->buft->context;
+    GGML_ASSERT(ggml_is_contiguous(tensor));
 
+    struct ggml_backend_metal_split_buffer_type_context * buft_ctx = (struct ggml_backend_metal_split_buffer_type_context *) buffer->buft->context;
     const int64_t ne0 = tensor->ne[0];
     const size_t nb1 = tensor->nb[1];
-    struct ggml_tensor_extra_metal * extra = (struct ggml_tensor_extra_metal *)tensor->extra;
-
-    // For Metal, we only have one device
-    int id = 0;
-    int64_t row_low, row_high;
-    get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
-
-    int64_t nrows_split = row_high - row_low;
-    if (nrows_split == 0) {
-        return;
-    }
-
-    const size_t offset_split = row_low * nb1;
-    size_t alloc_size = ggml_nbytes_split(tensor, nrows_split);
-    const size_t original_size = alloc_size;
+    struct ggml_tensor_extra_metal * extra = (struct ggml_tensor_extra_metal *) tensor->extra;
+
+    // For Metal we treat id=0 as the (only) device; loop structure left in place
+    const int device_count = 1;
+    for (int id = 0; id < device_count; ++id) {
+        int64_t row_low = 0, row_high = 0;
+        get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
+        int64_t nrows = row_high - row_low;
+        if (nrows <= 0) {
+            continue;
+        }
+        // Compute offset and sizes for this slice
+        const size_t offset_split = (size_t) row_low * nb1;
+        size_t original_size = ggml_nbytes_split(tensor, nrows);
+        size_t copy_size = original_size;
+        // Pad for alignment (if needed) but only original_size bytes are copied
+        if (ne0 % MATRIX_ROW_PADDING != 0) {
+            copy_size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - (ne0 % MATRIX_ROW_PADDING));
+        }
+        const char * buf_host = (const char *) data + offset_split;
 
-    // Pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
-    if (ne0 % MATRIX_ROW_PADDING != 0) {
-        alloc_size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
+        // Copy the slice into the Metal buffer ([contents] is a CPU pointer into shared memory)
+        memcpy([extra->data_device[id] contents], buf_host, original_size);
+        // didModifyRange: is only needed for MTLResourceStorageModeManaged buffers;
+        // shared buffers are CPU/GPU coherent, so it stays disabled here
+        //[extra->data_device[id] didModifyRange:NSMakeRange(0, original_size)];
     }
-
-    const char * buf_host = (const char *)data + offset_split;
-
-    // Copy data to Metal buffer
-    memcpy([extra->data_device[id] contents], buf_host, original_size);
 }
 
+
 // Buffer get tensor function
 static void ggml_backend_metal_split_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     // Split tensors must always be retrieved in their entirety at once
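
For reference, the row-split arithmetic that set_tensor relies on: assuming get_row_split follows the convention of the analogous CUDA split-buffer helper, tensor_split[] holds cumulative start fractions per device, and device id owns rows [row_low, row_high). The sketch below is a hypothetical stand-in (row_split_sketch is not the ggml function, and the real helper may also round the bounds to a row-granularity multiple):

#include <stdint.h>
#include <stdio.h>

// Hypothetical stand-in for get_row_split, assuming tensor_split[] stores
// cumulative start fractions: device id owns rows [row_low, row_high).
static void row_split_sketch(int64_t nrows, const float * tensor_split,
                             int device_count, int id,
                             int64_t * row_low, int64_t * row_high) {
    *row_low  = (int64_t) (nrows * tensor_split[id]);
    *row_high = id + 1 < device_count
                    ? (int64_t) (nrows * tensor_split[id + 1])
                    : nrows;
}

int main(void) {
    // Single-device case (the Metal backend here): start fraction 0.0,
    // so device 0 owns every row.
    const float tensor_split[1] = { 0.0f };
    int64_t row_low = 0, row_high = 0;
    row_split_sketch(1000, tensor_split, 1, 0, &row_low, &row_high);
    printf("rows [%lld, %lld)\n", (long long) row_low, (long long) row_high); // rows [0, 1000)

    // In the diff, the byte offset of the slice is row_low * nb1 and the
    // copy size comes from ggml_nbytes_split(tensor, row_high - row_low).
    return 0;
}

Under this convention, a start fraction of 1.0f for a single device would yield an empty range (row_low == row_high) and the copy would be skipped entirely, so the single-device split must start at 0.0f.
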
