@@ -6014,7 +6014,9 @@ static enum ggml_status ggml_backend_metal_split_buffer_init_tensor(ggml_backend
 
     // Allocate Metal buffer directly using ctx_dev->mtl_device
     GGML_LOG_DEBUG("%s: tensor '%s' allocating Metal buffer with size=%zu\n", __func__, tensor->name, size);
-    extra->data_device[id] = [ctx_dev->mtl_device newBufferWithLength:size options:MTLResourceStorageModePrivate];
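+    // Shared storage keeps the buffer CPU-accessible: -[MTLBuffer contents] returns NULL for Private buffers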
+    extra->data_device[id] = [ctx_dev->mtl_device newBufferWithLength:size
+                                                               options:MTLResourceStorageModeShared];
 
     if (extra->data_device[id] == nil) {
         GGML_LOG_ERROR("%s: failed to allocate Metal buffer for tensor '%s' with size=%zu\n", __func__, tensor->name, size);
@@ -6043,43 +6045,50 @@ static enum ggml_status ggml_backend_metal_split_buffer_init_tensor(ggml_backend
 }
 
 // Buffer set tensor function
-static void ggml_backend_metal_split_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    // Split tensors must always be set in their entirety at once
+static void ggml_backend_metal_split_buffer_set_tensor(
+        ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
+        const void * data, size_t offset, size_t size)
+{
+    // Must set entire tensor at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
-    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
-
-    struct ggml_backend_metal_split_buffer_type_context * buft_ctx = (struct ggml_backend_metal_split_buffer_type_context *)buffer->buft->context;
+    GGML_ASSERT(ggml_is_contiguous(tensor));
 
+    struct ggml_backend_metal_split_buffer_type_context * buft_ctx = (struct ggml_backend_metal_split_buffer_type_context *) buffer->buft->context;
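+    // the buffer-type context carries the per-device tensor_split fractions used by get_row_split below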
     const int64_t ne0 = tensor->ne[0];
     const size_t nb1 = tensor->nb[1];
-    struct ggml_tensor_extra_metal * extra = (struct ggml_tensor_extra_metal *)tensor->extra;
-
-    // For Metal, we only have one device
-    int id = 0;
-    int64_t row_low, row_high;
-    get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
-
-    int64_t nrows_split = row_high - row_low;
-    if (nrows_split == 0) {
-        return;
-    }
-
-    const size_t offset_split = row_low * nb1;
-    size_t alloc_size = ggml_nbytes_split(tensor, nrows_split);
-    const size_t original_size = alloc_size;
+    struct ggml_tensor_extra_metal * extra = (struct ggml_tensor_extra_metal *) tensor->extra;
+
+    // For Metal there is only one device (id == 0); the loop structure is left in place
+    const int device_count = 1;
+    for (int id = 0; id < device_count; ++id) {
+        int64_t row_low = 0, row_high = 0;
+        get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
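+        // get_row_split yields the [row_low, row_high) row range assigned to device id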
+        int64_t nrows = row_high - row_low;
+        if (nrows <= 0) {
+            continue;
+        }
+        // Compute offset and sizes for this slice
+        const size_t offset_split = (size_t) row_low * nb1;
+        size_t original_size = ggml_nbytes_split(tensor, nrows);
+        size_t copy_size = original_size;
+        // Pad the last row to a multiple of MATRIX_ROW_PADDING elements; only original_size bytes are copied below
+        if (ne0 % MATRIX_ROW_PADDING != 0) {
+            copy_size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - (ne0 % MATRIX_ROW_PADDING));
+        }
+        GGML_UNUSED(copy_size);
+        const char * buf_host = (const char *) data + offset_split;
 
-    // Pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
-    if (ne0 % MATRIX_ROW_PADDING != 0) {
-        alloc_size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
+        // Copy the slice into the Shared Metal buffer through its CPU-visible contents pointer
+        memcpy([extra->data_device[id] contents], buf_host, original_size);
+        // didModifyRange: is only required for Managed buffers, so no call is needed for this Shared one:
+        // [extra->data_device[id] didModifyRange:NSMakeRange(0, original_size)];
     }
-
-    const char * buf_host = (const char *)data + offset_split;
-
-    // Copy data to Metal buffer
-    memcpy([extra->data_device[id] contents], buf_host, original_size);
 }
 
+
 // Buffer get tensor function
 static void ggml_backend_metal_split_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     // Split tensors must always be retrieved in their entirety at once