Commit 77b1864

checkpoint

1 parent 2f25907

1 file changed: +64 -3 lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 64 additions & 3 deletions
@@ -5946,47 +5946,87 @@ static void ggml_backend_metal_split_buffer_free_buffer(ggml_backend_buffer_t bu
 static enum ggml_status ggml_backend_metal_split_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == NULL); // views of split tensors are not supported
     GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
-
+
+    GGML_LOG_DEBUG("%s: initializing tensor '%s' with %d dimensions [%lld, %lld, %lld, %lld]\n",
+        __func__, tensor->name, ggml_n_dims(tensor),
+        tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
     struct ggml_backend_metal_split_buffer_context * ctx = (struct ggml_backend_metal_split_buffer_context *)buffer->context;
     struct ggml_backend_metal_split_buffer_type_context * buft_ctx = (struct ggml_backend_metal_split_buffer_type_context *)buffer->buft->context;
 
     const int64_t ne0 = tensor->ne[0];
 
     struct ggml_tensor_extra_metal * extra = calloc(1, sizeof(struct ggml_tensor_extra_metal));
+    if (extra == NULL) {
+        GGML_LOG_ERROR("%s: failed to allocate tensor extra for '%s'\n", __func__, tensor->name);
+        return GGML_STATUS_ALLOC_FAILED;
+    }
+
     // For a dynamic array, we need to manually manage the array
     ctx->tensor_extras = realloc(ctx->tensor_extras, (ctx->tensor_extras_size + 1) * sizeof(struct ggml_tensor_extra_metal *));
+    if (ctx->tensor_extras == NULL) {
+        GGML_LOG_ERROR("%s: failed to reallocate tensor_extras array\n", __func__);
+        free(extra);
+        return GGML_STATUS_ALLOC_FAILED;
+    }
     ctx->tensor_extras[ctx->tensor_extras_size] = extra;
     ctx->tensor_extras_size++;
 
     // For Metal, we only have one device
     int id = 0;
     int64_t row_low, row_high;
     get_row_split(&row_low, &row_high, tensor, buft_ctx->tensor_split, id);
+
+    GGML_LOG_DEBUG("%s: tensor '%s' row split: low=%lld, high=%lld\n", __func__, tensor->name, row_low, row_high);
 
     int64_t nrows_split = row_high - row_low;
     if (nrows_split == 0) {
+        GGML_LOG_DEBUG("%s: tensor '%s' has 0 rows, skipping allocation\n", __func__, tensor->name);
         tensor->extra = extra;
         return GGML_STATUS_SUCCESS;
     }
 
     size_t size = ggml_nbytes_split(tensor, nrows_split);
+    GGML_LOG_DEBUG("%s: tensor '%s' size=%zu bytes\n", __func__, tensor->name, size);
+
     // const size_t original_size = size; // Not used in this implementation
 
     // Pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
     if (ne0 % MATRIX_ROW_PADDING != 0) {
-        size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
+        size_t padding = ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
+        GGML_LOG_DEBUG("%s: tensor '%s' adding padding=%zu bytes\n", __func__, tensor->name, padding);
+        size += padding;
     }
 
     // Get Metal device context
     struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buffer->buft->device->context;
+    GGML_LOG_DEBUG("%s: tensor '%s' using Metal device: %s\n", __func__, tensor->name, ctx_dev->name);
 
     // Allocate Metal buffer directly using ctx_dev->mtl_device
+    GGML_LOG_DEBUG("%s: tensor '%s' allocating Metal buffer with size=%zu\n", __func__, tensor->name, size);
     extra->data_device[id] = [ctx_dev->mtl_device newBufferWithLength:size options:MTLResourceStorageModePrivate];
 
+    if (extra->data_device[id] == nil) {
+        GGML_LOG_ERROR("%s: failed to allocate Metal buffer for tensor '%s' with size=%zu\n", __func__, tensor->name, size);
+        free(extra);
+        return GGML_STATUS_ALLOC_FAILED;
+    }
+
+    GGML_LOG_DEBUG("%s: tensor '%s' Metal buffer allocated at %p\n", __func__, tensor->name, extra->data_device[id]);
+
     // Initialize buffer with zeros
-    memset([extra->data_device[id] contents], 0, size);
+    GGML_LOG_DEBUG("%s: tensor '%s' initializing buffer with zeros\n", __func__, tensor->name);
+    void * bufferContents = [extra->data_device[id] contents];
+    if (bufferContents == NULL) {
+        GGML_LOG_ERROR("%s: Metal buffer contents is NULL for tensor '%s'\n", __func__, tensor->name);
+        [extra->data_device[id] release];
+        free(extra);
+        return GGML_STATUS_ALLOC_FAILED;
+    }
+    memset(bufferContents, 0, size);
 
     tensor->extra = extra;
+    GGML_LOG_DEBUG("%s: tensor '%s' initialization completed\n", __func__, tensor->name);
     return GGML_STATUS_SUCCESS;
 }
 
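The padding branch in this hunk rounds the allocation up so the last row spans a whole multiple of MATRIX_ROW_PADDING (512) elements before the Metal buffer is requested; the new debug log reports the extra bytes added. Below is a minimal standalone C sketch of that arithmetic only, assuming a hypothetical dense F32 tensor with the constant inlined; it does not use the real ggml_nbytes_split()/ggml_row_size() helpers, which also handle block-quantized types.

/*
 * Sketch only, not part of the commit: approximates the padded allocation
 * size for a dense (non-quantized) F32 tensor.
 */
#include <stdint.h>
#include <stdio.h>

#define MATRIX_ROW_PADDING 512  /* same constant the backend checks against */

int main(void) {
    const int64_t ne0         = 4097;           /* elements per row (hypothetical) */
    const int64_t nrows_split = 8;              /* rows assigned to this device (hypothetical) */
    const size_t  type_size   = sizeof(float);  /* F32: 4 bytes per element */

    size_t size = (size_t)(ne0 * nrows_split) * type_size;

    /* pad the last row to a multiple of MATRIX_ROW_PADDING elements,
     * mirroring the branch in ggml_backend_metal_split_buffer_init_tensor() */
    if (ne0 % MATRIX_ROW_PADDING != 0) {
        const size_t padding = (size_t)(MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING) * type_size;
        size += padding;
    }

    printf("padded allocation size: %zu bytes\n", size);
    return 0;
}

For 4097 F32 elements per row, this sketch pads by 511 elements (2044 bytes), the kind of value the new "adding padding" debug line would report.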
@@ -6163,15 +6203,36 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_split_buffer_type(int m
     // We'll just create a new buffer type context each time since Metal only has one device
 
     struct ggml_backend_metal_split_buffer_type_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_split_buffer_type_context));
+    if (ctx == NULL) {
+        GGML_LOG_ERROR("%s: failed to allocate buffer type context\n", __func__);
+        return NULL;
+    }
+
     ctx->main_device = main_device;
     ctx->tensor_split[0] = 1.0f; // All tensors go to the single Metal device
     ctx->name = "Metal_Split";
+
+    GGML_LOG_DEBUG("%s: tensor_split[0] = %f\n", __func__, ctx->tensor_split[0]);
 
     // Allocate a new buffer type structure each time
     struct ggml_backend_buffer_type * buft = calloc(1, sizeof(struct ggml_backend_buffer_type));
+    if (buft == NULL) {
+        GGML_LOG_ERROR("%s: failed to allocate buffer type\n", __func__);
+        free(ctx);
+        return NULL;
+    }
+
     buft->iface = ggml_backend_split_buffer_type_interface;
     buft->device = ggml_backend_reg_dev_get(ggml_backend_metal_reg(), main_device);
+    if (buft->device == NULL) {
+        GGML_LOG_ERROR("%s: failed to get device for main_device=%d\n", __func__, main_device);
+        free(ctx);
+        free(buft);
+        return NULL;
+    }
     buft->context = ctx;
+
+    GGML_LOG_DEBUG("%s: buffer type created successfully\n", __func__);
 
     return buft;
 }
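The error paths added in this hunk follow the usual allocate-check-unwind ordering: each calloc and lookup is verified immediately, and every later failure frees exactly what was allocated before it, so callers only need to test the return value for NULL. A small standalone C sketch of that pattern follows; the struct and function names are illustrative placeholders, not the real ggml types or API.

/*
 * Sketch only, not part of the commit: shows the allocate-check-unwind
 * ordering used in ggml_backend_split_buffer_type().
 */
#include <stdio.h>
#include <stdlib.h>

struct split_ctx   { int main_device; };
struct buffer_type { struct split_ctx * context; };

static struct buffer_type * make_buffer_type(int main_device) {
    struct split_ctx * ctx = calloc(1, sizeof(*ctx));
    if (ctx == NULL) {
        return NULL;                 /* nothing allocated yet, nothing to free */
    }
    ctx->main_device = main_device;

    struct buffer_type * buft = calloc(1, sizeof(*buft));
    if (buft == NULL) {
        free(ctx);                   /* unwind the earlier allocation */
        return NULL;
    }

    buft->context = ctx;
    return buft;                     /* caller checks the result for NULL */
}

int main(void) {
    struct buffer_type * buft = make_buffer_type(0);
    if (buft == NULL) {
        fprintf(stderr, "failed to create buffer type\n");
        return 1;
    }
    free(buft->context);
    free(buft);
    return 0;
}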
