Skip to content

Commit 831b204

Browse files
committed
metal : cleanp
ggml-ci
1 parent 7a23824 commit 831b204

File tree

3 files changed

+28
-38
lines changed

3 files changed

+28
-38
lines changed

ggml/src/ggml-metal/ggml-metal-context.m

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -234,17 +234,6 @@ void ggml_metal_synchronize(ggml_metal_t ctx) {
234234
}
235235
}
236236

237-
// TODO: temporary shim
238-
static id<MTLBuffer> ggml_metal_get_buffer(const struct ggml_tensor * t, size_t * offs) {
239-
ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
240-
241-
struct ggml_metal_buffer_id res = ggml_metal_buffer_get_id(buffer->context, t);
242-
243-
*offs = res.offs;
244-
245-
return res.metal;
246-
}
247-
248237
static struct ggml_metal_buffer_id ggml_metal_get_buffer_id(const struct ggml_tensor * t) {
249238
if (!t) {
250239
return (struct ggml_metal_buffer_id) { nil, 0 };
@@ -262,14 +251,12 @@ void ggml_metal_set_tensor_async(ggml_metal_t ctx, struct ggml_tensor * tensor,
262251
length:size
263252
options:MTLResourceStorageModeShared];
264253

265-
size_t buf_dst_offset = 0;
266-
id<MTLBuffer> buf_dst = ggml_metal_get_buffer(tensor, &buf_dst_offset);
267-
268-
if (buf_dst == nil) {
254+
struct ggml_metal_buffer_id bid_dst = ggml_metal_get_buffer_id(tensor);
255+
if (bid_dst.metal == nil) {
269256
GGML_ABORT("%s: failed to find buffer for tensor '%s'\n", __func__, tensor->name);
270257
}
271258

272-
buf_dst_offset += offset;
259+
bid_dst.offs += offset;
273260

274261
// queue the copy operation into the queue of the Metal context
275262
// this will be queued at the end, after any currently ongoing GPU operations
@@ -278,8 +265,8 @@ void ggml_metal_set_tensor_async(ggml_metal_t ctx, struct ggml_tensor * tensor,
278265

279266
[encoder copyFromBuffer:buf_src
280267
sourceOffset:0
281-
toBuffer:buf_dst
282-
destinationOffset:buf_dst_offset
268+
toBuffer:bid_dst.metal
269+
destinationOffset:bid_dst.offs
283270
size:size];
284271

285272
[encoder endEncoding];
@@ -303,22 +290,20 @@ void ggml_metal_get_tensor_async(ggml_metal_t ctx, const struct ggml_tensor * te
303290
options:MTLResourceStorageModeShared
304291
deallocator:nil];
305292

306-
size_t buf_src_offset = 0;
307-
id<MTLBuffer> buf_src = ggml_metal_get_buffer(tensor, &buf_src_offset);
308-
309-
if (buf_src == nil) {
293+
struct ggml_metal_buffer_id bid_src = ggml_metal_get_buffer_id(tensor);
294+
if (bid_src.metal == nil) {
310295
GGML_ABORT("%s: failed to find buffer for tensor '%s'\n", __func__, tensor->name);
311296
}
312297

313-
buf_src_offset += offset;
298+
bid_src.offs += offset;
314299

315300
// queue the copy operation into the queue of the Metal context
316301
// this will be queued at the end, after any currently ongoing GPU operations
317302
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
318303
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
319304

320-
[encoder copyFromBuffer:buf_src
321-
sourceOffset:buf_src_offset
305+
[encoder copyFromBuffer:bid_src.metal
306+
sourceOffset:bid_src.offs
322307
toBuffer:buf_dst
323308
destinationOffset:0
324309
size:size];

ggml/src/ggml-metal/ggml-metal-device.m

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,17 +1119,17 @@ void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor
11191119

11201120
@autoreleasepool {
11211121
// dst
1122-
struct ggml_metal_buffer_id buf_dst = ggml_metal_buffer_get_id(buf, tensor);
1123-
buf_dst.offs += offset;
1122+
struct ggml_metal_buffer_id bid_dst = ggml_metal_buffer_get_id(buf, tensor);
1123+
bid_dst.offs += offset;
11241124

11251125
id<MTLCommandQueue> queue = buf->queue;
11261126
id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
11271127

11281128
{
11291129
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
11301130

1131-
[encoder fillBuffer:buf_dst.metal
1132-
range:NSMakeRange(buf_dst.offs, buf_dst.offs + size)
1131+
[encoder fillBuffer:bid_dst.metal
1132+
range:NSMakeRange(bid_dst.offs, bid_dst.offs + size)
11331133
value:value];
11341134

11351135
[encoder endEncoding];
@@ -1155,8 +1155,8 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
11551155
deallocator:nil];
11561156

11571157
// dst
1158-
struct ggml_metal_buffer_id buf_dst = ggml_metal_buffer_get_id(buf, tensor);
1159-
buf_dst.offs += offset;
1158+
struct ggml_metal_buffer_id bid_dst = ggml_metal_buffer_get_id(buf, tensor);
1159+
bid_dst.offs += offset;
11601160

11611161
// note: for experimentation purposes, here we use a semaphore to wait for the copy to complete
11621162
// this is alternative to waitUntilCompleted, which should be faster, but don't seem to make much difference
@@ -1170,8 +1170,8 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
11701170

11711171
[encoder copyFromBuffer:buf_src
11721172
sourceOffset:0
1173-
toBuffer:buf_dst.metal
1174-
destinationOffset:buf_dst.offs
1173+
toBuffer:bid_dst.metal
1174+
destinationOffset:bid_dst.offs
11751175
size:size];
11761176

11771177
[encoder endEncoding];
@@ -1187,6 +1187,8 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
11871187
[cmd_buf commit];
11881188

11891189
dispatch_semaphore_wait(completion_semaphore, DISPATCH_TIME_FOREVER);
1190+
dispatch_release(completion_semaphore);
1191+
11901192
//[cmd_buf waitUntilCompleted];
11911193
}
11921194
}
@@ -1199,8 +1201,8 @@ void ggml_metal_buffer_get_tensor(ggml_metal_buffer_t buf, const struct ggml_ten
11991201

12001202
@autoreleasepool {
12011203
// src
1202-
struct ggml_metal_buffer_id buf_src = ggml_metal_buffer_get_id(buf, tensor);
1203-
buf_src.offs += offset;
1204+
struct ggml_metal_buffer_id bid_src = ggml_metal_buffer_get_id(buf, tensor);
1205+
bid_src.offs += offset;
12041206

12051207
// dst
12061208
id<MTLBuffer> buf_dst = [buf->device newBufferWithBytesNoCopy:data
@@ -1214,8 +1216,8 @@ void ggml_metal_buffer_get_tensor(ggml_metal_buffer_t buf, const struct ggml_ten
12141216
{
12151217
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
12161218

1217-
[encoder copyFromBuffer:buf_src.metal
1218-
sourceOffset:buf_src.offs
1219+
[encoder copyFromBuffer:bid_src.metal
1220+
sourceOffset:bid_src.offs
12191221
toBuffer:buf_dst
12201222
destinationOffset:0
12211223
size:size];

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1406,7 +1406,10 @@ kernel void kernel_elu_f32_4(
14061406
device float4 * dst,
14071407
uint tpig[[thread_position_in_grid]]) {
14081408
const float4 x = src0[tpig];
1409-
dst[tpig] = float4(x > 0.0f)*x + float4(x <= 0.0f)*(exp(x) - 1.0f);
1409+
dst[tpig][0] = (x[0] > 0.0f) ? x[0] : (exp(x[0]) - 1.0f);
1410+
dst[tpig][1] = (x[1] > 0.0f) ? x[1] : (exp(x[1]) - 1.0f);
1411+
dst[tpig][2] = (x[2] > 0.0f) ? x[2] : (exp(x[2]) - 1.0f);
1412+
dst[tpig][3] = (x[3] > 0.0f) ? x[3] : (exp(x[3]) - 1.0f);
14101413
}
14111414

14121415
kernel void kernel_sqr_f32(

0 commit comments

Comments
 (0)