Skip to content

Commit 76e8816

Browse files
committed
metal : sync before destroying the backend
ggml-ci
1 parent: 83ae5b0 · commit: 76e8816

File tree

1 file changed: 29 additions (+29) and 23 deletions (-23).

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -376,6 +376,7 @@ - (void) dealloc {
376376

377377
// how many times a given op was fused
378378
uint64_t fuse_cnt[GGML_OP_COUNT];
379+
379380
// capture state
380381
bool capture_next_compute;
381382
bool capture_started;
@@ -490,7 +491,7 @@ - (void) dealloc {
490491

491492
ctx->cmd_buf_last = nil;
492493

493-
// load kernels
494+
// load default kernels
494495
{
495496
NSError * error = nil;
496497

@@ -501,12 +502,12 @@ - (void) dealloc {
501502
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
502503
if (supported) { \
503504
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
504-
id<MTLFunction> metal_function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
505-
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
505+
id<MTLFunction> function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
506+
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:function error:&error]; \
506507
GGML_LOG_DEBUG("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) kernel->pipeline, \
507508
(int) kernel->pipeline.maxTotalThreadsPerThreadgroup, \
508509
(int) kernel->pipeline.threadExecutionWidth); \
509-
[metal_function release]; \
510+
[function release]; \
510511
if (error) { \
511512
GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
512513
return NULL; \
@@ -1140,6 +1141,25 @@ static size_t ggml_metal_flash_attn_ext_extra_tmp(const struct ggml_tensor * op)
11401141
static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
11411142
GGML_LOG_INFO("%s: deallocating\n", __func__);
11421143

1144+
for (int i = 0; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
1145+
if (ctx->cmd_bufs[i].obj) {
1146+
[ctx->cmd_bufs[i].obj release];
1147+
}
1148+
1149+
if (ctx->cmd_bufs[i].mem_ranges) {
1150+
ggml_mem_ranges_free(ctx->cmd_bufs[i].mem_ranges);
1151+
}
1152+
}
1153+
1154+
for (int i = 0; i < (int) ctx->cmd_bufs_ext.count; ++i) {
1155+
if (ctx->cmd_bufs_ext[i]) {
1156+
[ctx->cmd_bufs_ext[i] release];
1157+
}
1158+
}
1159+
1160+
[ctx->cmd_bufs_ext removeAllObjects];
1161+
[ctx->cmd_bufs_ext release];
1162+
11431163
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
11441164
[ctx->kernels[i].pipeline release];
11451165
}
@@ -1165,25 +1185,6 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
11651185

11661186
//[ctx->queue release]; // [TAG_QUEUE_PER_BACKEND]
11671187

1168-
for (int i = 0; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
1169-
if (ctx->cmd_bufs[i].obj) {
1170-
[ctx->cmd_bufs[i].obj release];
1171-
}
1172-
1173-
if (ctx->cmd_bufs[i].mem_ranges) {
1174-
ggml_mem_ranges_free(ctx->cmd_bufs[i].mem_ranges);
1175-
}
1176-
}
1177-
1178-
for (int i = 0; i < (int) ctx->cmd_bufs_ext.count; ++i) {
1179-
if (ctx->cmd_bufs_ext[i]) {
1180-
[ctx->cmd_bufs_ext[i] release];
1181-
}
1182-
}
1183-
1184-
[ctx->cmd_bufs_ext removeAllObjects];
1185-
[ctx->cmd_bufs_ext release];
1186-
11871188
dispatch_release(ctx->d_queue);
11881189

11891190
free(ctx);
@@ -5528,6 +5529,8 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(void) {
55285529

55295530
// backend
55305531

5532+
static void ggml_backend_metal_synchronize(ggml_backend_t backend);
5533+
55315534
static const char * ggml_backend_metal_name(ggml_backend_t backend) {
55325535
return "Metal";
55335536

@@ -5537,6 +5540,9 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(void) {
55375540
static void ggml_backend_metal_free(ggml_backend_t backend) {
55385541
struct ggml_backend_metal_context * ctx = backend->context;
55395542

5543+
// wait for any ongoing async operations to finish
5544+
ggml_backend_metal_synchronize(backend);
5545+
55405546
ggml_metal_free(ctx);
55415547

55425548
free(backend);

0 commit comments

Comments
 (0)