Skip to content

Commit 84d7b2f

Browse files
authored
metal : fix memory leaks (#15962)
ggml-ci
1 parent 40be511 commit 84d7b2f

File tree

1 file changed: 24 additions and 13 deletions.

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1361,7 +1361,6 @@ @implementation GGMLMetalClass
13611361
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_1_F32, mul_mm_q5_1_f32, has_simdgroup_mm);
13621362
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q8_0_F32, mul_mm_q8_0_f32, has_simdgroup_mm);
13631363
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
1364-
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
13651364
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q2_K_F32, mul_mm_q2_K_f32, has_simdgroup_mm);
13661365
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q3_K_F32, mul_mm_q3_K_f32, has_simdgroup_mm);
13671366
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_K_F32, mul_mm_q4_K_f32, has_simdgroup_mm);
@@ -1521,6 +1520,9 @@ @implementation GGMLMetalClass
15211520
NSString * key = [NSString stringWithUTF8String:name];
15221521
[ctx->kernels_ext setObject:obj forKey:key];
15231522

1523+
[metal_function release];
1524+
[obj release];
1525+
15241526
GGML_LOG_DEBUG("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, name, (void *) kernel.pipeline,
15251527
(int) kernel.pipeline.maxTotalThreadsPerThreadgroup,
15261528
(int) kernel.pipeline.threadExecutionWidth);
@@ -1542,8 +1544,6 @@ @implementation GGMLMetalClass
15421544
char name[256];
15431545

15441546
@autoreleasepool {
1545-
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
1546-
15471547
const int32_t dk = (int32_t) op->src[1]->ne[0];
15481548
const int32_t dv = (int32_t) op->src[2]->ne[0];
15491549

@@ -1575,7 +1575,7 @@ @implementation GGMLMetalClass
15751575
return res;
15761576
}
15771577

1578-
cv = [[MTLFunctionConstantValues alloc] init];
1578+
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
15791579

15801580
[cv setConstantValue:&has_mask type:MTLDataTypeBool atIndex:FC_FLASH_ATTN_EXT + 0];
15811581
[cv setConstantValue:&has_sinks type:MTLDataTypeBool atIndex:FC_FLASH_ATTN_EXT + 1];
@@ -1586,7 +1586,11 @@ @implementation GGMLMetalClass
15861586
[cv setConstantValue:&ns20 type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT + 21];
15871587
[cv setConstantValue:&nsg type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT + 22];
15881588

1589-
return ggml_metal_compile_kernel(backend, base, name, cv);
1589+
res = ggml_metal_compile_kernel(backend, base, name, cv);
1590+
1591+
[cv release];
1592+
1593+
return res;
15901594
}
15911595
}
15921596

@@ -1604,8 +1608,6 @@ @implementation GGMLMetalClass
16041608
char name[256];
16051609

16061610
@autoreleasepool {
1607-
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
1608-
16091611
const int32_t dk = (int32_t) op->src[1]->ne[0];
16101612
const int32_t dv = (int32_t) op->src[2]->ne[0];
16111613

@@ -1637,7 +1639,7 @@ @implementation GGMLMetalClass
16371639
return res;
16381640
}
16391641

1640-
cv = [[MTLFunctionConstantValues alloc] init];
1642+
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
16411643

16421644
[cv setConstantValue:&has_mask type:MTLDataTypeBool atIndex:FC_FLASH_ATTN_EXT_VEC + 0];
16431645
[cv setConstantValue:&has_sinks type:MTLDataTypeBool atIndex:FC_FLASH_ATTN_EXT_VEC + 1];
@@ -1649,7 +1651,11 @@ @implementation GGMLMetalClass
16491651
[cv setConstantValue:&nsg type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT_VEC + 22];
16501652
[cv setConstantValue:&nwg type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT_VEC + 23];
16511653

1652-
return ggml_metal_compile_kernel(backend, base, name, cv);
1654+
res = ggml_metal_compile_kernel(backend, base, name, cv);
1655+
1656+
[cv release];
1657+
1658+
return res;
16531659
}
16541660
}
16551661

@@ -1663,8 +1669,6 @@ @implementation GGMLMetalClass
16631669
char name[256];
16641670

16651671
@autoreleasepool {
1666-
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
1667-
16681672
snprintf(base, 256, "kernel_flash_attn_ext_vec_reduce");
16691673
snprintf(name, 256, "kernel_flash_attn_ext_vec_reduce_dv=%d_nwg=%d", dv, nwg);
16701674

@@ -1674,12 +1678,16 @@ @implementation GGMLMetalClass
16741678
return res;
16751679
}
16761680

1677-
cv = [[MTLFunctionConstantValues alloc] init];
1681+
MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc] init];
16781682

16791683
[cv setConstantValue:&dv type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT_VEC_REDUCE + 0];
16801684
[cv setConstantValue:&nwg type:MTLDataTypeInt atIndex:FC_FLASH_ATTN_EXT_VEC_REDUCE + 1];
16811685

1682-
return ggml_metal_compile_kernel(backend, base, name, cv);
1686+
res = ggml_metal_compile_kernel(backend, base, name, cv);
1687+
1688+
[cv release];
1689+
1690+
return res;
16831691
}
16841692

16851693
GGML_UNUSED(op);
@@ -5770,6 +5778,9 @@ static enum ggml_status ggml_metal_graph_compute(
57705778
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
57715779
[cmd_buf retain];
57725780

5781+
if (ctx->cmd_bufs[n_cb].obj) {
5782+
[ctx->cmd_bufs[n_cb].obj release];
5783+
}
57735784
ctx->cmd_bufs[n_cb].obj = cmd_buf;
57745785

57755786
[cmd_buf enqueue];

0 commit comments

Comments (0)