@@ -1361,7 +1361,6 @@ @implementation GGMLMetalClass
13611361 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_1_F32, mul_mm_q5_1_f32, has_simdgroup_mm);
13621362 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q8_0_F32, mul_mm_q8_0_f32, has_simdgroup_mm);
13631363 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
1364- GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
13651364 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q2_K_F32, mul_mm_q2_K_f32, has_simdgroup_mm);
13661365 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q3_K_F32, mul_mm_q3_K_f32, has_simdgroup_mm);
13671366 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_K_F32, mul_mm_q4_K_f32, has_simdgroup_mm);
@@ -1521,6 +1520,9 @@ @implementation GGMLMetalClass
15211520 NSString * key = [NSString stringWithUTF8String: name];
15221521 [ctx->kernels_ext setObject: obj forKey: key];
15231522
1523+ [metal_function release ];
1524+ [obj release ];
1525+
15241526 GGML_LOG_DEBUG (" %s : loaded %-40s %16p | th_max = %4d | th_width = %4d \n " , __func__, name, (void *) kernel.pipeline ,
15251527 (int ) kernel.pipeline .maxTotalThreadsPerThreadgroup ,
15261528 (int ) kernel.pipeline .threadExecutionWidth );
@@ -1542,8 +1544,6 @@ @implementation GGMLMetalClass
15421544 char name[256 ];
15431545
15441546 @autoreleasepool {
1545- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1546-
15471547 const int32_t dk = (int32_t ) op->src [1 ]->ne [0 ];
15481548 const int32_t dv = (int32_t ) op->src [2 ]->ne [0 ];
15491549
@@ -1575,7 +1575,7 @@ @implementation GGMLMetalClass
15751575 return res;
15761576 }
15771577
1578- cv = [[MTLFunctionConstantValues alloc ] init ];
1578+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
15791579
15801580 [cv setConstantValue: &has_mask type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT + 0 ];
15811581 [cv setConstantValue: &has_sinks type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT + 1 ];
@@ -1586,7 +1586,11 @@ @implementation GGMLMetalClass
15861586 [cv setConstantValue: &ns20 type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT + 21 ];
15871587 [cv setConstantValue: &nsg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT + 22 ];
15881588
1589- return ggml_metal_compile_kernel (backend, base, name, cv);
1589+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1590+
1591+ [cv release ];
1592+
1593+ return res;
15901594 }
15911595}
15921596
@@ -1604,8 +1608,6 @@ @implementation GGMLMetalClass
16041608 char name[256 ];
16051609
16061610 @autoreleasepool {
1607- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1608-
16091611 const int32_t dk = (int32_t ) op->src [1 ]->ne [0 ];
16101612 const int32_t dv = (int32_t ) op->src [2 ]->ne [0 ];
16111613
@@ -1637,7 +1639,7 @@ @implementation GGMLMetalClass
16371639 return res;
16381640 }
16391641
1640- cv = [[MTLFunctionConstantValues alloc ] init ];
1642+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
16411643
16421644 [cv setConstantValue: &has_mask type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT_VEC + 0 ];
16431645 [cv setConstantValue: &has_sinks type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT_VEC + 1 ];
@@ -1649,7 +1651,11 @@ @implementation GGMLMetalClass
16491651 [cv setConstantValue: &nsg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC + 22 ];
16501652 [cv setConstantValue: &nwg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC + 23 ];
16511653
1652- return ggml_metal_compile_kernel (backend, base, name, cv);
1654+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1655+
1656+ [cv release ];
1657+
1658+ return res;
16531659 }
16541660}
16551661
@@ -1663,8 +1669,6 @@ @implementation GGMLMetalClass
16631669 char name[256 ];
16641670
16651671 @autoreleasepool {
1666- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1667-
16681672 snprintf (base, 256 , " kernel_flash_attn_ext_vec_reduce" );
16691673 snprintf (name, 256 , " kernel_flash_attn_ext_vec_reduce_dv=%d _nwg=%d " , dv, nwg);
16701674
@@ -1674,12 +1678,16 @@ @implementation GGMLMetalClass
16741678 return res;
16751679 }
16761680
1677- cv = [[MTLFunctionConstantValues alloc ] init ];
1681+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
16781682
16791683 [cv setConstantValue: &dv type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC_REDUCE + 0 ];
16801684 [cv setConstantValue: &nwg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC_REDUCE + 1 ];
16811685
1682- return ggml_metal_compile_kernel (backend, base, name, cv);
1686+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1687+
1688+ [cv release ];
1689+
1690+ return res;
16831691 }
16841692
16851693 GGML_UNUSED (op);
@@ -5770,6 +5778,9 @@ static enum ggml_status ggml_metal_graph_compute(
57705778 id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBuffer ];
57715779 [cmd_buf retain ];
57725780
5781+ if (ctx->cmd_bufs [n_cb].obj ) {
5782+ [ctx->cmd_bufs[n_cb].obj release ];
5783+ }
57735784 ctx->cmd_bufs [n_cb].obj = cmd_buf;
57745785
57755786 [cmd_buf enqueue ];
0 commit comments