@@ -1361,7 +1361,6 @@ @implementation GGMLMetalClass
1361
1361
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_1_F32, mul_mm_q5_1_f32, has_simdgroup_mm);
1362
1362
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q8_0_F32, mul_mm_q8_0_f32, has_simdgroup_mm);
1363
1363
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
1364
- GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_MXFP4_F32, mul_mm_mxfp4_f32, has_simdgroup_mm);
1365
1364
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q2_K_F32, mul_mm_q2_K_f32, has_simdgroup_mm);
1366
1365
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q3_K_F32, mul_mm_q3_K_f32, has_simdgroup_mm);
1367
1366
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_K_F32, mul_mm_q4_K_f32, has_simdgroup_mm);
@@ -1521,6 +1520,9 @@ @implementation GGMLMetalClass
1521
1520
NSString * key = [NSString stringWithUTF8String: name];
1522
1521
[ctx->kernels_ext setObject: obj forKey: key];
1523
1522
1523
+ [metal_function release ];
1524
+ [obj release ];
1525
+
1524
1526
GGML_LOG_DEBUG (" %s : loaded %-40s %16p | th_max = %4d | th_width = %4d \n " , __func__, name, (void *) kernel.pipeline ,
1525
1527
(int ) kernel.pipeline .maxTotalThreadsPerThreadgroup ,
1526
1528
(int ) kernel.pipeline .threadExecutionWidth );
@@ -1542,8 +1544,6 @@ @implementation GGMLMetalClass
1542
1544
char name[256 ];
1543
1545
1544
1546
@autoreleasepool {
1545
- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1546
-
1547
1547
const int32_t dk = (int32_t ) op->src [1 ]->ne [0 ];
1548
1548
const int32_t dv = (int32_t ) op->src [2 ]->ne [0 ];
1549
1549
@@ -1575,7 +1575,7 @@ @implementation GGMLMetalClass
1575
1575
return res;
1576
1576
}
1577
1577
1578
- cv = [[MTLFunctionConstantValues alloc ] init ];
1578
+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1579
1579
1580
1580
[cv setConstantValue: &has_mask type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT + 0 ];
1581
1581
[cv setConstantValue: &has_sinks type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT + 1 ];
@@ -1586,7 +1586,11 @@ @implementation GGMLMetalClass
1586
1586
[cv setConstantValue: &ns20 type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT + 21 ];
1587
1587
[cv setConstantValue: &nsg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT + 22 ];
1588
1588
1589
- return ggml_metal_compile_kernel (backend, base, name, cv);
1589
+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1590
+
1591
+ [cv release ];
1592
+
1593
+ return res;
1590
1594
}
1591
1595
}
1592
1596
@@ -1604,8 +1608,6 @@ @implementation GGMLMetalClass
1604
1608
char name[256 ];
1605
1609
1606
1610
@autoreleasepool {
1607
- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1608
-
1609
1611
const int32_t dk = (int32_t ) op->src [1 ]->ne [0 ];
1610
1612
const int32_t dv = (int32_t ) op->src [2 ]->ne [0 ];
1611
1613
@@ -1637,7 +1639,7 @@ @implementation GGMLMetalClass
1637
1639
return res;
1638
1640
}
1639
1641
1640
- cv = [[MTLFunctionConstantValues alloc ] init ];
1642
+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1641
1643
1642
1644
[cv setConstantValue: &has_mask type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT_VEC + 0 ];
1643
1645
[cv setConstantValue: &has_sinks type: MTLDataTypeBool atIndex: FC_FLASH_ATTN_EXT_VEC + 1 ];
@@ -1649,7 +1651,11 @@ @implementation GGMLMetalClass
1649
1651
[cv setConstantValue: &nsg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC + 22 ];
1650
1652
[cv setConstantValue: &nwg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC + 23 ];
1651
1653
1652
- return ggml_metal_compile_kernel (backend, base, name, cv);
1654
+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1655
+
1656
+ [cv release ];
1657
+
1658
+ return res;
1653
1659
}
1654
1660
}
1655
1661
@@ -1663,8 +1669,6 @@ @implementation GGMLMetalClass
1663
1669
char name[256 ];
1664
1670
1665
1671
@autoreleasepool {
1666
- MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1667
-
1668
1672
snprintf (base, 256 , " kernel_flash_attn_ext_vec_reduce" );
1669
1673
snprintf (name, 256 , " kernel_flash_attn_ext_vec_reduce_dv=%d _nwg=%d " , dv, nwg);
1670
1674
@@ -1674,12 +1678,16 @@ @implementation GGMLMetalClass
1674
1678
return res;
1675
1679
}
1676
1680
1677
- cv = [[MTLFunctionConstantValues alloc ] init ];
1681
+ MTLFunctionConstantValues * cv = [[MTLFunctionConstantValues alloc ] init ];
1678
1682
1679
1683
[cv setConstantValue: &dv type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC_REDUCE + 0 ];
1680
1684
[cv setConstantValue: &nwg type: MTLDataTypeInt atIndex: FC_FLASH_ATTN_EXT_VEC_REDUCE + 1 ];
1681
1685
1682
- return ggml_metal_compile_kernel (backend, base, name, cv);
1686
+ res = ggml_metal_compile_kernel (backend, base, name, cv);
1687
+
1688
+ [cv release ];
1689
+
1690
+ return res;
1683
1691
}
1684
1692
1685
1693
GGML_UNUSED (op);
@@ -5770,6 +5778,9 @@ static enum ggml_status ggml_metal_graph_compute(
5770
5778
id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBuffer ];
5771
5779
[cmd_buf retain ];
5772
5780
5781
+ if (ctx->cmd_bufs [n_cb].obj ) {
5782
+ [ctx->cmd_bufs[n_cb].obj release ];
5783
+ }
5773
5784
ctx->cmd_bufs [n_cb].obj = cmd_buf;
5774
5785
5775
5786
[cmd_buf enqueue ];
0 commit comments