Skip to content

Commit a444d39

Browse files
committed
metal : optimize FA-vec kernel
ggml-ci
1 parent 1e0f5ad commit a444d39

File tree

2 files changed

+145
-138
lines changed

2 files changed

+145
-138
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4179,10 +4179,9 @@ static void ggml_metal_encode_node(
4179 4179
// ne00*(nsg)
4180 4180
// each simdgroup has a full f16 head vector in shared mem to accumulate results
4181 4181
//
4182-
#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(ne00 + 2*ncpsg*(nsg)) + ne00*(nsg))*(sizeof(float)/2), 16))
4182+
#define FATTN_SMEM(nsg) (GGML_PAD((nqptg*(GGML_PAD(ne00, 128) + 2*ncpsg*(nsg)) + ne20*(nsg))*(sizeof(float)/2), 16))
4183 4183

4184 4184
int64_t nsgmax = 2;
4185-
4186 4185
while (true) {
4187 4186
const size_t smem = FATTN_SMEM(nsgmax);
4188 4187
if (smem > device.maxThreadgroupMemoryLength) {

0 commit comments

Comments
 (0)