Skip to content

Commit 6e03506

Browse files
committed
metal : avoid too much shared memory usage with mul_mat_id
ggml-ci
1 parent 94af548 commit 6e03506

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2856,17 +2856,19 @@ static void ggml_metal_encode_node(
28562856
// ne21 = n_rows
28572857
const int dst_rows = ne20*ne21;
28582858
const int dst_rows_min = n_as;
2859-
const int dst_rows_max = (device.maxThreadgroupMemoryLength - 32 - 8192)/4;
2859+
const int dst_rows_max = (device.maxThreadgroupMemoryLength/2 - 8192)/4;
28602860

28612861
// max size of the rowids array in the kernel shared buffer
2862-
GGML_ASSERT(dst_rows <= dst_rows_max);
2862+
//GGML_ASSERT(dst_rows <= dst_rows_max);
28632863

28642864
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
28652865
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
28662866
if ([device supportsFamily:MTLGPUFamilyApple7] &&
28672867
ne00 % 32 == 0 && ne00 >= 64 &&
2868-
ne01 / ne02 >= 512 && // NOTE: this is based on Mixtral shapes, might need adjustments
2869-
dst_rows > dst_rows_min) {
2868+
//ne01 / ne02 >= 512 && // NOTE: this is based on Mixtral shapes, might need adjustments
2869+
dst_rows > dst_rows_min &&
2870+
dst_rows <= dst_rows_max) {
2871+
28702872
// some Metal matrix data types require aligned pointers
28712873
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5)
28722874
switch (src0->type) {

0 commit comments

Comments
 (0)