Skip to content

Commit b534e39

Browse files
committed
metal : opt mul_mm_id map0
1 parent ef7bda3 commit b534e39

File tree

2 files changed

+4
-10
lines changed

2 files changed

+4
-10
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3894,8 +3894,6 @@ static int ggml_metal_encode_node(
38943894
}
38953895

38963896
{
3897-
const int nth = MIN(1024, ne10/4);
3898-
38993897
ggml_metal_kargs_mul_mm_id_map0 args = {
39003898
ne10,
39013899
ne11, // n_expert_used (bcast)
@@ -3916,7 +3914,7 @@ static int ggml_metal_encode_node(
39163914
[encoder setBuffer: h_tpe offset:0 atIndex:2];
39173915
[encoder setBuffer: h_ids offset:0 atIndex:3];
39183916

3919-
[encoder dispatchThreadgroups:MTLSizeMake(ne02, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
3917+
[encoder dispatchThreadgroups:MTLSizeMake(ne02, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
39203918
}
39213919

39223920
{

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7497,18 +7497,14 @@ kernel void kernel_mul_mm_id_map0(
74977497
continue;
74987498
}
74997499

7500-
if (tpitg.x == 0) {
7501-
ids_i32[ide*args.ne21 + n_all] = i21*args.ne20 + i20;
7502-
}
7500+
ids_i32[ide*args.ne21 + n_all] = i21*args.ne20 + i20;
75037501

75047502
++n_all;
75057503
}
75067504
}
75077505

7508-
if (tpitg.x == 0) {
7509-
device int32_t * tpe_i32 = (device int32_t *) (htpe);
7510-
tpe_i32[ide] = n_all;
7511-
}
7506+
device int32_t * tpe_i32 = (device int32_t *) (htpe);
7507+
tpe_i32[ide] = n_all;
75127508
}
75137509

75147510
typedef decltype(kernel_mul_mm_id_map0<half4>) kernel_mul_mm_id_map0_t;

0 commit comments

Comments
 (0)