@@ -211,11 +211,14 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
211
211
GGML_METAL_KERNEL_TYPE_RWKV_WKV6_F32,
212
212
GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32,
213
213
GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32,
214
+ GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4,
214
215
GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32,
216
+ GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4,
215
217
GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW,
216
218
GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4,
217
219
GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F16,
218
220
GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32,
221
+ GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4,
219
222
GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW,
220
223
GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4,
221
224
GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16,
@@ -1175,11 +1178,14 @@ @implementation GGMLMetalClass
1175
1178
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_RWKV_WKV6_F32, rwkv_wkv6_f32, true );
1176
1179
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32, rwkv_wkv7_f32, true );
1177
1180
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, mul_mv_f32_f32, has_simdgroup_reduction);
1181
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4, mul_mv_f32_f32_c4, true );
1178
1182
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32, mul_mv_bf16_f32, has_simdgroup_reduction && use_bfloat);
1183
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4, mul_mv_bf16_f32_c4, use_bfloat);
1179
1184
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW, mul_mv_bf16_f32_1row, has_simdgroup_reduction && use_bfloat);
1180
1185
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4, mul_mv_bf16_f32_l4, has_simdgroup_reduction && use_bfloat);
1181
1186
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16, mul_mv_bf16_bf16, has_simdgroup_reduction && use_bfloat);
1182
1187
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32, mul_mv_f16_f32, has_simdgroup_reduction);
1188
+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4, mul_mv_f16_f32_c4, true );
1183
1189
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW, mul_mv_f16_f32_1row, has_simdgroup_reduction);
1184
1190
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4, mul_mv_f16_f32_l4, has_simdgroup_reduction);
1185
1191
GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F16, mul_mv_f16_f16, has_simdgroup_reduction);
@@ -3111,14 +3117,23 @@ static bool ggml_metal_encode_node(
3111
3117
nsg = 1 ;
3112
3118
nr0 = 1 ;
3113
3119
nr1 = 4 ;
3114
- pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32].pipeline ;
3120
+ if (ne00 == 4 ) {
3121
+ nr0 = 32 ;
3122
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4].pipeline ;
3123
+ } else {
3124
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32].pipeline ;
3125
+ }
3115
3126
} break ;
3116
3127
case GGML_TYPE_F16:
3117
3128
{
3118
3129
nsg = 1 ;
3119
3130
nr0 = 1 ;
3120
3131
if (src1t == GGML_TYPE_F32) {
3121
- if (ne11 * ne12 < 4 ) {
3132
+ if (ne00 == 4 ) {
3133
+ nr0 = 32 ;
3134
+ nr1 = 4 ;
3135
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4].pipeline ;
3136
+ } else if (ne11 * ne12 < 4 ) {
3122
3137
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW].pipeline ;
3123
3138
} else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0 ) {
3124
3139
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4].pipeline ;
@@ -3137,7 +3152,11 @@ static bool ggml_metal_encode_node(
3137
3152
nsg = 1 ;
3138
3153
nr0 = 1 ;
3139
3154
if (src1t == GGML_TYPE_F32) {
3140
- if (ne11 * ne12 < 4 ) {
3155
+ if (ne00 == 4 ) {
3156
+ nr0 = 32 ;
3157
+ nr1 = 4 ;
3158
+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4].pipeline ;
3159
+ } else if (ne11 * ne12 < 4 ) {
3141
3160
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW].pipeline ;
3142
3161
} else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0 ) {
3143
3162
pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4].pipeline ;
0 commit comments