6262 GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS,
6363 GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS,
6464 GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_S,
65+ GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_S,
6566 GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S,
6667 GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL,
6768 GGML_METAL_KERNEL_TYPE_GET_ROWS_I32,
8788 GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32,
8889 GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32,
8990 GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_S_F32,
91+ GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_S_F32,
9092 GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32,
9193 GGML_METAL_KERNEL_TYPE_MUL_MV_IQ4_NL_F32,
9294 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32,
108110 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32,
109111 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32,
110112 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_S_F32,
113+ GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_S_F32,
111114 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32,
112115 GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_NL_F32,
113116 GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32,
126129 GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32,
127130 GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32,
128131 GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_S_F32,
132+ GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_S_F32,
129133 GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32,
130134 GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32,
131135 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32,
144148 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32,
145149 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32,
146150 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_S_F32,
151+ GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_S_F32,
147152 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32,
148153 GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32,
149154 GGML_METAL_KERNEL_TYPE_ROPE_F32,
@@ -458,6 +463,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
458463 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS, get_rows_iq2_xs, true );
459464 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS, get_rows_iq3_xxs, true );
460465 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_S, get_rows_iq3_s, true );
466+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_S, get_rows_iq2_s, true );
461467 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S, get_rows_iq1_s, true );
462468 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL, get_rows_iq4_nl, true );
463469 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true );
@@ -483,6 +489,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
483489 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32, mul_mv_iq2_xs_f32, ctx->support_simdgroup_reduction );
484490 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32, mul_mv_iq3_xxs_f32, ctx->support_simdgroup_reduction );
485491 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_S_F32, mul_mv_iq3_s_f32, ctx->support_simdgroup_reduction );
492+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_S_F32, mul_mv_iq2_s_f32, ctx->support_simdgroup_reduction );
486493 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32, mul_mv_iq1_s_f32, ctx->support_simdgroup_reduction );
487494 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_IQ4_NL_F32, mul_mv_iq4_nl_f32, ctx->support_simdgroup_reduction );
488495 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32, mul_mv_id_f32_f32, ctx->support_simdgroup_reduction );
@@ -504,6 +511,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
504511 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32, mul_mv_id_iq2_xs_f32, ctx->support_simdgroup_reduction );
505512 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32, mul_mv_id_iq3_xxs_f32, ctx->support_simdgroup_reduction );
506513 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_S_F32, mul_mv_id_iq3_s_f32, ctx->support_simdgroup_reduction );
514+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_S_F32, mul_mv_id_iq2_s_f32, ctx->support_simdgroup_reduction );
507515 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32, mul_mv_id_iq1_s_f32, ctx->support_simdgroup_reduction );
508516 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_NL_F32, mul_mv_id_iq4_nl_f32, ctx->support_simdgroup_reduction );
509517 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32, mul_mm_f32_f32, ctx->support_simdgroup_mm );
@@ -522,6 +530,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
522530 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32, mul_mm_iq2_xs_f32, ctx->support_simdgroup_mm );
523531 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32, mul_mm_iq3_xxs_f32, ctx->support_simdgroup_mm );
524532 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_S_F32, mul_mm_iq3_s_f32, ctx->support_simdgroup_mm );
533+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_S_F32, mul_mm_iq2_s_f32, ctx->support_simdgroup_mm );
525534 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32, mul_mm_iq1_s_f32, ctx->support_simdgroup_mm );
526535 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32, mul_mm_iq4_nl_f32, ctx->support_simdgroup_mm );
527536 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32, mul_mm_id_f32_f32, ctx->support_simdgroup_mm );
@@ -540,6 +549,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
540549 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32, mul_mm_id_iq2_xs_f32, ctx->support_simdgroup_mm );
541550 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32, mul_mm_id_iq3_xxs_f32, ctx->support_simdgroup_mm );
542551 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_S_F32, mul_mm_id_iq3_s_f32, ctx->support_simdgroup_mm );
552+ GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_S_F32, mul_mm_id_iq2_s_f32, ctx->support_simdgroup_mm );
543553 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32, mul_mm_id_iq1_s_f32, ctx->support_simdgroup_mm );
544554 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32, mul_mm_id_iq4_nl_f32, ctx->support_simdgroup_mm );
545555 GGML_METAL_ADD_KERNEL (GGML_METAL_KERNEL_TYPE_ROPE_F32, rope_f32, true );
@@ -1358,6 +1368,7 @@ static bool ggml_metal_graph_compute(
13581368 case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32 ].pipeline ; break ;
13591369 case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32].pipeline ; break ;
13601370 case GGML_TYPE_IQ3_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_S_F32 ].pipeline ; break ;
1371+ case GGML_TYPE_IQ2_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_S_F32 ].pipeline ; break ;
13611372 case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32 ].pipeline ; break ;
13621373 case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32 ].pipeline ; break ;
13631374 default : GGML_ASSERT (false && " MUL MAT-MAT not implemented" );
@@ -1500,6 +1511,12 @@ static bool ggml_metal_graph_compute(
15001511 nth1 = 16 ;
15011512 pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_S_F32].pipeline ;
15021513 } break ;
1514+ case GGML_TYPE_IQ2_S:
1515+ {
1516+ nth0 = 4 ;
1517+ nth1 = 16 ;
1518+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_S_F32].pipeline ;
1519+ } break ;
15031520 case GGML_TYPE_IQ1_S:
15041521 {
15051522 nth0 = 4 ;
@@ -1544,9 +1561,9 @@ static bool ggml_metal_graph_compute(
15441561 [encoder setBytes: &r2 length: sizeof (r2) atIndex: 17 ];
15451562 [encoder setBytes: &r3 length: sizeof (r3) atIndex: 18 ];
15461563
1547- if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
1548- src0t == GGML_TYPE_Q5_0 || src0t == GGML_TYPE_Q5_1 || src0t == GGML_TYPE_Q8_0 ||
1549- src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_IQ1_S) { // || src0t == GGML_TYPE_Q4_K ) {
1564+ if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
1565+ src0t == GGML_TYPE_Q5_0 || src0t == GGML_TYPE_Q5_1 || src0t == GGML_TYPE_Q8_0 ||
1566+ src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_IQ1_S || src0t == GGML_TYPE_IQ2_S ) {
15501567 [encoder dispatchThreadgroups: MTLSizeMake ((ne01 + 7 )/8 , ne11, ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
15511568 }
15521569 else if (src0t == GGML_TYPE_IQ2_XXS || src0t == GGML_TYPE_IQ2_XS) {
@@ -1658,6 +1675,7 @@ static bool ggml_metal_graph_compute(
16581675 case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32 ].pipeline ; break ;
16591676 case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32].pipeline ; break ;
16601677 case GGML_TYPE_IQ3_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_S_F32 ].pipeline ; break ;
1678+ case GGML_TYPE_IQ2_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_S_F32 ].pipeline ; break ;
16611679 case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32 ].pipeline ; break ;
16621680 case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32 ].pipeline ; break ;
16631681 default : GGML_ASSERT (false && " MUL_MAT_ID not implemented" );
@@ -1803,6 +1821,12 @@ static bool ggml_metal_graph_compute(
18031821 nth1 = 16 ;
18041822 pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_S_F32].pipeline ;
18051823 } break ;
1824+ case GGML_TYPE_IQ2_S:
1825+ {
1826+ nth0 = 4 ;
1827+ nth1 = 16 ;
1828+ pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_S_F32].pipeline ;
1829+ } break ;
18061830 case GGML_TYPE_IQ1_S:
18071831 {
18081832 nth0 = 4 ;
@@ -1863,9 +1887,9 @@ static bool ggml_metal_graph_compute(
18631887 [encoder setBuffer: id_src_cur offset: offs_src_cur atIndex: 23 + j];
18641888 }
18651889
1866- if (src2t == GGML_TYPE_Q4_0 || src2t == GGML_TYPE_Q4_1 ||
1867- src2t == GGML_TYPE_Q5_0 || src2t == GGML_TYPE_Q5_1 || src2t == GGML_TYPE_Q8_0 ||
1868- src2t == GGML_TYPE_Q2_K || src2t == GGML_TYPE_IQ1_S) { // || src2t == GGML_TYPE_Q4_K ) {
1890+ if (src2t == GGML_TYPE_Q4_0 || src2t == GGML_TYPE_Q4_1 ||
1891+ src2t == GGML_TYPE_Q5_0 || src2t == GGML_TYPE_Q5_1 || src2t == GGML_TYPE_Q8_0 ||
1892+ src2t == GGML_TYPE_Q2_K || src2t == GGML_TYPE_IQ1_S || src2t == GGML_TYPE_IQ2_S ) {
18691893 [encoder dispatchThreadgroups: MTLSizeMake ((ne21 + 7 )/8 , _ne1, ne01*ne12*ne13) threadsPerThreadgroup: MTLSizeMake (nth0, nth1, 1 )];
18701894 }
18711895 else if (src2t == GGML_TYPE_IQ2_XXS || src2t == GGML_TYPE_IQ2_XS) {
@@ -1925,6 +1949,7 @@ static bool ggml_metal_graph_compute(
19251949 case GGML_TYPE_IQ2_XS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS ].pipeline ; break ;
19261950 case GGML_TYPE_IQ3_XXS: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS].pipeline ; break ;
19271951 case GGML_TYPE_IQ3_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_S ].pipeline ; break ;
1952+ case GGML_TYPE_IQ2_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_S ].pipeline ; break ;
19281953 case GGML_TYPE_IQ1_S: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S ].pipeline ; break ;
19291954 case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL ].pipeline ; break ;
19301955 case GGML_TYPE_I32: pipeline = ctx->kernels [GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline ; break ;
0 commit comments