@@ -717,7 +717,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q4_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -741,7 +741,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
         .vec_dot = ggml_vec_dot_q4_1_q8_1,
 #if GGML_USE_IQK_MULMAT
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_1,
 #endif
@@ -789,7 +789,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q5_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -809,7 +809,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
         .vec_dot = ggml_vec_dot_q5_1_q8_1,
 #if GGML_USE_IQK_MULMAT
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_1,
 #endif
@@ -827,7 +827,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q6_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -852,7 +852,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         // Remember: we cannot add 128 to the Q8 quants and use the block sum in Q8_1 to subtract as we do on Zen4 for pure AVX2
         // because there the result of the _mm256_maddubs_epi16() instruction may overflow the int16_t range
         // (and it gets saturated if it does), leading to wrong results.
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -897,6 +897,16 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .nrows = 1,
         .row_meta_size = 0,
     },
+    [GGML_TYPE_Q8_2_X4] = {
+        .type_name = "q8_2_x4",
+        .blck_size = QK8_2,
+        .type_size = sizeof(block_q8_2),
+        .is_quantized = true,
+        .from_float = quantize_row_q8_2_x4,
+        .from_float_ref = quantize_row_q8_2_x4,
+        .nrows = 1,
+        .row_meta_size = 0,
+    },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
         .blck_size = QK_K,
@@ -1272,7 +1282,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq4_nl_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1628,7 +1638,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq4_nl_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1662,7 +1672,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q4_0_r8_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1683,7 +1693,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q8_0_r8_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1704,7 +1714,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q5_0_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1725,7 +1735,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q6_0_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -11647,6 +11657,7 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -11815,6 +11826,7 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -15690,6 +15702,7 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -15997,6 +16010,7 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -16627,6 +16641,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
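
Side note on the _mm256_maddubs_epi16() saturation mentioned in the comment of the @@ -852,7 hunk above. The following is a minimal, standalone sketch of my own (not part of this change; assumes an AVX2 build, e.g. gcc -O2 -mavx2) showing why biasing the Q8 quants by +128 can push the pairwise int16_t sums past INT16_MAX:

// Illustration only: worst-case operands, i.e. a +128-biased quant of 255
// multiplied by a signed quant of 127.
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i u = _mm256_set1_epi8((char)0xFF); // unsigned operand: 255 (127 + 128 bias)
    __m256i s = _mm256_set1_epi8(127);        // signed operand: 127
    // Each int16_t lane receives 255*127 + 255*127 = 64770, which exceeds
    // INT16_MAX (32767), so the instruction saturates instead of wrapping.
    __m256i p = _mm256_maddubs_epi16(u, s);
    short out[16];
    _mm256_storeu_si256((__m256i *)out, p);
    printf("lane 0 = %d (exact value would be 64770)\n", out[0]); // prints 32767
    return 0;
}

Because the instruction saturates rather than wraps, the error cannot be undone afterwards by subtracting 128 times the block sum, which is exactly what the comment warns about.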