@@ -1478,23 +1478,11 @@ static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -1
14781478
14791479//===================================== Q8_K ==============================================
14801480
1481- // void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
1482- // quantize_row_q8_K_ref(x, y, k);
1483- // }
1484-
1485- // void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
1486- // #ifdef GGML_USE_IQK_MULMAT
1487- // iqk_quantize_row_q8_K(x, y, k);
1488- // #else
1489- // quantize_row_q8_K_ref(x, y, k);
1490- // #endif
1491- // }
1492-
1493- // void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
1494- // quantize_row_q8_K_ref(x, y, k);
1495- // }
14961481
14971482void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
1483+ #ifdef GGML_USE_IQK_MULMAT
1484+ iqk_quantize_row_q8_K(x, y, k);
1485+ #else
14981486#ifdef __wasm_simd128__
14991487 assert(k % QK_K == 0);
15001488 const int64_t nb = k / QK_K;
@@ -1576,6 +1564,7 @@ void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
15761564#else
15771565 quantize_row_q8_K_ref(x, y, k);
15781566#endif
1567+ #endif
15791568}
15801569
15811570//===================================== Dot products =================================
@@ -1661,6 +1650,11 @@ static inline __m128i get_scale_shuffle(int i) {
16611650#endif
16621651
16631652void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
1653+ #if GGML_USE_IQK_MULMAT
1654+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q4_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
1655+ return;
1656+ }
1657+ #endif
16641658 const int qk = QK8_0;
16651659 const int nb = n / qk;
16661660
@@ -2359,6 +2353,11 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
23592353}
23602354
23612355void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
2356+ #if GGML_USE_IQK_MULMAT
2357+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q4_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
2358+ return;
2359+ }
2360+ #endif
23622361 const int qk = QK8_1;
23632362 const int nb = n / qk;
23642363
@@ -2679,6 +2678,16 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
26792678}
26802679
26812680void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
2681+ #if GGML_USE_IQK_MULMAT
2682+ #ifdef __AVX2__
2683+ const enum ggml_type vec_dot_type = GGML_TYPE_Q8_1;
2684+ #else
2685+ const enum ggml_type vec_dot_type = GGML_TYPE_Q8_0;
2686+ #endif
2687+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, vec_dot_type, vy, by, s, bs, 0, 1)) {
2688+ return;
2689+ }
2690+ #endif
26822691 const int qk = QK8_0;
26832692 const int nb = n / qk;
26842693
@@ -3002,6 +3011,11 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
30023011}
30033012
30043013void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
3014+ #if GGML_USE_IQK_MULMAT
3015+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
3016+ return;
3017+ }
3018+ #endif
30053019 const int qk = QK8_1;
30063020 const int nb = n / qk;
30073021
@@ -3361,6 +3375,16 @@ void ggml_vec_dot_q6_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
33613375}
33623376
33633377void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
3378+ #if GGML_USE_IQK_MULMAT
3379+ #ifdef HAVE_FANCY_SIMD
3380+ enum ggml_type dot_type = GGML_TYPE_Q8_1_X4;
3381+ #else
3382+ enum ggml_type dot_type = GGML_TYPE_Q8_0_X4;
3383+ #endif
3384+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q8_0, vx, bx, dot_type, vy, by, s, bs, 0, 1)) {
3385+ return;
3386+ }
3387+ #endif
33643388 const int qk = QK8_0;
33653389 const int nb = n / qk;
33663390
@@ -13086,6 +13110,11 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
1308613110}
1308713111
1308813112void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
13113+ #if GGML_USE_IQK_MULMAT
13114+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_IQ4_NL, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
13115+ return;
13116+ }
13117+ #endif
1308913118 assert(nrc == 1);
1309013119 UNUSED(nrc);
1309113120 UNUSED(bx);
0 commit comments