Add quantize row declarations for IQ quants

Nexesenex · Nexesenex · commit 99d12deec2f6 · 2025-06-19T01:46:49.000+02:00
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c
@@ -13660,6 +13660,46 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
 #endif
 }
 
+// ============================ IQ quants
+
+void quantize_row_iq1_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
+    quantize_row_iq1_s_ref(x, (block_iq1_s *)y, k);
+}
+
+void quantize_row_iq1_m  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
+    quantize_row_iq1_m_ref(x, (block_iq1_m *)y, k);
+}
+
+void quantize_row_iq2_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
+    assert(k % QK_K == 0);
+    block_iq2_xxs * restrict y = vy;
+    quantize_row_iq2_xxs_ref(x, y, k);
+}
+
+void quantize_row_iq2_xs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
+    assert(k % QK_K == 0);
+    block_iq2_xs * restrict y = vy;
+    quantize_row_iq2_xs_ref(x, y, k);
+}
+
+void quantize_row_iq2_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
+    assert(k % QK_K == 0);
+    block_iq2_s * restrict y = vy;
+    quantize_row_iq2_s_ref(x, y, k);
+}
+
+void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
+    assert(k % QK_K == 0);
+    block_iq3_xxs * restrict y = vy;
+    quantize_row_iq3_xxs_ref(x, y, k);
+}
+
+void quantize_row_iq3_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
+    assert(k % QK_K == 0);
+    block_iq3_s * restrict y = vy;
+    quantize_row_iq3_s_ref(x, y, k);
+}
+
 // ============================ 4-bit non-linear quants
 
 void quantize_row_iq4_nl(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.h b/ggml/src/ggml-cpu/ggml-cpu-quants.h
@@ -32,6 +32,14 @@ void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
 void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 
+void quantize_row_iq1_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq1_m(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq2_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq2_xs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq2_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+void quantize_row_iq3_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
+
 void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
 
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -450,12 +450,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q8_K_R8] = {
         .from_float               = quantize_row_q8_k_r8,
         .vec_dot                  = vec_dot_q8_k_r8_q8_k,
-        // .vec_dot_type             = GGML_TYPE_Q8_KR8,
+        .vec_dot_type             = GGML_TYPE_Q8_KR8,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ2_XXS] = {
-        .from_float               = NULL,
+        .from_float               = quantize_row_iq2_xxs,
         .vec_dot                  = ggml_vec_dot_iq2_xxs_q8_K,
 // #ifdef __AVX2__
        // .vec_dot_type             = GGML_TYPE_Q8_2_X4,
@@ -471,7 +471,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ2_XS] = {
-        .from_float               = NULL,
+        .from_float               = quantize_row_iq2_xs,
         .vec_dot                  = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
@@ -484,7 +484,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     },
     [GGML_TYPE_IQ3_XXS] = {
         // NOTE: from_float for iq3 and iq2_s was removed because these quants require initialization in ggml_quantize_init
-        //.from_float               = quantize_row_iq3_xxs,
+        .from_float               = quantize_row_iq3_xxs,
         .vec_dot                  = ggml_vec_dot_iq3_xxs_q8_K,
 // #ifdef __AVX2__
         // .vec_dot_type             = GGML_TYPE_Q8_2_X4,
@@ -500,7 +500,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ3_S] = {
-        //.from_float               = quantize_row_iq3_s,
+        .from_float               = quantize_row_iq3_s,
         .vec_dot                  = ggml_vec_dot_iq3_s_q8_K,
 // #ifdef __AVX2__
         // .vec_dot_type             = GGML_TYPE_Q8_2_X4,
@@ -516,7 +516,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ2_S] = {
-        //.from_float               = quantize_row_iq2_s,
+        .from_float               = quantize_row_iq2_s,
         .vec_dot                  = ggml_vec_dot_iq2_s_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
@@ -528,7 +528,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ1_S] = {
-        .from_float               = NULL,
+        .from_float               = quantize_row_iq1_s,
         .vec_dot                  = ggml_vec_dot_iq1_s_q8_K,
 // #ifdef __AVX2__
         // .vec_dot_type             = GGML_TYPE_Q8_2_X4,
@@ -544,7 +544,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .nrows                    = 1,
     },
     [GGML_TYPE_IQ1_M] = {
-        .from_float               = NULL,
+        .from_float               = quantize_row_iq1_m,
         .vec_dot                  = ggml_vec_dot_iq1_m_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,