Skip to content

Commit 99d12de

Browse files
committed
Add quantize row declarations for IQ quants
1 parent dc650a0 commit 99d12de

File tree

3 files changed

+56
-8
lines changed

3 files changed

+56
-8
lines changed

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13660,6 +13660,46 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
1366013660
#endif
1366113661
}
1366213662

13663+
// ============================ IQ quants
13664+
13665+
// CPU-backend entry point for IQ1_S row quantization.
// Forwards x, y (viewed as block_iq1_s *) and k unchanged to
// quantize_row_iq1_s_ref; no work is done here beyond the precondition check.
void quantize_row_iq1_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
    // Same precondition the iq2/iq3 wrappers in this file check before forwarding.
    assert(k % QK_K == 0);
    quantize_row_iq1_s_ref(x, (block_iq1_s *)y, k);
}
13668+
13669+
// CPU-backend entry point for IQ1_M row quantization.
// Forwards x, y (viewed as block_iq1_m *) and k unchanged to
// quantize_row_iq1_m_ref; no work is done here beyond the precondition check.
void quantize_row_iq1_m(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
    // Same precondition the iq2/iq3 wrappers in this file check before forwarding.
    assert(k % QK_K == 0);
    quantize_row_iq1_m_ref(x, (block_iq1_m *)y, k);
}
13672+
13673+
// CPU-backend entry point for IQ2_XXS row quantization.
// vy is the untyped destination; it is viewed as block_iq2_xxs and passed,
// together with x and k, to quantize_row_iq2_xxs_ref.
void quantize_row_iq2_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
    // k has to be a whole multiple of QK_K.
    assert(k % QK_K == 0);
    // Use the project macro rather than the bare keyword so the qualifier is
    // spelled the same way as in the signature.
    block_iq2_xxs * GGML_RESTRICT y = vy;
    quantize_row_iq2_xxs_ref(x, y, k);
}
13678+
13679+
// CPU-backend entry point for IQ2_XS row quantization.
// vy is the untyped destination; it is viewed as block_iq2_xs and passed,
// together with x and k, to quantize_row_iq2_xs_ref.
void quantize_row_iq2_xs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
    // k has to be a whole multiple of QK_K.
    assert(k % QK_K == 0);
    // Use the project macro rather than the bare keyword so the qualifier is
    // spelled the same way as in the signature.
    block_iq2_xs * GGML_RESTRICT y = vy;
    quantize_row_iq2_xs_ref(x, y, k);
}
13684+
13685+
// CPU-backend entry point for IQ2_S row quantization.
// vy is the untyped destination; it is viewed as block_iq2_s and passed,
// together with x and k, to quantize_row_iq2_s_ref.
void quantize_row_iq2_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
    // k has to be a whole multiple of QK_K.
    assert(k % QK_K == 0);
    // Use the project macro rather than the bare keyword so the qualifier is
    // spelled the same way as in the signature.
    block_iq2_s * GGML_RESTRICT y = vy;
    quantize_row_iq2_s_ref(x, y, k);
}
13690+
13691+
// CPU-backend entry point for IQ3_XXS row quantization.
// vy is the untyped destination; it is viewed as block_iq3_xxs and passed,
// together with x and k, to quantize_row_iq3_xxs_ref.
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
    // k has to be a whole multiple of QK_K.
    assert(k % QK_K == 0);
    // Use the project macro rather than the bare keyword so the qualifier is
    // spelled the same way as in the signature.
    block_iq3_xxs * GGML_RESTRICT y = vy;
    quantize_row_iq3_xxs_ref(x, y, k);
}
13696+
13697+
// CPU-backend entry point for IQ3_S row quantization.
// vy is the untyped destination; it is viewed as block_iq3_s and passed,
// together with x and k, to quantize_row_iq3_s_ref.
void quantize_row_iq3_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
    // k has to be a whole multiple of QK_K.
    assert(k % QK_K == 0);
    // Use the project macro rather than the bare keyword so the qualifier is
    // spelled the same way as in the signature.
    block_iq3_s * GGML_RESTRICT y = vy;
    quantize_row_iq3_s_ref(x, y, k);
}
13702+
1366313703
// ============================ 4-bit non-linear quants
1366413704

1366513705
void quantize_row_iq4_nl(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {

ggml/src/ggml-cpu/ggml-cpu-quants.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
3232
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
3333
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
3434

35+
// Row quantizers for the IQ1/IQ2/IQ3 formats. All share one signature:
// x is the float source, y the untyped destination buffer, and k is passed
// through to the matching quantize_row_*_ref routine by the definitions in
// ggml-cpu-quants.c (the iq2/iq3 definitions assert k % QK_K == 0).
void quantize_row_iq1_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
36+
void quantize_row_iq1_m(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
37+
void quantize_row_iq2_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
38+
void quantize_row_iq2_xs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
39+
void quantize_row_iq2_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
40+
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
41+
void quantize_row_iq3_s(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
42+
3543
void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
3644
void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
3745

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
450450
[GGML_TYPE_Q8_K_R8] = {
451451
.from_float = quantize_row_q8_k_r8,
452452
.vec_dot = vec_dot_q8_k_r8_q8_k,
453-
// .vec_dot_type = GGML_TYPE_Q8_KR8,
453+
.vec_dot_type = GGML_TYPE_Q8_KR8,
454454
.vec_dot_type = GGML_TYPE_Q8_K,
455455
.nrows = 1,
456456
},
457457
[GGML_TYPE_IQ2_XXS] = {
458-
.from_float = NULL,
458+
.from_float = quantize_row_iq2_xxs,
459459
.vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
460460
// #ifdef __AVX2__
461461
// .vec_dot_type = GGML_TYPE_Q8_2_X4,
@@ -471,7 +471,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
471471
.nrows = 1,
472472
},
473473
[GGML_TYPE_IQ2_XS] = {
474-
.from_float = NULL,
474+
.from_float = quantize_row_iq2_xs,
475475
.vec_dot = ggml_vec_dot_iq2_xs_q8_K,
476476
.vec_dot_type = GGML_TYPE_Q8_K,
477477
.nrows = 1,
@@ -484,7 +484,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
484484
},
485485
[GGML_TYPE_IQ3_XXS] = {
486486
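// NOTE: from_float for iq3 and iq2_s had previously been removed because these quants require initialization in ggml_quantize_init; it is set again below, so that initialization presumably has to run before from_float is used (TODO confirm)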
487-
//.from_float = quantize_row_iq3_xxs,
487+
.from_float = quantize_row_iq3_xxs,
488488
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
489489
// #ifdef __AVX2__
490490
// .vec_dot_type = GGML_TYPE_Q8_2_X4,
@@ -500,7 +500,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
500500
.nrows = 1,
501501
},
502502
[GGML_TYPE_IQ3_S] = {
503-
//.from_float = quantize_row_iq3_s,
503+
.from_float = quantize_row_iq3_s,
504504
.vec_dot = ggml_vec_dot_iq3_s_q8_K,
505505
// #ifdef __AVX2__
506506
// .vec_dot_type = GGML_TYPE_Q8_2_X4,
@@ -516,7 +516,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
516516
.nrows = 1,
517517
},
518518
[GGML_TYPE_IQ2_S] = {
519-
//.from_float = quantize_row_iq2_s,
519+
.from_float = quantize_row_iq2_s,
520520
.vec_dot = ggml_vec_dot_iq2_s_q8_K,
521521
.vec_dot_type = GGML_TYPE_Q8_K,
522522
.nrows = 1,
@@ -528,7 +528,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
528528
.nrows = 1,
529529
},
530530
[GGML_TYPE_IQ1_S] = {
531-
.from_float = NULL,
531+
.from_float = quantize_row_iq1_s,
532532
.vec_dot = ggml_vec_dot_iq1_s_q8_K,
533533
// #ifdef __AVX2__
534534
// .vec_dot_type = GGML_TYPE_Q8_2_X4,
@@ -544,7 +544,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
544544
.nrows = 1,
545545
},
546546
[GGML_TYPE_IQ1_M] = {
547-
.from_float = NULL,
547+
.from_float = quantize_row_iq1_m,
548548
.vec_dot = ggml_vec_dot_iq1_m_q8_K,
549549
.vec_dot_type = GGML_TYPE_Q8_K,
550550
.nrows = 1,

0 commit comments

Comments
 (0)