
Commit d0b5207

ikawrakow and Iwan Kawrakow authored
Use bf16 instead of fp16 block scales for q8_1 (#292)
* WIP - not working
* q8_0 without bells and whistles works
* It works for q8_0
* Use bf16 instead of f16,int16
* q4_0_r8
* q5_0_r4
* q6_0_r4
* Also q4_1 and q5_1
* q8_0_r8 on avx2

---------

Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent a22250d commit d0b5207
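Background: bf16 keeps fp32's sign and full 8-bit exponent but only 7 mantissa bits, so unlike fp16 it covers the whole fp32 range, and converting to or from fp32 is essentially a 16-bit shift rather than an fp16 conversion. A minimal, generic sketch of the two conversions (the repo has its own bf16 helpers; these names are illustrative only):

#include <stdint.h>
#include <string.h>

// fp32 -> bf16: keep the high 16 bits, rounding to nearest-even on the
// discarded low half (NaN handling omitted for brevity).
static uint16_t f32_to_bf16(float f) {
    uint32_t u;
    memcpy(&u, &f, sizeof u);
    u += 0x7fff + ((u >> 16) & 1);   // round to nearest, ties to even
    return (uint16_t)(u >> 16);
}

// bf16 -> fp32: shift back into the high half; no tables, no special cases.
static float bf16_to_f32(uint16_t h) {
    uint32_t u = (uint32_t)h << 16;
    float f;
    memcpy(&f, &u, sizeof f);
    return f;
}

For block scales the likely appeal is exactly this: a scale never falls outside the representable range, and widening a vector of bf16 scales to fp32 is a cheap shift/shuffle rather than an fp16 conversion.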

6 files changed: +348, -255 lines


ggml/include/ggml.h

Lines changed: 3 additions & 2 deletions
@@ -396,8 +396,9 @@ extern "C" {
         //
         GGML_TYPE_I2_S = 36,
         //
-        GGML_TYPE_Q8_0_X4 = 98,
-        GGML_TYPE_Q8_1_X4 = 99,
+        GGML_TYPE_Q8_0_X4 = 97,
+        GGML_TYPE_Q8_1_X4 = 98,
+        GGML_TYPE_Q8_2_X4 = 99,
         GGML_TYPE_Q6_0 = 133,
         GGML_TYPE_IQ1_BN = 134,
         GGML_TYPE_IQ2_BN = 135,

ggml/src/ggml-common.h

Lines changed: 14 additions & 0 deletions
@@ -266,6 +266,20 @@ typedef struct {
 } block_q8_0x8;
 static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
 
+#define QK8_2 32
+typedef struct {
+    uint16_t d;
+    uint16_t s;
+    int8_t qs[QK8_2]; // quants
+} block_q8_2;
+static_assert(sizeof(block_q8_2) == sizeof(ggml_half) + sizeof(int16_t) + QK8_2, "wrong q8_2 block size/padding");
+
+typedef struct {
+    uint16_t d[8];
+    int8_t qs[4*QK8_2];
+} block_q8_2_x4;
+static_assert(sizeof(block_q8_2_x4) == 4*sizeof(block_q8_2), "wrong q8_2_x4 block size/padding");
+
 //
 // Super-block quantization structures
 //
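block_q8_2 mirrors block_q8_1, but d and s are raw bf16 bit patterns held in uint16_t instead of ggml_half. By analogy with q8_1, d should be the block scale and s the scale times the sum of the quants; the static_assert above only pins the size, not the semantics. A hedged sketch of filling one block, reusing the illustrative f32_to_bf16() from the first sketch:

#include <math.h>   // fabsf, roundf

// Illustrative only: quantize QK8_2 floats into one block_q8_2.
// Field semantics (d = scale, s = d * sum of quants) are inferred from q8_1,
// not taken from this diff.
static void quantize_block_q8_2(const float * x, block_q8_2 * y) {
    float amax = 0.0f;                        // max |x| over the block
    for (int i = 0; i < QK8_2; ++i) {
        const float ax = fabsf(x[i]);
        if (ax > amax) amax = ax;
    }
    const float d  = amax / 127.0f;           // block scale
    const float id = d != 0.0f ? 1.0f/d : 0.0f;
    int sum = 0;
    for (int i = 0; i < QK8_2; ++i) {
        y->qs[i] = (int8_t)roundf(x[i]*id);   // quants in [-127, 127]
        sum += y->qs[i];
    }
    y->d = f32_to_bf16(d);                    // bf16 bit pattern, not ggml_half
    y->s = f32_to_bf16(d * (float)sum);       // scaled block sum, also bf16
}

The _x4 variant keeps all eight 16-bit header values of four consecutive blocks up front, which lets a SIMD kernel pull in several scales with a single load.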

ggml/src/ggml.c

Lines changed: 27 additions & 12 deletions
@@ -717,7 +717,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q4_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -741,7 +741,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
         .vec_dot = ggml_vec_dot_q4_1_q8_1,
 #if GGML_USE_IQK_MULMAT
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_1,
 #endif
@@ -789,7 +789,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q5_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -809,7 +809,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
         .vec_dot = ggml_vec_dot_q5_1_q8_1,
 #if GGML_USE_IQK_MULMAT
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_1,
 #endif
@@ -827,7 +827,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q6_0_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -852,7 +852,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         // Remember: we cannot add 128 to the Q8 quants and use the block sums in Q8_1 to subtract as we do on Zen4 for pure AVX2,
         // because there the result of the _mm256_maddubs_epi16() instruction may overflow the int16_t range
         // (and it gets saturated if it does), leading to wrong results.
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
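To make the overflow in that comment concrete: _mm256_maddubs_epi16 multiplies unsigned bytes by signed bytes and sums adjacent pairs into saturating int16_t lanes. Once the Q8 quants are shifted by +128 into 0..255, a single pair of products can already exceed the int16_t range:

#include <assert.h>
#include <stdint.h>

int main(void) {
    // One int16 lane of _mm256_maddubs_epi16 holds a1*b1 + a2*b2, with a
    // unsigned in [0,255] and b signed in [-128,127].
    const int worst = 255*127 + 255*127;   // 64770
    assert(worst > INT16_MAX);             // 64770 > 32767: the lane saturates
    return 0;
}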
@@ -897,6 +897,16 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .nrows = 1,
         .row_meta_size = 0,
     },
+    [GGML_TYPE_Q8_2_X4] = {
+        .type_name = "q8_2_x4",
+        .blck_size = QK8_2,
+        .type_size = sizeof(block_q8_2),
+        .is_quantized = true,
+        .from_float = quantize_row_q8_2_x4,
+        .from_float_ref = quantize_row_q8_2_x4,
+        .nrows = 1,
+        .row_meta_size = 0,
+    },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
         .blck_size = QK_K,
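In ggml's type_traits, from_float is the hook the matmul path uses to quantize activation rows into a weight type's vec_dot_type, so registering quantize_row_q8_2_x4 here is what routes activations through the new bf16 blocks. That function is not part of this diff; a hedged sketch of what the packing could look like, building on the earlier quantize_block_q8_2 sketch. Whether the eight 16-bit header values interleave as d,s,d,s,... (assumed here) or group all d's before all s's is a layout guess:

#include <stdint.h>
#include <string.h>

// Illustrative only: repack four consecutive q8_2 blocks into the x4 layout.
// The repo's real quantize_row_q8_2_x4 presumably fuses quantization and
// packing into one pass.
static void quantize_row_q8_2_x4_sketch(const float * x, block_q8_2_x4 * y, int64_t k) {
    const int64_t nb = k / (4*QK8_2);        // assume k is a multiple of 4*QK8_2
    for (int64_t ib = 0; ib < nb; ++ib) {
        for (int j = 0; j < 4; ++j) {
            block_q8_2 tmp;
            quantize_block_q8_2(x + (ib*4 + j)*QK8_2, &tmp);
            y[ib].d[2*j+0] = tmp.d;          // bf16 scale
            y[ib].d[2*j+1] = tmp.s;          // bf16 scaled sum
            memcpy(y[ib].qs + j*QK8_2, tmp.qs, QK8_2);
        }
    }
}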
@@ -1272,7 +1282,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq4_nl_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1628,7 +1638,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq4_nl_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1662,7 +1672,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q4_0_r8_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1683,7 +1693,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q8_0_r8_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1704,7 +1714,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q5_0_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
@@ -1725,7 +1735,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_q6_0_r4_q8_0,
 #if GGML_USE_IQK_MULMAT
 #if defined __AVX2__
-        .vec_dot_type = GGML_TYPE_Q8_1_X4,
+        .vec_dot_type = GGML_TYPE_Q8_2_X4,
 #else
         .vec_dot_type = GGML_TYPE_Q8_0_X4,
 #endif
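All of the repacked r4/r8 types above get the same AVX2 switch: activations now arrive as q8_2_x4, and the kernels decode the scales with a shift instead of an fp16 conversion. A hypothetical scalar sketch of how a kernel consumes the bf16 scales (the actual implementations are the SIMD routines named in .vec_dot; this reuses bf16_to_f32() from the first sketch):

// Illustrative scalar q8_2 x q8_2 dot product over n values.
static float vec_dot_q8_2_sketch(int n, const block_q8_2 * a, const block_q8_2 * b) {
    float acc = 0.0f;
    for (int ib = 0; ib < n/QK8_2; ++ib) {
        int isum = 0;
        for (int i = 0; i < QK8_2; ++i) {
            isum += a[ib].qs[i] * (int)b[ib].qs[i];   // integer dot of the quants
        }
        // decoding the bf16 scales is a 16-bit shift, not an fp16 conversion
        acc += bf16_to_f32(a[ib].d) * bf16_to_f32(b[ib].d) * (float)isum;
    }
    return acc;
}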
@@ -11647,6 +11657,7 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -11815,6 +11826,7 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -15690,6 +15702,7 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -15997,6 +16010,7 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
@@ -16627,6 +16641,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_Q8_1:
         case GGML_TYPE_Q8_0_X4:
         case GGML_TYPE_Q8_1_X4:
+        case GGML_TYPE_Q8_2_X4:
         case GGML_TYPE_Q2_K:
         case GGML_TYPE_Q2_K_R4:
         case GGML_TYPE_Q3_K:
