Skip to content

Commit a08501e

Browse files
ikawrakow (Iwan Kawrakow)
and 1 other authored
Rename q4_0_r4, q8_0_r4 and iq4_xs_r4 to _r8 (#189)
* Rename q4_0_r4 to q4_0_r8 to reflect actual row interleaving

* Rename q8_0_r4 to q8_0_r8 to reflect actual row interleaving

* Rename iq4_xs_r4 to iq4_xs_r8 to reflect actual row interleaving

---------

Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 7f61b30 commit a08501e

File tree

10 files changed

+197
-197
lines changed

10 files changed

+197
-197
lines changed

examples/quantize/quantize.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
5151
{ "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1764 ppl @ LLaMA-v1-7B", },
5252
{ "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization", },
5353
{ "IQ4_NL_R4",LLAMA_FTYPE_MOSTLY_IQ4_NL_R4," 4.50 bpw non-linear quantization", },
54-
{ "IQ4_XS_R4",LLAMA_FTYPE_MOSTLY_IQ4_XS_R4," 4.25 bpw non-linear quantization", },
55-
{ "Q4_0_R4", LLAMA_FTYPE_MOSTLY_Q4_0_R4, " 4.50 bpw quantization", },
54+
{ "IQ4_XS_R8",LLAMA_FTYPE_MOSTLY_IQ4_XS_R8," 4.25 bpw non-linear quantization", },
55+
{ "Q4_0_R8", LLAMA_FTYPE_MOSTLY_Q4_0_R8, " 4.50 bpw quantization", },
5656
{ "Q5_0_R4", LLAMA_FTYPE_MOSTLY_Q5_0_R4, " 5.50 bpw quantization", },
5757
{ "Q6_0_R4", LLAMA_FTYPE_MOSTLY_Q6_0_R4, " 6.50 bpw quantization", },
58-
{ "Q8_0_R4", LLAMA_FTYPE_MOSTLY_Q8_0_R4, " 8.50 bpw quantization", },
58+
{ "Q8_0_R8", LLAMA_FTYPE_MOSTLY_Q8_0_R8, " 8.50 bpw quantization", },
5959
{ "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization", },
6060
{ "IQ4_KS", LLAMA_FTYPE_MOSTLY_IQ4_KS, " 4.25 bpw non-linear quantization", },
6161
{ "IQ4_KS_R4",LLAMA_FTYPE_MOSTLY_IQ4_KS_R4,"IQ4_KS repacked", },

ggml/include/ggml.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -416,9 +416,9 @@ extern "C" {
416416
GGML_TYPE_Q8_K32 = 148,
417417
GGML_TYPE_Q8_KR8 = 149,
418418

419-
GGML_TYPE_Q4_0_R4 = 202,
419+
GGML_TYPE_Q4_0_R8 = 202,
420420
GGML_TYPE_Q5_0_R4 = 206,
421-
GGML_TYPE_Q8_0_R4 = 208,
421+
GGML_TYPE_Q8_0_R8 = 208,
422422
GGML_TYPE_Q2_K_R4 = 210,
423423
GGML_TYPE_Q3_K_R4 = 211,
424424
GGML_TYPE_Q4_K_R4 = 212,
@@ -431,7 +431,7 @@ extern "C" {
431431
GGML_TYPE_IQ4_NL_R4 = 220,
432432
GGML_TYPE_IQ3_S_R4 = 221,
433433
GGML_TYPE_IQ2_S_R4 = 222,
434-
GGML_TYPE_IQ4_XS_R4 = 223,
434+
GGML_TYPE_IQ4_XS_R8 = 223,
435435
GGML_TYPE_IQ1_M_R4 = 229,
436436
GGML_TYPE_BF16_R16 = 230,
437437
GGML_TYPE_Q6_0_R4 = 233,
@@ -501,8 +501,8 @@ extern "C" {
501501
GGML_FTYPE_MOSTLY_IQ2_KS = 138, // except 1d tensors
502502
GGML_FTYPE_MOSTLY_IQ4_KSS = 139, // except 1d tensors
503503
//
504-
GGML_FTYPE_MOSTLY_Q4_0_R4 = 202, // except 1d tensors
505-
GGML_FTYPE_MOSTLY_Q8_0_R4 = 207, // except 1d tensors
504+
GGML_FTYPE_MOSTLY_Q4_0_R8 = 202, // except 1d tensors
505+
GGML_FTYPE_MOSTLY_Q8_0_R8 = 207, // except 1d tensors
506506
GGML_FTYPE_MOSTLY_Q5_0_R4 = 208, // except 1d tensors
507507
GGML_FTYPE_MOSTLY_Q2_K_R4 = 210, // except 1d tensors
508508
GGML_FTYPE_MOSTLY_Q3_K_R4 = 211, // except 1d tensors
@@ -516,7 +516,7 @@ extern "C" {
516516
GGML_FTYPE_MOSTLY_IQ4_NL_R4 = 219, // except 1d tensors
517517
GGML_FTYPE_MOSTLY_IQ3_S_R4 = 220, // except 1d tensors
518518
GGML_FTYPE_MOSTLY_IQ2_S_R4 = 221, // except 1d tensors
519-
GGML_FTYPE_MOSTLY_IQ4_XS_R4 = 222, // except 1d tensors
519+
GGML_FTYPE_MOSTLY_IQ4_XS_R8 = 222, // except 1d tensors
520520
GGML_FTYPE_MOSTLY_IQ1_M_R4 = 223, // except 1d tensors
521521
GGML_FTYPE_MOSTLY_BF16_R16 = 224, // except 1d tensors
522522
GGML_FTYPE_MOSTLY_Q6_0_R4 = 227, // except 1d tensors

ggml/src/ggml-common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -562,8 +562,8 @@ typedef struct {
562562
uint8_t scales_h[QK_K/16];
563563
uint8_t scales_l[QK_K/ 8];
564564
uint8_t qs[QK_K*4];
565-
} block_iq4_xs_r4;
566-
static_assert(sizeof(block_iq4_xs_r4) == 8*sizeof(block_iq4_xs), "wrong iq4_xs_rs block size/padding");
565+
} block_iq4_xs_r8;
566+
static_assert(sizeof(block_iq4_xs_r8) == 8*sizeof(block_iq4_xs), "wrong iq4_xs_rs block size/padding");
567567

568568
typedef struct {
569569
uint8_t scales[QK_K/32];

ggml/src/ggml-quants.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15193,18 +15193,18 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
1519315193
case GGML_TYPE_IQ4_KS: break;
1519415194
case GGML_TYPE_IQ4_KSS: break;
1519515195
case GGML_TYPE_IQ4_NL_R4: break;
15196-
case GGML_TYPE_IQ4_XS_R4: break;
15196+
case GGML_TYPE_IQ4_XS_R8: break;
1519715197
case GGML_TYPE_IQ2_XXS_R4: break;
1519815198
case GGML_TYPE_IQ2_XS_R4: break;
1519915199
case GGML_TYPE_IQ3_XXS_R4: break;
1520015200
case GGML_TYPE_IQ3_S_R4: break;
1520115201
case GGML_TYPE_IQ2_S_R4: break;
1520215202
case GGML_TYPE_IQ1_S_R4: break;
1520315203
case GGML_TYPE_IQ1_M_R4: break;
15204-
case GGML_TYPE_Q4_0_R4: break;
15204+
case GGML_TYPE_Q4_0_R8: break;
1520515205
case GGML_TYPE_Q5_0_R4: break;
1520615206
case GGML_TYPE_Q6_0_R4: break;
15207-
case GGML_TYPE_Q8_0_R4: break;
15207+
case GGML_TYPE_Q8_0_R8: break;
1520815208
case GGML_TYPE_Q2_K_R4: break;
1520915209
case GGML_TYPE_Q3_K_R4: break;
1521015210
case GGML_TYPE_Q4_K_R4: break;

ggml/src/ggml.c

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,28 +1606,28 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
16061606
.nrows = 1,
16071607
.row_meta_size = 0,
16081608
},
1609-
[GGML_TYPE_IQ4_XS_R4] = {
1610-
.type_name = "iq4_xs_r4",
1609+
[GGML_TYPE_IQ4_XS_R8] = {
1610+
.type_name = "iq4_xs_r8",
16111611
.blck_size = QK_K,
16121612
.type_size = sizeof(block_iq4_xs),
16131613
.is_quantized = true,
1614-
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs_r4,
1615-
.from_float = quantize_row_iq4_xs_r4,
1616-
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_r4_ref,
1617-
.vec_dot = vec_dot_iq4_xs_r4_q8_k,
1614+
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs_r8,
1615+
.from_float = quantize_row_iq4_xs_r8,
1616+
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_r8_ref,
1617+
.vec_dot = vec_dot_iq4_xs_r8_q8_k,
16181618
.vec_dot_type = GGML_TYPE_Q8_K32,
16191619
.nrows = 1,
16201620
.row_meta_size = 0,
16211621
},
1622-
[GGML_TYPE_Q4_0_R4] = {
1623-
.type_name = "q4_0_r4",
1622+
[GGML_TYPE_Q4_0_R8] = {
1623+
.type_name = "q4_0_r8",
16241624
.blck_size = QK4_NL,
16251625
.type_size = sizeof(block_iq4_nl),
16261626
.is_quantized = true,
1627-
.to_float = (ggml_to_float_t) dequantize_row_q4_0_r4,
1628-
.from_float = quantize_row_q4_0_r4,
1629-
.from_float_ref = (ggml_from_float_t)quantize_row_q4_0_r4_ref,
1630-
.vec_dot = vec_dot_q4_0_r4_q8_0,
1627+
.to_float = (ggml_to_float_t) dequantize_row_q4_0_r8,
1628+
.from_float = quantize_row_q4_0_r8,
1629+
.from_float_ref = (ggml_from_float_t)quantize_row_q4_0_r8_ref,
1630+
.vec_dot = vec_dot_q4_0_r8_q8_0,
16311631
#if GGML_USE_IQK_MULMAT
16321632
#if defined __AVX2__
16331633
.vec_dot_type = GGML_TYPE_Q8_1_X4,
@@ -1640,15 +1640,15 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
16401640
.nrows = 1,
16411641
.row_meta_size = 0,
16421642
},
1643-
[GGML_TYPE_Q8_0_R4] = {
1644-
.type_name = "q8_0_r4",
1643+
[GGML_TYPE_Q8_0_R8] = {
1644+
.type_name = "q8_0_r8",
16451645
.blck_size = QK8_0,
16461646
.type_size = sizeof(block_q8_0),
16471647
.is_quantized = true,
1648-
.to_float = (ggml_to_float_t) dequantize_row_q8_0_r4,
1649-
.from_float = quantize_row_q8_0_r4,
1650-
.from_float_ref = (ggml_from_float_t)quantize_row_q8_0_r4_ref,
1651-
.vec_dot = vec_dot_q8_0_r4_q8_0,
1648+
.to_float = (ggml_to_float_t) dequantize_row_q8_0_r8,
1649+
.from_float = quantize_row_q8_0_r8,
1650+
.from_float_ref = (ggml_from_float_t)quantize_row_q8_0_r8_ref,
1651+
.vec_dot = vec_dot_q8_0_r8_q8_0,
16521652
#if GGML_USE_IQK_MULMAT
16531653
#if defined __AVX2__
16541654
.vec_dot_type = GGML_TYPE_Q8_1_X4,
@@ -4390,11 +4390,11 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
43904390
case GGML_FTYPE_MOSTLY_IQ2_BN_R4: wtype = GGML_TYPE_IQ2_BN_R4;break;
43914391
case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
43924392
case GGML_FTYPE_MOSTLY_IQ4_NL_R4: wtype = GGML_TYPE_IQ4_NL_R4;break;
4393-
case GGML_FTYPE_MOSTLY_IQ4_XS_R4: wtype = GGML_TYPE_IQ4_XS_R4;break;
4394-
case GGML_FTYPE_MOSTLY_Q4_0_R4: wtype = GGML_TYPE_Q4_0_R4; break;
4393+
case GGML_FTYPE_MOSTLY_IQ4_XS_R8: wtype = GGML_TYPE_IQ4_XS_R8;break;
4394+
case GGML_FTYPE_MOSTLY_Q4_0_R8: wtype = GGML_TYPE_Q4_0_R8; break;
43954395
case GGML_FTYPE_MOSTLY_Q5_0_R4: wtype = GGML_TYPE_Q5_0_R4; break;
43964396
case GGML_FTYPE_MOSTLY_Q6_0_R4: wtype = GGML_TYPE_Q6_0_R4; break;
4397-
case GGML_FTYPE_MOSTLY_Q8_0_R4: wtype = GGML_TYPE_Q8_0_R4; break;
4397+
case GGML_FTYPE_MOSTLY_Q8_0_R8: wtype = GGML_TYPE_Q8_0_R8; break;
43984398
case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
43994399
case GGML_FTYPE_MOSTLY_IQ4_KS: wtype = GGML_TYPE_IQ4_KS; break;
44004400
case GGML_FTYPE_MOSTLY_IQ4_KS_R4: wtype = GGML_TYPE_IQ4_KS_R4;break;
@@ -10938,12 +10938,12 @@ static void ggml_compute_forward_add(
1093810938
case GGML_TYPE_IQ2_BN_R4:
1093910939
case GGML_TYPE_IQ4_NL:
1094010940
case GGML_TYPE_IQ4_NL_R4:
10941-
case GGML_TYPE_IQ4_XS_R4:
10942-
case GGML_TYPE_Q4_0_R4:
10941+
case GGML_TYPE_IQ4_XS_R8:
10942+
case GGML_TYPE_Q4_0_R8:
1094310943
case GGML_TYPE_Q5_0_R4:
1094410944
case GGML_TYPE_Q6_0_R4:
1094510945
case GGML_TYPE_I2_S:
10946-
case GGML_TYPE_Q8_0_R4:
10946+
case GGML_TYPE_Q8_0_R8:
1094710947
case GGML_TYPE_IQ4_XS:
1094810948
case GGML_TYPE_IQ4_KS:
1094910949
case GGML_TYPE_IQ4_KS_R4:
@@ -11408,12 +11408,12 @@ static void ggml_compute_forward_add1(
1140811408
case GGML_TYPE_IQ2_BN_R4:
1140911409
case GGML_TYPE_IQ4_NL:
1141011410
case GGML_TYPE_IQ4_NL_R4:
11411-
case GGML_TYPE_IQ4_XS_R4:
11412-
case GGML_TYPE_Q4_0_R4:
11411+
case GGML_TYPE_IQ4_XS_R8:
11412+
case GGML_TYPE_Q4_0_R8:
1141311413
case GGML_TYPE_Q5_0_R4:
1141411414
case GGML_TYPE_Q6_0_R4:
1141511415
case GGML_TYPE_I2_S:
11416-
case GGML_TYPE_Q8_0_R4:
11416+
case GGML_TYPE_Q8_0_R8:
1141711417
case GGML_TYPE_IQ4_XS:
1141811418
case GGML_TYPE_IQ4_KS:
1141911419
case GGML_TYPE_IQ4_KS_R4:
@@ -11575,12 +11575,12 @@ static void ggml_compute_forward_acc(
1157511575
case GGML_TYPE_IQ2_BN_R4:
1157611576
case GGML_TYPE_IQ4_NL:
1157711577
case GGML_TYPE_IQ4_NL_R4:
11578-
case GGML_TYPE_IQ4_XS_R4:
11579-
case GGML_TYPE_Q4_0_R4:
11578+
case GGML_TYPE_IQ4_XS_R8:
11579+
case GGML_TYPE_Q4_0_R8:
1158011580
case GGML_TYPE_Q5_0_R4:
1158111581
case GGML_TYPE_Q6_0_R4:
1158211582
case GGML_TYPE_I2_S:
11583-
case GGML_TYPE_Q8_0_R4:
11583+
case GGML_TYPE_Q8_0_R8:
1158411584
case GGML_TYPE_IQ4_XS:
1158511585
case GGML_TYPE_IQ4_KS:
1158611586
case GGML_TYPE_IQ4_KS_R4:
@@ -14815,12 +14815,12 @@ static void ggml_compute_forward_out_prod(
1481514815
case GGML_TYPE_IQ2_BN_R4:
1481614816
case GGML_TYPE_IQ4_NL:
1481714817
case GGML_TYPE_IQ4_NL_R4:
14818-
case GGML_TYPE_IQ4_XS_R4:
14819-
case GGML_TYPE_Q4_0_R4:
14818+
case GGML_TYPE_IQ4_XS_R8:
14819+
case GGML_TYPE_Q4_0_R8:
1482014820
case GGML_TYPE_Q5_0_R4:
1482114821
case GGML_TYPE_Q6_0_R4:
1482214822
case GGML_TYPE_I2_S:
14823-
case GGML_TYPE_Q8_0_R4:
14823+
case GGML_TYPE_Q8_0_R8:
1482414824
case GGML_TYPE_IQ4_XS:
1482514825
case GGML_TYPE_IQ4_KS:
1482614826
case GGML_TYPE_IQ4_KS_R4:
@@ -15222,12 +15222,12 @@ static void ggml_compute_forward_set(
1522215222
case GGML_TYPE_IQ2_BN_R4:
1522315223
case GGML_TYPE_IQ4_NL:
1522415224
case GGML_TYPE_IQ4_NL_R4:
15225-
case GGML_TYPE_IQ4_XS_R4:
15226-
case GGML_TYPE_Q4_0_R4:
15225+
case GGML_TYPE_IQ4_XS_R8:
15226+
case GGML_TYPE_Q4_0_R8:
1522715227
case GGML_TYPE_Q5_0_R4:
1522815228
case GGML_TYPE_Q6_0_R4:
1522915229
case GGML_TYPE_I2_S:
15230-
case GGML_TYPE_Q8_0_R4:
15230+
case GGML_TYPE_Q8_0_R8:
1523115231
case GGML_TYPE_IQ4_XS:
1523215232
case GGML_TYPE_IQ4_KS:
1523315233
case GGML_TYPE_IQ4_KS_R4:
@@ -15523,12 +15523,12 @@ static void ggml_compute_forward_get_rows(
1552315523
case GGML_TYPE_IQ2_BN_R4:
1552415524
case GGML_TYPE_IQ4_NL:
1552515525
case GGML_TYPE_IQ4_NL_R4:
15526-
case GGML_TYPE_IQ4_XS_R4:
15527-
case GGML_TYPE_Q4_0_R4:
15526+
case GGML_TYPE_IQ4_XS_R8:
15527+
case GGML_TYPE_Q4_0_R8:
1552815528
case GGML_TYPE_Q5_0_R4:
1552915529
case GGML_TYPE_Q6_0_R4:
1553015530
case GGML_TYPE_I2_S:
15531-
case GGML_TYPE_Q8_0_R4:
15531+
case GGML_TYPE_Q8_0_R8:
1553215532
case GGML_TYPE_IQ4_XS:
1553315533
case GGML_TYPE_IQ4_KS:
1553415534
case GGML_TYPE_IQ4_KS_R4:
@@ -16153,12 +16153,12 @@ static void ggml_compute_forward_clamp(
1615316153
case GGML_TYPE_IQ2_BN_R4:
1615416154
case GGML_TYPE_IQ4_NL:
1615516155
case GGML_TYPE_IQ4_NL_R4:
16156-
case GGML_TYPE_IQ4_XS_R4:
16157-
case GGML_TYPE_Q4_0_R4:
16156+
case GGML_TYPE_IQ4_XS_R8:
16157+
case GGML_TYPE_Q4_0_R8:
1615816158
case GGML_TYPE_Q5_0_R4:
1615916159
case GGML_TYPE_Q6_0_R4:
1616016160
case GGML_TYPE_I2_S:
16161-
case GGML_TYPE_Q8_0_R4:
16161+
case GGML_TYPE_Q8_0_R8:
1616216162
case GGML_TYPE_IQ4_XS:
1616316163
case GGML_TYPE_IQ4_KS:
1616416164
case GGML_TYPE_IQ4_KS_R4:
@@ -23028,11 +23028,11 @@ size_t ggml_quantize_chunk(
2302823028
case GGML_TYPE_IQ2_BN_R4:result = quantize_iq2_bn_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2302923029
case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303023030
case GGML_TYPE_IQ4_NL_R4: result = quantize_iq4_nl_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23031-
case GGML_TYPE_IQ4_XS_R4: result = quantize_iq4_xs_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23032-
case GGML_TYPE_Q4_0_R4: result = quantize_q4_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23031+
case GGML_TYPE_IQ4_XS_R8: result = quantize_iq4_xs_r8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23032+
case GGML_TYPE_Q4_0_R8: result = quantize_q4_0_r8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303323033
case GGML_TYPE_Q5_0_R4: result = quantize_q5_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303423034
case GGML_TYPE_Q6_0_R4: result = quantize_q6_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23035-
case GGML_TYPE_Q8_0_R4: result = quantize_q8_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
23035+
case GGML_TYPE_Q8_0_R8: result = quantize_q8_0_r8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303623036
case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303723037
case GGML_TYPE_IQ4_KS: result = quantize_iq4_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
2303823038
case GGML_TYPE_IQ4_KS_R4:result = quantize_iq4_ks_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;

0 commit comments

Comments
 (0)