Skip to content

Commit df8a2d3

Browse files
q4_4: general support for q4_4 and q8_4
1 parent 8e8f8ce commit df8a2d3

File tree

11 files changed

+450
-1
lines changed

11 files changed

+450
-1
lines changed

convert_llama_ggml_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,9 @@ class GGMLFType(IntEnum):
4343
MOSTLY_Q5_K_M = 17
4444
MOSTLY_Q6_K = 18
4545

46+
MOSTLY_Q4_4 = 32
47+
MOSTLY_Q8_4 = 33
48+
4649

4750
class Hyperparameters:
4851
def __init__(self):

examples/quantize/quantize.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
4949
{ "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
5050
{ "Q4_0_4_8", LLAMA_FTYPE_MOSTLY_Q4_0_4_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
5151
{ "Q4_0_8_8", LLAMA_FTYPE_MOSTLY_Q4_0_8_8, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
52+
{ "Q4_4", LLAMA_FTYPE_MOSTLY_Q4_4, " 3.35G, ??? ppl @ LLaMA-v1-7B", },
53+
{ "Q8_4", LLAMA_FTYPE_MOSTLY_Q8_4, " 6.??G, ??? ppl @ LLaMA-v1-7B", },
5254
{ "F16", LLAMA_FTYPE_MOSTLY_F16, "14.00G, +0.0020 ppl @ Mistral-7B", },
5355
{ "BF16", LLAMA_FTYPE_MOSTLY_BF16, "14.00G, -0.0050 ppl @ Mistral-7B", },
5456
{ "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", },

ggml/include/ggml.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,8 @@ extern "C" {
395395
GGML_TYPE_Q4_0_4_4 = 31,
396396
GGML_TYPE_Q4_0_4_8 = 32,
397397
GGML_TYPE_Q4_0_8_8 = 33,
398+
GGML_TYPE_Q4_4 = 34,
399+
GGML_TYPE_Q8_4 = 35,
398400
GGML_TYPE_COUNT,
399401
};
400402

@@ -439,6 +441,8 @@ extern "C" {
439441
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
440442
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
441443
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
444+
GGML_FTYPE_MOSTLY_Q4_4 = 28, // except 1d tensors
445+
GGML_FTYPE_MOSTLY_Q8_4 = 29, // except 1d tensors
442446
};
443447

444448
// available tensor operations:

ggml/src/ggml-common.h

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,20 @@ typedef struct {
227227
} block_q8_0x8;
228228
static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229229

230+
// q4_4 block layout: one ggml_half delta followed by QK4_4 / 2 bytes of packed quants.
#define QK4_4 128 // sizes the qs array below (QK4_4 / 2 bytes); presumably the number of 4-bit quants per block, two per byte - confirm against the (de)quantization code
231+
typedef struct {
232+
ggml_half d; // delta (presumably the per-block scale factor - confirm against the dequantization code)
233+
uint8_t qs[QK4_4 / 2]; // nibbles / quants; QK4_4 / 2 bytes in total
234+
} block_q4_4;
235+
// Compile-time layout check: the struct must be exactly one ggml_half plus the qs bytes, i.e. no padding.
static_assert(sizeof(block_q4_4) == sizeof(ggml_half) + QK4_4 / 2, "wrong q4_4 block size/padding");
236+
237+
// q8_4 block layout: one ggml_half delta followed by QK8_4 signed 8-bit quants.
#define QK8_4 128 // number of int8_t entries in the qs array below
238+
typedef struct {
239+
ggml_half d; // delta (presumably the per-block scale factor - confirm against the dequantization code)
240+
int8_t qs[QK8_4]; // quants; one signed byte per entry
241+
} block_q8_4;
242+
// Compile-time layout check: the struct must be exactly one ggml_half plus QK8_4 bytes, i.e. no padding.
static_assert(sizeof(block_q8_4) == sizeof(ggml_half) + QK8_4, "wrong q8_4 block size/padding");
243+
230244
//
231245
// Super-block quantization structures
232246
//

0 commit comments

Comments
 (0)