Skip to content

Commit 5ae5971

Browse files
committed
Revamp Q2_K and Q3_K quants
Q3_K_XL takes the place of Q3_K_L. Q3_K_L becomes an intermediary between Q3_K_M and Q3_K_XL.
1 parent 1bde168 commit 5ae5971

File tree

4 files changed

+97
-50
lines changed

4 files changed

+97
-50
lines changed

examples/quantize/quantize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
4141
{ "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization", },
4242
{ "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B", },
4343
{ "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G, +0.6569 ppl @ Llama-3-8B", },
44-
{ "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G, +0.5562 ppl @ Llama-3-8B", },
44+
{ "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.10 bpw quantization mix", },
45+
{ "Q3_K_XL", LLAMA_FTYPE_MOSTLY_Q3_K_XL, " 4.03G, +0.5562 ppl @ Llama-3-8B", },
4546
{ "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization", },
4647
{ "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization", },
4748
{ "IQ4_XSR", LLAMA_FTYPE_MOSTLY_IQ4_XSR, " 4.xx bpw non-linear quantization", },

gguf-py/gguf/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1227,7 +1227,7 @@ class LlamaFileType(IntEnum):
12271227
MOSTLY_Q2_K = 10 # except 1d tensors
12281228
MOSTLY_Q3_K_S = 11 # except 1d tensors
12291229
MOSTLY_Q3_K_M = 12 # except 1d tensors
1230-
MOSTLY_Q3_K_L = 13 # except 1d tensors
1230+
MOSTLY_Q3_K_XL = 13 # except 1d tensors
12311231
MOSTLY_Q4_K_S = 14 # except 1d tensors
12321232
MOSTLY_Q4_K_M = 15 # except 1d tensors
12331233
MOSTLY_Q5_K_S = 16 # except 1d tensors
@@ -1257,6 +1257,7 @@ class LlamaFileType(IntEnum):
12571257
MOSTLY_IQ1_XL = 42 # except 1d tensors
12581258
MOSTLY_IQ4_XSR = 43 # except 1d tensors
12591259
MOSTLY_IQ3_XXL = 44 # except 1d tensors
1260+
MOSTLY_Q3_K_L = 45 # except 1d tensors
12601261

12611262
GUESSED = 1024 # not specified in the model file
12621263

include/llama.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ extern "C" {
143143
LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
144144
LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
145145
LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
146-
LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
146+
LLAMA_FTYPE_MOSTLY_Q3_K_XL = 13, // except 1d tensors
147147
LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
148148
LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
149149
LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
@@ -173,6 +173,7 @@ extern "C" {
173173
LLAMA_FTYPE_MOSTLY_IQ1_XL = 42, // except 1d tensors
174174
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 43, // except 1d tensors
175175
LLAMA_FTYPE_MOSTLY_IQ3_XXL = 44, // except 1d tensors
176+
LLAMA_FTYPE_MOSTLY_Q3_K_L = 45, // except 1d tensors
176177
LLAMA_FTYPE_CQS = 99, // except 1d tensors
177178

178179
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file

0 commit comments

Comments (0)