@@ -67,40 +67,42 @@ IMPORTANCE_MATRIX_DIRECTORY=.\imatrix
67 67 #
68 68 # Possible llama.cpp quantization types:
69 69 #
70- # 2 or Q4_0 : 4.34G, +0.4685 ppl @ Llama-3-8B
71- # 3 or Q4_1 : 4.78G, +0.4511 ppl @ Llama-3-8B
72- # 8 or Q5_0 : 5.21G, +0.1316 ppl @ Llama-3-8B
73- # 9 or Q5_1 : 5.65G, +0.1062 ppl @ Llama-3-8B
74- # 19 or IQ2_XXS : 2.06 bpw quantization
75- # 20 or IQ2_XS : 2.31 bpw quantization
76- # 28 or IQ2_S : 2.5 bpw quantization
77- # 29 or IQ2_M : 2.7 bpw quantization
78- # 24 or IQ1_S : 1.56 bpw quantization
79- # 31 or IQ1_M : 1.75 bpw quantization
80- # 10 or Q2_K : 2.96G, +3.5199 ppl @ Llama-3-8B
81- # 21 or Q2_K_S : 2.96G, +3.1836 ppl @ Llama-3-8B
82- # 23 or IQ3_XXS : 3.06 bpw quantization
83- # 26 or IQ3_S : 3.44 bpw quantization
84- # 27 or IQ3_M : 3.66 bpw quantization mix
85- # 22 or IQ3_XS : 3.3 bpw quantization
86- # 11 or Q3_K_S : 3.41G, +1.6321 ppl @ Llama-3-8B
87- # 12 or Q3_K_M : 3.74G, +0.6569 ppl @ Llama-3-8B
88- # 13 or Q3_K_L : 4.03G, +0.5562 ppl @ Llama-3-8B
89- # 25 or IQ4_NL : 4.50 bpw non-linear quantization
90- # 30 or IQ4_XS : 4.25 bpw non-linear quantization
91- # 14 or Q4_K_S : 4.37G, +0.2689 ppl @ Llama-3-8B
92- # 15 or Q4_K_M : 4.58G, +0.1754 ppl @ Llama-3-8B
93- # 16 or Q5_K_S : 5.21G, +0.1049 ppl @ Llama-3-8B
94- # 17 or Q5_K_M : 5.33G, +0.0569 ppl @ Llama-3-8B
95- # 18 or Q6_K : 6.14G, +0.0217 ppl @ Llama-3-8B
96- # 7 or Q8_0 : 7.96G, +0.0026 ppl @ Llama-3-8B
97- # 33 or Q4_0_4_4 : 4.34G, +0.4685 ppl @ Llama-3-8B
98- # 34 or Q4_0_4_8 : 4.34G, +0.4685 ppl @ Llama-3-8B
99- # 35 or Q4_0_8_8 : 4.34G, +0.4685 ppl @ Llama-3-8B
100- # 1 or F16 : 14.00G, +0.0020 ppl @ Mistral-7B
101- # 32 or BF16 : 14.00G, -0.0050 ppl @ Mistral-7B
102- # 0 or F32 : 26.00G @ 7B
103- # COPY : only copy tensors, no quantizing
70+ # 2 or Q4_0 : 4.34G, +0.4685 ppl @ Llama-3-8B
71+ # 3 or Q4_1 : 4.78G, +0.4511 ppl @ Llama-3-8B
72+ # 8 or Q5_0 : 5.21G, +0.1316 ppl @ Llama-3-8B
73+ # 9 or Q5_1 : 5.65G, +0.1062 ppl @ Llama-3-8B
74+ # 19 or IQ2_XXS : 2.06 bpw quantization
75+ # 20 or IQ2_XS : 2.31 bpw quantization
76+ # 28 or IQ2_S : 2.5 bpw quantization
77+ # 29 or IQ2_M : 2.7 bpw quantization
78+ # 24 or IQ1_S : 1.56 bpw quantization
79+ # 31 or IQ1_M : 1.75 bpw quantization
80+ # 36 or TQ1_0 : 1.69 bpw ternarization
81+ # 37 or TQ2_0 : 2.06 bpw ternarization
82+ # 10 or Q2_K : 2.96G, +3.5199 ppl @ Llama-3-8B
83+ # 21 or Q2_K_S : 2.96G, +3.1836 ppl @ Llama-3-8B
84+ # 23 or IQ3_XXS : 3.06 bpw quantization
85+ # 26 or IQ3_S : 3.44 bpw quantization
86+ # 27 or IQ3_M : 3.66 bpw quantization mix
87+ # 12 or Q3_K : alias for Q3_K_M
88+ # 22 or IQ3_XS : 3.3 bpw quantization
89+ # 11 or Q3_K_S : 3.41G, +1.6321 ppl @ Llama-3-8B
90+ # 12 or Q3_K_M : 3.74G, +0.6569 ppl @ Llama-3-8B
91+ # 13 or Q3_K_L : 4.03G, +0.5562 ppl @ Llama-3-8B
92+ # 25 or IQ4_NL : 4.50 bpw non-linear quantization
93+ # 30 or IQ4_XS : 4.25 bpw non-linear quantization
94+ # 15 or Q4_K : alias for Q4_K_M
95+ # 14 or Q4_K_S : 4.37G, +0.2689 ppl @ Llama-3-8B
96+ # 15 or Q4_K_M : 4.58G, +0.1754 ppl @ Llama-3-8B
97+ # 17 or Q5_K : alias for Q5_K_M
98+ # 16 or Q5_K_S : 5.21G, +0.1049 ppl @ Llama-3-8B
99+ # 17 or Q5_K_M : 5.33G, +0.0569 ppl @ Llama-3-8B
100+ # 18 or Q6_K : 6.14G, +0.0217 ppl @ Llama-3-8B
101+ # 7 or Q8_0 : 7.96G, +0.0026 ppl @ Llama-3-8B
102+ # 1 or F16 : 14.00G, +0.0020 ppl @ Mistral-7B
103+ # 32 or BF16 : 14.00G, -0.0050 ppl @ Mistral-7B
104+ # 0 or F32 : 26.00G @ 7B
105+ # COPY : only copy tensors, no quantizing
104 106 #
105 107 # Hint: A very good quantization with minimal quality loss is
106 108 # Q5_K_M. Quantization below 4-bit causes measurable quality
0 commit comments