
Commit 1f64092

Update quantization types

1 parent 1fd029e

File tree

1 file changed: +29 −22 lines

README.md (29 additions, 22 deletions)
```diff
@@ -58,28 +58,35 @@ CACHE_DIRECTORY=.\cache
 #
 # Possible llama.cpp quantization types:
 #
-# IQ2_XXS : 2.06 bpw quantization
-# IQ2_XS  : 2.31 bpw quantization
-# Q2_K    : 2.63G, +0.6717 ppl @ LLaMA-v1-7B
-# Q2_K_S  : 2.16G, +9.0634 ppl @ LLaMA-v1-7B
-# IQ3_XXS : 3.06 bpw quantization
-# Q3_K_XS : 3-bit extra small quantization
-# Q3_K_S  : 2.75G, +0.5551 ppl @ LLaMA-v1-7B
-# Q3_K_M  : 3.07G, +0.2496 ppl @ LLaMA-v1-7B
-# Q3_K_L  : 3.35G, +0.1764 ppl @ LLaMA-v1-7B
-# Q4_0    : 3.56G, +0.2166 ppl @ LLaMA-v1-7B
-# Q4_1    : 3.90G, +0.1585 ppl @ LLaMA-v1-7B
-# Q4_K_S  : 3.59G, +0.0992 ppl @ LLaMA-v1-7B
-# Q4_K_M  : 3.80G, +0.0532 ppl @ LLaMA-v1-7B
-# Q5_0    : 4.33G, +0.0683 ppl @ LLaMA-v1-7B
-# Q5_1    : 4.70G, +0.0349 ppl @ LLaMA-v1-7B
-# Q5_K_S  : 4.33G, +0.0400 ppl @ LLaMA-v1-7B
-# Q5_K_M  : 4.45G, +0.0122 ppl @ LLaMA-v1-7B
-# Q6_K    : 5.15G, -0.0008 ppl @ LLaMA-v1-7B
-# Q8_0    : 6.70G, +0.0004 ppl @ LLaMA-v1-7B
-# F16     : 13.00G @ 7B
-# F32     : 26.00G @ 7B
-# COPY    : only copy tensors, no quantizing
+#  2 or Q4_0    : 3.56G, +0.2166 ppl @ LLaMA-v1-7B
+#  3 or Q4_1    : 3.90G, +0.1585 ppl @ LLaMA-v1-7B
+#  8 or Q5_0    : 4.33G, +0.0683 ppl @ LLaMA-v1-7B
+#  9 or Q5_1    : 4.70G, +0.0349 ppl @ LLaMA-v1-7B
+# 19 or IQ2_XXS : 2.06 bpw quantization
+# 20 or IQ2_XS  : 2.31 bpw quantization
+# 28 or IQ2_S   : 2.5 bpw quantization
+# 29 or IQ2_M   : 2.7 bpw quantization
+# 24 or IQ1_S   : 1.56 bpw quantization
+# 10 or Q2_K    : 2.63G, +0.6717 ppl @ LLaMA-v1-7B
+# 21 or Q2_K_S  : 2.16G, +9.0634 ppl @ LLaMA-v1-7B
+# 23 or IQ3_XXS : 3.06 bpw quantization
+# 26 or IQ3_S   : 3.44 bpw quantization
+# 27 or IQ3_M   : 3.66 bpw quantization mix
+# 22 or IQ3_XS  : 3.3 bpw quantization
+# 11 or Q3_K_S  : 2.75G, +0.5551 ppl @ LLaMA-v1-7B
+# 12 or Q3_K_M  : 3.07G, +0.2496 ppl @ LLaMA-v1-7B
+# 13 or Q3_K_L  : 3.35G, +0.1764 ppl @ LLaMA-v1-7B
+# 25 or IQ4_NL  : 4.50 bpw non-linear quantization
+# 30 or IQ4_XS  : 4.25 bpw non-linear quantization
+# 14 or Q4_K_S  : 3.59G, +0.0992 ppl @ LLaMA-v1-7B
+# 15 or Q4_K_M  : 3.80G, +0.0532 ppl @ LLaMA-v1-7B
+# 16 or Q5_K_S  : 4.33G, +0.0400 ppl @ LLaMA-v1-7B
+# 17 or Q5_K_M  : 4.45G, +0.0122 ppl @ LLaMA-v1-7B
+# 18 or Q6_K    : 5.15G, +0.0008 ppl @ LLaMA-v1-7B
+#  7 or Q8_0    : 6.70G, +0.0004 ppl @ LLaMA-v1-7B
+#  1 or F16     : 13.00G @ 7B
+#  0 or F32     : 26.00G @ 7B
+#       COPY    : only copy tensors, no quantizing
 #
 # Hint: The sweet spot is Q5_K_M. The smallest quantization
 # without the need for an importance matrix is IQ3_XXS.
```
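The updated comment allows each quantization type to be given either by its numeric ID or by its name. As a rough illustration of how a config value like `17` or `Q5_K_M` could be normalized against this table (the mapping is transcribed from the list above; the helper function is a sketch of mine, not part of the commit):

```python
# Quantization type IDs and names as listed in the README comment above.
# COPY has no numeric ID, so it is handled separately.
QUANT_TYPES = {
    0: "F32", 1: "F16", 2: "Q4_0", 3: "Q4_1", 7: "Q8_0",
    8: "Q5_0", 9: "Q5_1", 10: "Q2_K", 11: "Q3_K_S", 12: "Q3_K_M",
    13: "Q3_K_L", 14: "Q4_K_S", 15: "Q4_K_M", 16: "Q5_K_S",
    17: "Q5_K_M", 18: "Q6_K", 19: "IQ2_XXS", 20: "IQ2_XS",
    21: "Q2_K_S", 22: "IQ3_XS", 23: "IQ3_XXS", 24: "IQ1_S",
    25: "IQ4_NL", 26: "IQ3_S", 27: "IQ3_M", 28: "IQ2_S",
    29: "IQ2_M", 30: "IQ4_XS",
}
NAME_TO_ID = {name: num for num, name in QUANT_TYPES.items()}

def normalize_quant_type(value: str) -> str:
    """Accept a numeric ID ("17") or a name ("Q5_K_M"); return the name."""
    value = value.strip().upper()
    if value.isdigit():
        return QUANT_TYPES[int(value)]
    if value in NAME_TO_ID or value == "COPY":
        return value
    raise ValueError(f"unknown quantization type: {value}")
```

For example, `normalize_quant_type("17")` and `normalize_quant_type("q5_k_m")` both resolve to `Q5_K_M`, so either spelling can be written into the config file.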
