@@ -28,13 +28,14 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2828 { " IQ1_XS" , LLAMA_FTYPE_MOSTLY_IQ1_XS, " 1.6-1.7 bpw quantization mix" , },
2929 { " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization" , },
3030 { " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization" , },
31+ { " IQ1_XL" , LLAMA_FTYPE_MOSTLY_IQ1_XL, " 1.90 bpw quantization" , },
3132 { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B" , },
3233 { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B" , },
33- { " Q2_K_L" , LLAMA_FTYPE_MOSTLY_Q2_K_L, " 2.96G , +3.1836 ppl @ Llama-3-8B" , },
34+ { " Q2_K_L" , LLAMA_FTYPE_MOSTLY_Q2_K_L, " 3.20G , +3.1836 ppl @ Llama-3-8B" , },
3435 { " IQ3_XXS" , LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization" , },
3536 { " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization" , },
36- { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix" , },
37- { " IQ3_XL" , LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.85 bpw quantization mix" , },
37+ { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.70 bpw quantization mix" , },
38+ { " IQ3_XL" , LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.95 bpw quantization mix" , },
3839 { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
3940 { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
4041 { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B" , },
0 commit comments