@@ -846,12 +846,32 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
846846 // since many people will miss the error and not realize that most of the model is being quantized without an imatrix
847847 // tok_embd should be ignored in this case, since it always causes this warning
848848 if (name != tn (LLM_TENSOR_TOKEN_EMBD, " weight" )) {
849- throw std::runtime_error (format (" imatrix size %d is different from tensor size %d for %s" ,
850- int (it->second .size ()), int (tensor->ne [0 ]*tensor->ne [2 ]), tensor->name ));
849+ LLAMA_LOG_INFO (" imatrix size %d is different from tensor size %d for %s" , int (it->second .size ()), int (tensor->ne [0 ]*tensor->ne [2 ]), tensor->name );
851850 }
852851 }
853852 }
854853 }
854+ if ((new_type == GGML_TYPE_IQ2_XXS ||
855+ new_type == GGML_TYPE_IQ1_S ||
856+ (new_type == GGML_TYPE_IQ1_M && strcmp (tensor->name , " token_embd.weight" ) && strcmp (tensor->name , " output.weight" ))) && !imatrix) {
857+ LLAMA_LOG_INFO (" \n\n ============================================================\n " );
858+ LLAMA_LOG_INFO (" Missing importance matrix for tensor %s in a very low-bit quantization\n " , tensor->name );
859+ LLAMA_LOG_INFO (" The result will be garbage, so using GGML_TYPE_Q2_K\n " );
860+ LLAMA_LOG_INFO (" ============================================================\n\n " );
861+ new_type = GGML_TYPE_Q2_K;
862+ // throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
863+ }
864+
865+ if ((new_type == GGML_TYPE_IQ2_XS ||
866+ new_type == GGML_TYPE_IQ2_S ||
867+ (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp (tensor->name , " token_embd.weight" ) != 0 )) && !imatrix) {
868+ LLAMA_LOG_INFO (" \n\n ============================================================\n " );
869+ LLAMA_LOG_INFO (" Missing importance matrix for tensor %s in a very low-bit quantization\n " , tensor->name );
870+ LLAMA_LOG_INFO (" The result will be garbage, so using GGML_TYPE_Q3_K\n " );
871+ LLAMA_LOG_INFO (" ============================================================\n\n " );
872+ new_type = GGML_TYPE_Q3_K;
873+ }
874+
855875 if ((new_type == GGML_TYPE_IQ2_XXS ||
856876 new_type == GGML_TYPE_IQ2_XS ||
857877 new_type == GGML_TYPE_IQ2_S ||
0 commit comments