Skip to content

Commit 392061c

Browse files
committed
Skip overriding when tensor is in fallback mode
1 parent 6e67254 commit 392061c

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/llama-quant.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -875,9 +875,10 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
875875

876876
// get more optimal quantization type based on the tensor shape, layer, etc.
877877
if (!params->pure && ggml_is_quantized(default_type)) {
878+
int fallback = qs.n_fallback;
878879
new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
879880
// unless the user specifies a type
880-
if (params->tensor_types) {
881+
if (params->tensor_types && qs.n_fallback - fallback == 0) {
881882
const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);
882883
const std::string tensor_name(tensor->name);
883884
for (const auto & [tname, qtype] : tensor_types) {

0 commit comments

Comments (0)