Skip to content

Commit b679810

Browse files
committed
Update comment
1 parent 392061c commit b679810

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

src/llama-quant.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
877877
if (!params->pure && ggml_is_quantized(default_type)) {
878878
int fallback = qs.n_fallback;
879879
new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
880-
// unless the user specifies a type
880+
// unless the user specifies a type and the tensor geometry will not require fallback quantization
881881
if (params->tensor_types && qs.n_fallback - fallback == 0) {
882882
const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);
883883
const std::string tensor_name(tensor->name);
@@ -891,7 +891,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
891891
}
892892
}
893893
}
894-
895894
if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
896895
new_type = params->token_embedding_type;
897896
}

0 commit comments

Comments
 (0)