Skip to content

Commit b679810

Browse files
committed
Update comment
1 parent 392061c commit b679810

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

src/llama-quant.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
877877
if (!params->pure && ggml_is_quantized(default_type)) {
878878
int fallback = qs.n_fallback;
879879
new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
880-
// unless the user specifies a type
880+
// unless the user specifies a type and the tensor geometry will not require fallback quantization
881881
if (params->tensor_types && qs.n_fallback - fallback == 0) {
882882
const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);
883883
const std::string tensor_name(tensor->name);
@@ -891,7 +891,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
891891
}
892892
}
893893
}
894-
895894
if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
896895
new_type = params->token_embedding_type;
897896
}

0 commit comments

Comments
 (0)