Skip to content

Commit a67ef0f

Browse files
authored
llama : fix sanity checks during quantization (#17721)
1 parent ef75a89 commit a67ef0f

File tree

1 file changed

+5
-7
lines changed

1 file changed

+5
-7
lines changed

src/llama-quant.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -726,21 +726,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
726726
// sanity checks for models that have attention layers
727727
if (qs.n_attention_wv != 0 && !is_clip_model)
728728
{
729-
const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
730-
// attention layers have a non-zero number of kv heads
731-
int32_t n_layer_attn = model.hparams.n_layer - std::count(n_head_kv_iter, n_head_kv_iter + model.hparams.n_layer, 0);
729+
int32_t n_layer_all = model.hparams.n_layer;
732730
if (llama_model_has_encoder(&model)) {
733-
// now n_layer_attn is the number of attention layers in the encoder
731+
// now n_layer_all is the number of layers in the encoder
734732
// for each decoder block, there are 2 attention layers
735-
n_layer_attn += 2 * model.hparams.dec_n_layer;
733+
n_layer_all += 2 * model.hparams.dec_n_layer;
736734
}
737735

738736
// note: for linear-attention models (such as Qwen3 Next) this is the number of linear layers
739737
const int32_t n_layer_recr = std::count(model.hparams.recurrent_layer_arr.begin(), model.hparams.recurrent_layer_arr.end(), true);
740738

741-
LLAMA_LOG_INFO("%s: n_layer_attn = %d, n_layer_recr = %d, pruned_attention_w = %d\n", __func__, n_layer_attn, n_layer_recr, pruned_attention_w);
739+
LLAMA_LOG_INFO("%s: n_layer_all = %d, n_layer_recr = %d, pruned_attention_w = %d\n", __func__, n_layer_all, n_layer_recr, pruned_attention_w);
742740

743-
GGML_ASSERT((qs.n_attention_wv == n_layer_attn - pruned_attention_w - n_layer_recr) && "n_attention_wv is unexpected");
741+
GGML_ASSERT((qs.n_attention_wv == n_layer_all - pruned_attention_w - n_layer_recr) && "n_attention_wv is unexpected");
744742
}
745743

746744
size_t total_size_org = 0;

0 commit comments

Comments
 (0)