
Commit 149b98c

Revert unnecessary cast because the problem can be solved by excluding attn_k, attn_q when quantizing
Parent: 7e4c5ec

File tree

1 file changed (+2, -4 lines)


src/llama-model.cpp

Lines changed: 2 additions & 4 deletions
@@ -10536,8 +10536,7 @@ struct llm_build_plamo2 : public llm_graph_context_mamba {
         Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens);
         Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head_v, n_head_kv, n_tokens);

-        ggml_tensor * wq = ggml_cast(ctx0, model.layers[il].wq, Qcur->type);
-        Qcur = build_norm(Qcur, wq, NULL, LLM_NORM_RMS, il);
+        Qcur = build_norm(Qcur, model.layers[il].wq, NULL, LLM_NORM_RMS, il);
         cb(Qcur, "Qcur_normed", il);

         Qcur = ggml_rope_ext(
@@ -10546,8 +10545,7 @@ struct llm_build_plamo2 : public llm_graph_context_mamba {
             ext_factor, attn_factor, beta_fast, beta_slow
         );

-        ggml_tensor * wk = ggml_cast(ctx0, model.layers[il].wk, Kcur->type);
-        Kcur = build_norm(Kcur, wk, NULL, LLM_NORM_RMS, il);
+        Kcur = build_norm(Kcur, model.layers[il].wk, NULL, LLM_NORM_RMS, il);
         cb(Kcur, "Kcur_normed", il);

         Kcur = ggml_rope_ext(
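
As the commit message notes, the ggml_cast was a workaround for quantized attn_q/attn_k weights being used as the RMS-norm scale in build_norm; excluding those tensors from quantization keeps them in a float type, so no cast is needed. Below is a minimal, illustrative C++ sketch of that kind of name-based exclusion; the helper is hypothetical and not llama.cpp's actual quantization code.

#include <string>

// Hypothetical helper (not part of llama.cpp): report whether a tensor should be
// left unquantized. Keeping the PLaMo-2 attn_q/attn_k weights in a float type lets
// build_norm use them directly, so the ggml_cast reverted in this commit becomes
// unnecessary.
static bool plamo2_keep_tensor_unquantized(const std::string & name) {
    // GGUF tensor names follow the "blk.<layer>.attn_q.weight" pattern
    return name.find("attn_q.weight") != std::string::npos ||
           name.find("attn_k.weight") != std::string::npos;
}

A quantization pass would consult a check like this when picking each tensor's target type and fall back to the source type (e.g. F16/F32) for the excluded tensors.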
