
Commit 67c4346

cont : apply scale before attn
1 parent 36469ad commit 67c4346

1 file changed (+6 −2 lines)


src/llama-model.cpp

Lines changed: 6 additions & 2 deletions
@@ -973,7 +973,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
 
-                hparams.f_attention_scale = 1.0f / std::sqrt(float(hparams.n_embd_head_k));
+                hparams.f_attention_scale = type == LLM_TYPE_27B
+                    ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
+                    : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
             } break;
         case LLM_ARCH_STARCODER2:
             {
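Why the branch: for the 27B variant the attention scale is 1/sqrt(n_embd / n_head) (the model's query pre-attention scalar), which does not have to equal the per-head key dimension n_embd_head_k, so the generic 1/sqrt(head_dim) would be wrong for that model. A minimal standalone sketch of the two computations follows; the concrete hparams values are illustrative assumptions, not values taken from this commit:

// Standalone sketch of the scale selection in the hunk above. The values
// (n_embd = 5376, n_head = 32, n_embd_head_k = 128) are assumed for a
// Gemma-3-27B-like config and are not read from the commit.
#include <cmath>
#include <cstdio>

int main() {
    const int n_embd        = 5376; // assumed hidden size
    const int n_head        = 32;   // assumed number of attention heads
    const int n_embd_head_k = 128;  // assumed per-head key dimension

    // 27B branch: scale by n_embd/n_head, which differs from head_dim here.
    const float scale_27b   = 1.0f / std::sqrt(float(n_embd / n_head));
    // Default branch: the usual 1/sqrt(head_dim).
    const float scale_other = 1.0f / std::sqrt(float(n_embd_head_k));

    std::printf("27B scale:     %f (1/sqrt(%d))\n", scale_27b, n_embd / n_head);
    std::printf("default scale: %f (1/sqrt(%d))\n", scale_other, n_embd_head_k);
    return 0;
}

With these assumed values the branches give 1/sqrt(168) ≈ 0.0772 versus 1/sqrt(128) ≈ 0.0884, so the distinction is not cosmetic.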
@@ -8627,9 +8629,11 @@ struct llm_build_gemma3_iswa : public llm_graph_context {
                 cb(Kcur, "Kcur", il);
                 cb(Vcur, "Vcur", il);
 
+                Qcur = ggml_scale(ctx0, Qcur, hparams.f_attention_scale);
+
                 cur = build_attn(inp_attn, gf,
                         model.layers[il].wo, NULL,
-                        Qcur, Kcur, Vcur, nullptr, nullptr, hparams.f_attention_scale, il);
+                        Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
             }
 
             cur = build_norm(cur,
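This second hunk is behavior-preserving: scaling Qcur by f_attention_scale up front and then passing 1.0f as the attention scale yields the same logits, since (s·Q)Kᵀ = s·(QKᵀ). A toy demonstration of that equivalence with plain floats (no ggml tensors or build_attn involved; the vectors and scale are made up):

// Sketch of why pre-scaling Q matches scaling the attention logits:
// (s*q) . k == s * (q . k), so the softmax sees identical inputs.
#include <cstdio>

int main() {
    const float q[4]  = {0.5f, -1.0f, 2.0f, 0.25f};
    const float k[4]  = {1.5f,  0.5f, -0.5f, 2.0f};
    const float scale = 0.125f; // e.g. 1/sqrt(64) for an assumed head_dim of 64

    float logit_pre = 0.0f, logit_post = 0.0f;
    for (int i = 0; i < 4; ++i) {
        logit_pre  += (q[i] * scale) * k[i]; // Q scaled before attention
        logit_post += q[i] * k[i];           // scale applied to the logit after
    }
    logit_post *= scale;

    std::printf("pre-scaled : %f\n", logit_pre);
    std::printf("post-scaled: %f\n", logit_post);
    return 0;
}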
