Skip to content

Commit 23c5b57

Browse files
committed
llama : default pooling last for qwen3
1 parent 3a07714 commit 23c5b57

File tree

1 file changed: 3 additions (+), 2 deletions (−)

src/llama-model.cpp

Lines changed: 3 additions & 2 deletions
Original line number · New line number · Change
@@ -820,6 +820,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         case LLM_ARCH_QWEN3:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                hparams.pooling_type = LLAMA_POOLING_TYPE_LAST; // for embeddings model
+
                 switch (hparams.n_layer) {
                     case 28: type = hparams.n_embd == 1024 ? LLM_TYPE_0_6B : LLM_TYPE_1_7B; break;
                     case 36: type = hparams.n_embd == 2560 ? LLM_TYPE_4B : LLM_TYPE_8B; break;
@@ -830,8 +832,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_QWEN3MOE:
             {
-                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
-
+                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
                     case 48: type = LLM_TYPE_30B_A3B; break;

0 commit comments

Comments
 (0)