Commit 339bd02

model : support Qwen3-Embedding (#15023)
1 parent f906275

File tree: 3 files changed, +18 −2 lines

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
@@ -849,6 +849,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
             res = "exaone4"
+        if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
+            # ref: https://huggingface.co/Qwen/Qwen3-Embedding-8B
+            res = "qwen2"
 
         if res is None:
             logger.warning("\n")
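
For reference, the checksum compared against here is not arbitrary: get_vocab_base_pre tokenizes a fixed probe string and hashes the resulting token ids, so a tokenizer that behaves like an already-supported one can be detected and reused. Below is a minimal sketch of that scheme; the probe_text argument is a stand-in for the converter's built-in probe string, which is not reproduced here.

```python
# Sketch of how a pre-tokenizer checksum such as "d4540891..." is derived.
# Assumes the sha256-over-token-ids scheme used by get_vocab_base_pre in
# convert_hf_to_gguf.py; probe_text stands in for the converter's fixed probe string.
from hashlib import sha256

from transformers import AutoTokenizer


def pre_tokenizer_hash(model_id: str, probe_text: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    token_ids = tokenizer.encode(probe_text)           # list[int]
    return sha256(str(token_ids).encode()).hexdigest()


# With the converter's own probe text, Qwen/Qwen3-Embedding-8B should produce the
# hash above, which is why it can be routed to the existing "qwen2" pre-tokenizer.
```

Because the model tokenizes like the existing Qwen2/Qwen3 family, no new pre-tokenizer type is introduced; the hash simply maps it to the existing "qwen2" handling.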

gguf-py/gguf/tensor_mapping.py

Lines changed: 14 additions & 2 deletions
@@ -33,6 +33,7 @@ class TensorNameMap:
             "language_model.model.embed_tokens", # llama4
             "encoder", # neobert
             "model.transformer.wte", # llada
+            "embed_tokens", # qwen3-embedding
         ),
 
         # Token type embeddings
@@ -143,6 +144,7 @@ class TensorNameMap:
             "transformer_encoder.{bid}.attention_norm", # neobert
             "model.layers.{bid}.operator_norm", # lfm2
             "model.transformer.blocks.{bid}.attn_norm", # llada
+            "layers.{bid}.input_layernorm", # qwen3-embedding
         ),
 
         # Attention norm 2
@@ -188,6 +190,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
             "model.transformer.blocks.{bid}.q_proj", # llada
+            "layers.{bid}.self_attn.q_proj", # qwen3-embedding
         ),
 
         # Attention key
@@ -205,6 +208,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
             "model.transformer.blocks.{bid}.k_proj", # llada
+            "layers.{bid}.self_attn.k_proj", # qwen3-embedding
         ),
 
         # Attention value
@@ -221,6 +225,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
             "model.layers.{bid}.self_attn.v_proj", # llama4
             "model.transformer.blocks.{bid}.v_proj", # llada
+            "layers.{bid}.self_attn.v_proj", # qwen3-embedding
         ),
 
         # Attention output
@@ -254,6 +259,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.o_proj", # llama4
             "transformer_encoder.{bid}.wo", # neobert
             "model.transformer.blocks.{bid}.attn_out", # llada
+            "layers.{bid}.self_attn.o_proj", # qwen3-embedding
         ),
 
         # Attention output norm
@@ -300,6 +306,7 @@ class TensorNameMap:
             "transformer_encoder.{bid}.ffn_norm", # neobert
             "model.layers.layers.{bid}.pre_mlp_norm", # plamo2
             "model.transformer.blocks.{bid}.ff_norm", # llada
+            "layers.{bid}.post_attention_layernorm", # qwen3-embedding
         ),
 
         # Post feed-forward norm
@@ -373,7 +380,8 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12", # neobert
             "model.layers.{bid}.block_sparse_moe.up", # smallthinker
-            "model.transformer.blocks.{bid}.up_proj", # llada
+            "model.transformer.blocks.{bid}.up_proj", # llada
+            "layers.{bid}.mlp.up_proj", # qwen3-embedding
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -416,6 +424,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
             "model.layers.{bid}.block_sparse_moe.gate", # smallthinker
             "model.transformer.blocks.{bid}.ff_proj", # llada
+            "layers.{bid}.mlp.gate_proj", # qwen3-embedding
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -465,7 +474,8 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3", # neobert
             "model.layers.{bid}.block_sparse_moe.down", # smallthinker
-            "model.transformer.blocks.{bid}.ff_out", # llada
+            "model.transformer.blocks.{bid}.ff_out", # llada
+            "layers.{bid}.mlp.down_proj", # qwen3-embedding
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -497,6 +507,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm", # openelm
             "model.layers.layers.{bid}.mixer.q", # plamo2
+            "layers.{bid}.self_attn.q_norm", # qwen3-embedding
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
@@ -508,6 +519,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm", # openelm
             "model.layers.layers.{bid}.mixer.k", # plamo2
+            "layers.{bid}.self_attn.k_norm", # qwen3-embedding
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
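
All of the new entries drop the usual "model." prefix, which is how the Qwen3-Embedding checkpoints name their tensors. The sketch below is a simplified stand-in for how such patterns are applied: "{bid}" is filled with a block index and the matching checkpoint name is translated to a canonical GGUF tensor name. The table is an illustrative subset, not the real mapping in TensorNameMap.

```python
# Simplified illustration of resolving prefix-less Qwen3-Embedding tensor names
# to GGUF names. Illustrative subset only; the real table and matching logic
# live in gguf-py/gguf/tensor_mapping.py (TensorNameMap).
QWEN3_EMBEDDING_PATTERNS = {
    "embed_tokens": "token_embd",
    "layers.{bid}.input_layernorm": "blk.{bid}.attn_norm",
    "layers.{bid}.self_attn.q_proj": "blk.{bid}.attn_q",
    "layers.{bid}.mlp.down_proj": "blk.{bid}.ffn_down",
}


def map_tensor_name(hf_name: str, n_blocks: int) -> "str | None":
    # Try every pattern; patterns containing {bid} are expanded per block index.
    for pattern, gguf_pattern in QWEN3_EMBEDDING_PATTERNS.items():
        if "{bid}" not in pattern:
            if hf_name == pattern:
                return gguf_pattern
            continue
        for bid in range(n_blocks):
            if hf_name == pattern.format(bid=bid):
                return gguf_pattern.format(bid=bid)
    return None


print(map_tensor_name("layers.3.self_attn.q_proj", n_blocks=36))  # -> blk.3.attn_q
```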

src/llama-model.cpp

Lines changed: 1 addition & 0 deletions
@@ -899,6 +899,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_QWEN3:
             {
+                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
                     case 28: type = hparams.n_embd == 1024 ? LLM_TYPE_0_6B : LLM_TYPE_1_7B; break;
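
The trailing false marks LLM_KV_POOLING_TYPE as optional, so plain Qwen3 checkpoints without the key still load, while embedding conversions can carry their pooling type in the GGUF metadata. Pooling decides how per-token hidden states are collapsed into one sequence embedding. Below is a minimal sketch of the common modes; last-token pooling for Qwen3-Embedding-style models is an assumption here, since the commit only reads the key and does not hard-code a mode.

```python
# Illustrative pooling over the per-token hidden states of one sequence.
# Assumption: Qwen3-Embedding-style models use last-token pooling; the GGUF key
# read above merely records whichever mode the conversion chose.
import numpy as np


def pool(hidden: np.ndarray, pooling_type: str) -> np.ndarray:
    """hidden has shape (n_tokens, n_embd); returns a single (n_embd,) vector."""
    if pooling_type == "mean":
        return hidden.mean(axis=0)
    if pooling_type == "cls":
        return hidden[0]
    if pooling_type == "last":
        return hidden[-1]
    raise ValueError(f"unsupported pooling type: {pooling_type}")


embedding = pool(np.random.rand(7, 4096).astype(np.float32), "last")  # shape (4096,)
```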
