@@ -33,6 +33,7 @@ class TensorNameMap:
             "language_model.model.embed_tokens",  # llama4
             "encoder",                            # neobert
             "model.transformer.wte",              # llada
+            "embed_tokens",                       # qwen3-embedding
         ),

         # Token type embeddings
@@ -143,6 +144,7 @@ class TensorNameMap:
             "transformer_encoder.{bid}.attention_norm",  # neobert
             "model.layers.{bid}.operator_norm",          # lfm2
             "model.transformer.blocks.{bid}.attn_norm",  # llada
+            "layers.{bid}.input_layernorm",              # qwen3-embedding
         ),

         # Attention norm 2
@@ -188,6 +190,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.q_proj",  # exaone
             "model.layers.{bid}.self_attn.q_proj",        # llama4
             "model.transformer.blocks.{bid}.q_proj",      # llada
+            "layers.{bid}.self_attn.q_proj",              # qwen3-embedding
         ),

         # Attention key
@@ -205,6 +208,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.k_proj",  # exaone
             "model.layers.{bid}.self_attn.k_proj",        # llama4
             "model.transformer.blocks.{bid}.k_proj",      # llada
+            "layers.{bid}.self_attn.k_proj",              # qwen3-embedding
         ),

         # Attention value
@@ -221,6 +225,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.v_proj",  # exaone
             "model.layers.{bid}.self_attn.v_proj",        # llama4
             "model.transformer.blocks.{bid}.v_proj",      # llada
+            "layers.{bid}.self_attn.v_proj",              # qwen3-embedding
         ),

         # Attention output
@@ -254,6 +259,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.o_proj",      # llama4
             "transformer_encoder.{bid}.wo",             # neobert
             "model.transformer.blocks.{bid}.attn_out",  # llada
+            "layers.{bid}.self_attn.o_proj",            # qwen3-embedding
         ),

         # Attention output norm
@@ -300,6 +306,7 @@ class TensorNameMap:
             "transformer_encoder.{bid}.ffn_norm",      # neobert
             "model.layers.layers.{bid}.pre_mlp_norm",  # plamo2
             "model.transformer.blocks.{bid}.ff_norm",  # llada
+            "layers.{bid}.post_attention_layernorm",   # qwen3-embedding
         ),

         # Post feed-forward norm
@@ -373,7 +380,8 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12",        # neobert
             "model.layers.{bid}.block_sparse_moe.up",   # smallthinker
-            "model.transformer.blocks.{bid}.up_proj", # llada
+            "model.transformer.blocks.{bid}.up_proj",   # llada
+            "layers.{bid}.mlp.up_proj",                 # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -416,6 +424,7 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba granite-hybrid
             "model.layers.{bid}.block_sparse_moe.gate",   # smallthinker
             "model.transformer.blocks.{bid}.ff_proj",     # llada
+            "layers.{bid}.mlp.gate_proj",                 # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -465,7 +474,8 @@ class TensorNameMap:
             "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3",           # neobert
             "model.layers.{bid}.block_sparse_moe.down",   # smallthinker
-            "model.transformer.blocks.{bid}.ff_out", # llada
+            "model.transformer.blocks.{bid}.ff_out",      # llada
+            "layers.{bid}.mlp.down_proj",                 # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -497,6 +507,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",             # openelm
             "model.layers.layers.{bid}.mixer.q",                # plamo2
+            "layers.{bid}.self_attn.q_norm",                    # qwen3-embedding
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
@@ -508,6 +519,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",             # openelm
             "model.layers.layers.{bid}.mixer.k",                # plamo2
+            "layers.{bid}.self_attn.k_norm",                    # qwen3-embedding
         ),

         MODEL_TENSOR.ROPE_FREQS: (
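
Note on usage (not part of the diff): the new entries all map the bare, un-prefixed tensor names found in qwen3-embedding checkpoints, e.g. `embed_tokens` and `layers.{bid}.self_attn.q_proj`, onto the existing GGUF tensor types. The sketch below shows how such a name would resolve through `TensorNameMap`; it assumes these checkpoints are converted under `MODEL_ARCH.QWEN3` and uses the existing gguf-py helper `get_tensor_name_map`. The expected outputs are illustrative.

```python
# Minimal sketch (assumption: qwen3-embedding checkpoints are converted under
# MODEL_ARCH.QWEN3): how the bare, un-prefixed names added in this diff resolve
# to GGUF tensor names through TensorNameMap.
import gguf

# Build the per-architecture mapping for a toy model with 2 blocks.
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.QWEN3, 2)

# These checkpoints store tensors without the usual "model." prefix, which is
# why the "embed_tokens" / "layers.{bid}.*" patterns were added.
for hf_name in ("embed_tokens.weight", "layers.0.self_attn.q_proj.weight"):
    gguf_name = tensor_map.get_name(hf_name, try_suffixes=(".weight", ".bias"))
    print(f"{hf_name} -> {gguf_name}")
    # expected: embed_tokens.weight -> token_embd.weight
    #           layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight
```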