Commit 8d15b3b

Gh0u1L5 authored and diegocastanibm committed

Fix GGUF loader for Qwen3 MoE. (vllm-project#22785)

Signed-off-by: Gh0u1L5 <[email protected]>
Signed-off-by: Diego-Castan <[email protected]>

1 parent b4ab153 commit 8d15b3b

File tree

2 files changed: +12 -0 lines changed

vllm/model_executor/model_loader/gguf_loader.py

Lines changed: 11 additions & 0 deletions

@@ -74,6 +74,17 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
                     f"model.layers.{idx}.mlp.experts.0.gate_proj.weight"
                 gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = \
                     f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
+        if model_type in ("qwen2_moe", "qwen3_moe"):
+            model_type = model_type.replace("_", "")
+            # The GGUF layer map assumes merged expert weights,
+            # so we need to map them manually.
+            for idx in range(config.num_hidden_layers):
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_down_exps.weight"] = \
+                    f"model.layers.{idx}.mlp.experts.0.down_proj.weight"
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_gate_exps.weight"] = \
+                    f"model.layers.{idx}.mlp.experts.0.gate_proj.weight"
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = \
+                    f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
 
         arch = None
         for key, value in gguf.MODEL_ARCH_NAMES.items():
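
For context: the model_type.replace("_", "") rename makes qwen2_moe/qwen3_moe match the qwen2moe/qwen3moe entries in gguf.MODEL_ARCH_NAMES, which the loader scans a few lines below. The following is a minimal sketch, not part of the commit, of the GGUF-to-HF tensor-name mapping the new branch produces; num_hidden_layers = 2 is an assumption chosen to keep the output short.

# Illustrative sketch only; a stand-in for the loop added in the diff above.
num_hidden_layers = 2  # assumption for the example; real models have many more layers

gguf_to_hf_name_map: dict[str, str] = {}
for idx in range(num_hidden_layers):
    # GGUF stores each layer's experts as single merged tensors
    # (ffn_{down,gate,up}_exps); vLLM keys them under expert 0 on the HF side.
    gguf_to_hf_name_map[f"blk.{idx}.ffn_down_exps.weight"] = \
        f"model.layers.{idx}.mlp.experts.0.down_proj.weight"
    gguf_to_hf_name_map[f"blk.{idx}.ffn_gate_exps.weight"] = \
        f"model.layers.{idx}.mlp.experts.0.gate_proj.weight"
    gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = \
        f"model.layers.{idx}.mlp.experts.0.up_proj.weight"

for gguf_name, hf_name in gguf_to_hf_name_map.items():
    print(f"{gguf_name} -> {hf_name}")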

vllm/model_executor/models/qwen3_moe.py

Lines changed: 1 addition & 0 deletions

@@ -375,6 +375,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
+            quant_config=quant_config,
             prefix=f"{prefix}.embed_tokens")
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
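
This one-line change threads quant_config into VocabParallelEmbedding, presumably so that the GGUF-quantized embed_tokens tensor is loaded through the quantization-aware path rather than being treated as an unquantized weight. A hedged usage sketch follows; the GGUF file path and tokenizer repo below are illustrative placeholders, not taken from the commit.

# Hedged usage sketch: loading a Qwen3 MoE GGUF checkpoint with vLLM
# after this fix. Paths and repo names are assumptions for illustration.
from vllm import LLM, SamplingParams

llm = LLM(
    model="/path/to/qwen3-moe.gguf",  # hypothetical local GGUF file
    tokenizer="Qwen/Qwen3-30B-A3B",   # hypothetical HF repo providing the tokenizer
)

outputs = llm.generate(["Hello, world!"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)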
