Skip to content

Commit fb141e2

Browse files
authored
Support loading Qwen3 MoE GGUF (#39638)
* support loading qwen3 moe gguf * qwen3moe test cases * fix whitespace * fix ggml tests
1 parent ccb2e0e commit fb141e2

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

src/transformers/integrations/ggml.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,20 @@
102102
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
103103
"vocab_size": "vocab_size",
104104
},
105+
"qwen3moe": {
106+
"context_length": "max_position_embeddings",
107+
"block_count": "num_hidden_layers",
108+
"feed_forward_length": "intermediate_size",
109+
"embedding_length": "hidden_size",
110+
"rope.dimension_count": None,
111+
"rope.freq_base": "rope_theta",
112+
"attention.head_count": "num_attention_heads",
113+
"attention.head_count_kv": "num_key_value_heads",
114+
"attention.layer_norm_rms_epsilon": "rms_norm_eps",
115+
"vocab_size": "vocab_size",
116+
"expert_count": "num_experts",
117+
"expert_used_count": "num_experts_per_tok",
118+
},
105119
"falcon": {
106120
"context_length": "max_position_embeddings",
107121
"block_count": "num_hidden_layers",
@@ -689,6 +703,7 @@ def converted(self) -> Tokenizer:
689703
"qwen2": GGUFQwen2Converter,
690704
"qwen2_moe": GGUFQwen2Converter,
691705
"qwen3": GGUFQwen2Converter,
706+
"qwen3_moe": GGUFQwen2Converter,
692707
"phi3": GGUFPhi3Converter,
693708
"bloom": GGUFGPTConverter,
694709
"falcon": GGUFGPTConverter,

tests/quantization/ggml/test_ggml.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ class GgufModelTests(unittest.TestCase):
302302
gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
303303
gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
304304
qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
305+
qwen3moe_model_id = "Qwen/Qwen3-30B-A3B-GGUF"
305306

306307
q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
307308
q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
@@ -335,6 +336,7 @@ class GgufModelTests(unittest.TestCase):
335336
bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
336337
bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
337338
q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
339+
q4_k_m_qwen3moe_model_id = "Qwen3-30B-A3B-Q4_K_M.gguf"
338340

339341
example_text = "Hello"
340342

@@ -973,3 +975,17 @@ def test_qwen3_q8_0(self):
973975

974976
EXPECTED_TEXT = "HelloED\nI need to find the value of the"
975977
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
978+
979+
def test_qwen3moe_q4_k_m(self):
980+
tokenizer = AutoTokenizer.from_pretrained(self.qwen3moe_model_id, gguf_file=self.q4_k_m_qwen3moe_model_id)
981+
model = AutoModelForCausalLM.from_pretrained(
982+
self.qwen3moe_model_id,
983+
gguf_file=self.q4_k_m_qwen3moe_model_id,
984+
torch_dtype=torch.float16,
985+
)
986+
987+
text = tokenizer(self.example_text, return_tensors="pt")
988+
out = model.generate(**text, max_new_tokens=10)
989+
990+
EXPECTED_TEXT = "Hello, I am a 20 year old male"
991+
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

0 commit comments

Comments
 (0)