modelopt/torch/export/layer_utils.py (+14 −2)
@@ -90,7 +90,12 @@ def get_experts_list(module: torch.nn.Module, model_type: str):
         linear_names = ["w1", "w2", "w3"]
     elif any(
         qwen_variant in model_type
-        for qwen_variant in ["qwenmoeforcausallm", "qwen2moeforcausallm", "qwen3moeforcausallm"]
+        for qwen_variant in [
+            "qwenmoeforcausallm",
+            "qwen2moeforcausallm",
+            "qwen3moeforcausallm",
+            "qwen3nextforcausallm",
+        ]
     ):
         linear_names = ["gate_proj", "down_proj", "up_proj"]
     else:
@@ -333,6 +338,7 @@ def is_moe(module: nn.Module) -> bool:
             "DeepseekMoE".lower(),
             "Qwen2MoeSparseMoeBlock".lower(),
             "Qwen3MoeSparseMoeBlock".lower(),
+            "Qwen3NextSparseMoeBlock".lower(),
         ]
     )

@@ -987,7 +993,13 @@ def module_match_name_list(module, name_list):
         return any(name.lower() in type(module).__name__.lower() for name in name_list)

     if module_match_name_list(
-        module, ["Qwen2MoeSparseMoeBlock", "Qwen3MoeSparseMoeBlock", "DeepseekMoE"]
+        module,
+        [
+            "Qwen2MoeSparseMoeBlock",
+            "Qwen3MoeSparseMoeBlock",
+            "Qwen3NextSparseMoeBlock",
+            "DeepseekMoE",
+        ],
     ):
         return ["gate_proj", "down_proj", "up_proj"]
     elif module_match_name_list(module, ["MixtralMoeSparseMoeBlock"]):
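For context, all three hunks rely on case-insensitive substring matching against the module's class name, so adding the new identifier to each list is enough to route `Qwen3NextSparseMoeBlock` through the existing Qwen-style MoE export path. A minimal sketch of that matching logic, using a hypothetical stand-in class rather than the real transformers module:

```python
import torch.nn as nn


# Hypothetical stand-in for transformers' Qwen3NextSparseMoeBlock, used only
# to exercise the name-based matching shown in the layer_utils.py diff.
class Qwen3NextSparseMoeBlock(nn.Module):
    pass


def module_match_name_list(module: nn.Module, name_list: list[str]) -> bool:
    # Same check as the diff: case-insensitive substring match on the class name.
    return any(name.lower() in type(module).__name__.lower() for name in name_list)


block = Qwen3NextSparseMoeBlock()
assert module_match_name_list(
    block,
    ["Qwen2MoeSparseMoeBlock", "Qwen3MoeSparseMoeBlock", "Qwen3NextSparseMoeBlock", "DeepseekMoE"],
)
# With the new list entry, the export path resolves the Qwen-style projection names.
print(["gate_proj", "down_proj", "up_proj"])
```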
modelopt/torch/quantization/plugins/huggingface.py (+10 −0)

@@ -559,6 +559,16 @@ def top_k(self, value):
 except ImportError:
     pass

+try:
+    from transformers.models.qwen3_next.modeling_qwen3_next import Qwen3NextSparseMoeBlock
+
+    if Qwen3NextSparseMoeBlock not in QuantModuleRegistry:
+        QuantModuleRegistry.register({Qwen3NextSparseMoeBlock: "hf.Qwen3NextSparseMoeBlock"})(
+            _QuantMoeSparseMoe
+        )
+except ImportError:
+    pass
+

 class _QuantGptOssExperts(_QuantFunctionalMixin):
     """Quantized wrapper for `transformers.GptOssExperts`.
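The guarded try/except mirrors the plugin's other transformers integrations: registration is silently skipped on transformers builds that predate the `qwen3_next` module. A minimal sketch of how one might confirm the registration took effect, assuming a transformers version that ships `Qwen3NextSparseMoeBlock`; re-importing `QuantModuleRegistry` from the plugin's namespace is an assumption for brevity, not an API this PR defines:

```python
# On older transformers versions this import raises ImportError, which is
# exactly the case the guarded registration block above tolerates.
from transformers.models.qwen3_next.modeling_qwen3_next import Qwen3NextSparseMoeBlock

# Importing the plugin module runs its module-level registrations.
# (QuantModuleRegistry is defined elsewhere in modelopt; pulling it from the
# plugin namespace here is an illustrative shortcut.)
from modelopt.torch.quantization.plugins.huggingface import QuantModuleRegistry

if Qwen3NextSparseMoeBlock in QuantModuleRegistry:
    print("Qwen3NextSparseMoeBlock will be wrapped by _QuantMoeSparseMoe.")
else:
    print("Not registered: the guarded block was skipped at import time.")
```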