Add MoE export support for Nemotron-H

jenchen13 · jenchen13 · commit 89cbd976be9d · 2025-10-17T08:03:08.000-07:00
Signed-off-by: jenchen13 <jennifchen@nvidia.com>
diff --git a/modelopt/torch/export/plugins/mcore_nemotron.py b/modelopt/torch/export/plugins/mcore_nemotron.py
@@ -17,13 +17,17 @@
 """Custom mapping from Nemotron Hugging Face models to Megatron Core models."""
 
 from .mcore_custom import (
+    COL_ETP,
     COL_TP,
+    ROW_ETP,
     REPLICATE,
     ROW_TP,
     CustomModuleMapping,
     NameRemapping,
     QKVMerging,
     QKVSlicing,
+    GatedMLPSlicing,
+    GatedMLPMerging,
 )
 
 # Example on adding a new CausalLM.
@@ -39,6 +43,10 @@
     "linear_fc2": NameRemapping("model.layers.{}.mlp.down_proj."),
     "final_layernorm": NameRemapping("model.norm."),
     "output_layer": NameRemapping("lm_head."),
+    # MoE
+    "router": NameRemapping("model.layers.{}.mlp.gate."),
+    "local_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.experts.{}."),
+    "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."),
 }
 
 
@@ -63,6 +71,12 @@
     "pre_mlp_layernorm": NameRemapping("backbone.layers.{}.norm.", REPLICATE),
     "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj.", COL_TP),
     "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
+    # MoE
+    "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
+    "local_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.experts.{}.", COL_ETP),
+    "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP),
+
+
 }