shared experts

jenchen13 · jenchen13 · commit a3d8c11bc698 · 2025-10-17T09:43:53.000-07:00
Signed-off-by: jenchen13 &lt;jennifchen@nvidia.com&gt;
diff --git a/modelopt/torch/export/plugins/mcore_nemotron.py b/modelopt/torch/export/plugins/mcore_nemotron.py
@@ -71,8 +71,11 @@
     "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
     "local_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.experts.{}.", COL_ETP),
     "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP),
-
-
+    "shared_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.shared_experts.", COL_TP),
+    "shared_experts.linear_fc2": NameRemapping(
+        "model.layers.{}.mlp.shared_experts.down_proj.", ROW_TP
+    ),
+    
 }
 
 
@@ -101,5 +104,7 @@
     "router": NameRemapping("model.layers.{}.mlp.gate."),
     "local_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.experts.{}."),
     "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."),
+    "shared_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.shared_experts."),
+    "shared_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.shared_experts.down_proj."),
 
 }