|
67 | 67 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
68 | 68 | # MoE |
69 | 69 | "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), |
70 | | - "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj", COL_ETP), |
71 | | - "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP), |
72 | | - "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj.", COL_TP), |
| 70 | + "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP), |
| 71 | + "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP), |
| 72 | + "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP), |
73 | 73 | "shared_experts.linear_fc2": NameRemapping( |
74 | | - "backbone.layers.{}.mlp.shared_experts.down_proj.", ROW_TP |
| 74 | + "backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP |
75 | 75 | ), |
76 | 76 |
|
77 | 77 | } |
|
100 | 100 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), |
101 | 101 | # MoE |
102 | 102 | "router": NameRemapping("backbone.layers.{}.mlp.gate."), |
103 | | - "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj."), |
104 | | - "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj."), |
105 | | - "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj."), |
106 | | - "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.shared_experts.down_proj."), |
| 103 | + "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."), |
| 104 | + "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."), |
| 105 | + "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."), |
| 106 | + "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.shared_experts.down_proj."), |
107 | 107 |
|
108 | 108 | } |
0 commit comments