@@ -200,7 +200,8 @@ class ModelType(Enum):
 
     ERNIE_MoE = 0x2500
 
-    PenguMoE = 0x2600
+    PanguMoE = 0x2600
+    PanguEmbedded = 0x2601
 
     SmolLM3 = 0x2700
 
@@ -4335,7 +4336,7 @@ def get_weight_names(config):
                 f"model.layers.{i}.self_attn.o_proj.weight",
                 f"model.layers.{i}.input_layernorm.weight",
                 f"model.layers.{i}.post_attention_layernorm.weight",
-                f"model.layers.{i}.mlp.down_proj.weight",
+                f"model.layers.{i}.mlp.down_roj.weight",
                 f"model.layers.{i}.mlp.up_proj.weight",
                 f"model.layers.{i}.mlp.gate_proj.weight",
             ]
@@ -4862,7 +4863,7 @@ def get_weight_names(config):
         return weight_names
 
 class PanguMoEConverter(BaseConverter):
-    MODEL_TYPE = ModelType.PenguMoE
+    MODEL_TYPE = ModelType.PanguMoE
 
     @staticmethod
     def dump_config(f, config, ggml_type):
@@ -4922,6 +4923,51 @@ def get_weight_names(config):
 
         return weight_names
 
+class PanguEmbeddedConverter(BaseConverter):
+    MODEL_TYPE = ModelType.PanguEmbedded
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        dump_llama_like_config(f, config, ggml_type)
+
+        config_values = [
+            config.num_key_value_heads,
+            1 if config.tie_word_embeddings else 0,
+            config.rope_theta,
+        ]
+        f.write(struct.pack("iif", *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = ["model.embed_tokens.weight"]
+        for i in range(config.num_hidden_layers):
+
+            weight_names += [
+                f"model.layers.{i}.input_layernorm.weight",
+                f"model.layers.{i}.mlp.down_proj.weight",
+                f"model.layers.{i}.mlp.gate_proj.weight",
+                f"model.layers.{i}.mlp.up_proj.weight",
+                f"model.layers.{i}.post_attention_layernorm.weight",
+                f"model.layers.{i}.self_attn.k_proj.weight",
+                f"model.layers.{i}.self_attn.k_proj.bias",
+                f"model.layers.{i}.self_attn.q_proj.weight",
+                f"model.layers.{i}.self_attn.q_proj.bias",
+                f"model.layers.{i}.self_attn.v_proj.weight",
+                f"model.layers.{i}.self_attn.v_proj.bias",
+                f"model.layers.{i}.self_attn.o_proj.weight",
+                f"model.layers.{i}.self_attn.o_proj.bias",
+            ]
+
+        weight_names += [
+            "model.norm.weight",
+            "lm_head.weight"
+        ]
+
+        if config.tie_word_embeddings:
+            weight_names = weight_names[:-1]
+
+        return weight_names
+
 class QWen3Converter(BaseConverter):
     MODEL_TYPE = ModelType.QWen3
 
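For reference, the three PanguEmbedded-specific fields that dump_config appends after the llama-like header are packed with the same "iif" format string used above: two int32 values (num_key_value_heads and the tie_word_embeddings flag) followed by one float32 (rope_theta). A minimal round-trip sketch with hypothetical values, not part of the commit:

import struct

num_key_value_heads = 8        # hypothetical value
tie_word_embeddings = True     # hypothetical value
rope_theta = 1000000.0         # hypothetical value

# Same format string as dump_config: int32, int32, float32.
blob = struct.pack("iif", num_key_value_heads,
                   1 if tie_word_embeddings else 0,
                   rope_theta)
print(struct.calcsize("iif"), struct.unpack("iif", blob))
# e.g. 12 (8, 1, 1000000.0)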
@@ -7807,6 +7853,8 @@ def main():
         ERNIEMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'PanguProMoEForCausalLM':
         PanguMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'PanguEmbeddedForCausalLM':
+        PanguEmbeddedConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'JiutianForCausalLM':
         JiuTianConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'deepseek-r1-distill-qwen3':
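Because a mismatch between get_weight_names and the checkpoint's actual tensor names only surfaces during conversion, a quick cross-check against the safetensors index can catch such problems early. A minimal sketch, assuming convert.py is importable and the checkpoint is a sharded Hugging Face layout with the usual model.safetensors.index.json; the directory path is hypothetical:

import json
import types

from convert import PanguEmbeddedConverter   # the class added in this commit

model_dir = "openPangu-Embedded-7B"           # hypothetical local checkpoint dir

# Lightweight config object exposing the attributes get_weight_names reads.
with open(f"{model_dir}/config.json") as f:
    config = types.SimpleNamespace(**json.load(f))

# Tensor names actually present in the sharded safetensors checkpoint.
with open(f"{model_dir}/model.safetensors.index.json") as f:
    present = set(json.load(f)["weight_map"])

expected = set(PanguEmbeddedConverter.get_weight_names(config))
print("expected but missing:", sorted(expected - present))
print("present but unused:  ", sorted(present - expected))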