
Commit 10bcb31

support Pangu-embedded
1 parent 4ea1c01 commit 10bcb31

File tree

6 files changed (+104, -6 lines):

* README.md
* convert.py
* docs/models.md
* models/pangu.cpp
* models/pangu.h
* src/models_priv.h

README.md

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2025-08-05: Pangu-Embedded
 * 2025-07-29: Jiutian
 * 2025-07-10: SmolLM-3
 * 2025-07-05: Pangu-Pro-MoE

convert.py

Lines changed: 51 additions & 3 deletions
@@ -200,7 +200,8 @@ class ModelType(Enum):
 
     ERNIE_MoE = 0x2500
 
-    PenguMoE = 0x2600
+    PanguMoE = 0x2600
+    PanguEmbedded = 0x2601
 
     SmolLM3 = 0x2700
 
@@ -4335,7 +4336,7 @@ def get_weight_names(config):
                 f"model.layers.{i}.self_attn.o_proj.weight",
                 f"model.layers.{i}.input_layernorm.weight",
                 f"model.layers.{i}.post_attention_layernorm.weight",
-                f"model.layers.{i}.mlp.down _roj.weight",
+                f"model.layers.{i}.mlp.down_proj.weight",
                 f"model.layers.{i}.mlp.up_proj.weight",
                 f"model.layers.{i}.mlp.gate_proj.weight",
             ]
@@ -4862,7 +4863,7 @@ def get_weight_names(config):
         return weight_names
 
 class PanguMoEConverter(BaseConverter):
-    MODEL_TYPE = ModelType.PenguMoE
+    MODEL_TYPE = ModelType.PanguMoE
 
     @staticmethod
     def dump_config(f, config, ggml_type):
@@ -4922,6 +4923,51 @@ def get_weight_names(config):
 
         return weight_names
 
+class PanguEmbeddedConverter(BaseConverter):
+    MODEL_TYPE = ModelType.PanguEmbedded
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        dump_llama_like_config(f, config, ggml_type)
+
+        config_values = [
+            config.num_key_value_heads,
+            1 if config.tie_word_embeddings else 0,
+            config.rope_theta,
+        ]
+        f.write(struct.pack("iif", *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = ["model.embed_tokens.weight"]
+        for i in range(config.num_hidden_layers):
+
+            weight_names += [
+                f"model.layers.{i}.input_layernorm.weight",
+                f"model.layers.{i}.mlp.down_proj.weight",
+                f"model.layers.{i}.mlp.gate_proj.weight",
+                f"model.layers.{i}.mlp.up_proj.weight",
+                f"model.layers.{i}.post_attention_layernorm.weight",
+                f"model.layers.{i}.self_attn.k_proj.weight",
+                f"model.layers.{i}.self_attn.k_proj.bias",
+                f"model.layers.{i}.self_attn.q_proj.weight",
+                f"model.layers.{i}.self_attn.q_proj.bias",
+                f"model.layers.{i}.self_attn.v_proj.weight",
+                f"model.layers.{i}.self_attn.v_proj.bias",
+                f"model.layers.{i}.self_attn.o_proj.weight",
+                f"model.layers.{i}.self_attn.o_proj.bias",
+            ]
+
+        weight_names += [
+            "model.norm.weight",
+            "lm_head.weight"
+        ]
+
+        if config.tie_word_embeddings:
+            weight_names = weight_names[:-1]
+
+        return weight_names
+
 class QWen3Converter(BaseConverter):
     MODEL_TYPE = ModelType.QWen3
 
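As a quick cross-check (this snippet is not part of the commit), the three extra fields that `PanguEmbeddedConverter.dump_config` writes after the llama-like header are packed as `"iif"` and line up, in order, with the `num_key_value_heads`, `tie_word_embeddings` and `rope_theta` members of the `Config` struct added to `models/pangu.cpp`. A minimal sketch with made-up values:

```python
import struct

# Example values only; a real conversion writes the checkpoint's actual settings.
extra = struct.pack("iif",
                    8,            # int   num_key_value_heads
                    1,            # int   tie_word_embeddings (1 = tied, 0 = separate lm_head)
                    1000000.0)    # float rope_theta

assert struct.unpack("iif", extra) == (8, 1, 1000000.0)
```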

@@ -7807,6 +7853,8 @@ def main():
         ERNIEMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'PanguProMoEForCausalLM':
         PanguMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'PanguEmbeddedForCausalLM':
+        PanguEmbeddedConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'JiutianForCausalLM':
         JiuTianConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'deepseek-r1-distill-qwen3':
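The new `elif` branch keys on the architecture name, typically taken from the checkpoint's `config.json`, so an openPangu-Embedded download is routed to `PanguEmbeddedConverter`. A small illustration (the path is a placeholder, not from the commit):

```python
import json

# Placeholder path; point this at a downloaded openPangu-Embedded checkpoint.
with open("openpangu-embedded-7b-model/config.json") as f:
    arch = json.load(f)["architectures"][0]

# This is the string matched by the dispatch in main() above.
print(arch == "PanguEmbeddedForCausalLM")
```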

docs/models.md

Lines changed: 2 additions & 1 deletion
@@ -204,7 +204,8 @@
     * [x] [Chat-14B](https://huggingface.co/OrionStarAI/Orion-14B-Chat)
 
 * Pangu (`PanguProMoEForCausalLM`)
-    * [x] [Pro-MoE](https://gitcode.com/ascend-tribe/pangu-pro-moe-model/tree/15e45a97fa314d86804f93f7faba107b43f8d25c)
+    * [x] MoE: [Pro-MoE](https://gitcode.com/ascend-tribe/pangu-pro-moe-model/tree/15e45a97fa314d86804f93f7faba107b43f8d25c)
+    * [x] Embedded: [7B](https://ai.gitcode.com/ascend-tribe/openpangu-embedded-7b-model/tree/754817a9fc1cc4df2687709b758448f80c2dd64c), [1B](https://ai.gitcode.com/ascend-tribe/openpangu-embedded-1b-model/tree/75dd659167a45d6577555d405edb75e0b88215c2)
 
 * Phi (`PhiForCausalLM`, `Phi3ForCausalLM`)
     * [x] [Phi-2](https://huggingface.co/microsoft/phi-2/tree/eb8bbd1d37d258ea74fb082c53346d33056a83d4)

models/pangu.cpp

Lines changed: 48 additions & 1 deletion
@@ -13,7 +13,7 @@ namespace chatllm::pangu::moe
     };
     static ChatHistoryEncoder _chat_encoder;
 
-    Tokenizer::Tokenizer(const Config &config)
+    Tokenizer::Tokenizer(const BaseConfig &config)
         : Tokenizer(config, &_chat_encoder)
     {}
 
@@ -175,4 +175,51 @@ namespace chatllm::pangu::moe
     }
 
     REGISTER_MODEL_LOADER(PANGU_MOE, pangu::moe, 1);
+}
+
+namespace chatllm::pangu::embedded
+{
+    struct Config : public BaseConfig
+    {
+        int num_key_value_heads;
+        int tie_word_embeddings;
+
+        float rope_theta;
+    };
+
+    typedef moe::Tokenizer Tokenizer;
+
+    class ConditionalGeneration : public BaseModelForConditionalGeneration
+    {
+    public:
+        typedef LMBlock1<RMSNorm, FullBiasedSelfAttention, RMSNorm, SiLUMLP> PanguDenseBlock;
+        typedef Model<Config, Embedding, RMSNorm, PanguDenseBlock, int, int, int, int, int> ModelClass;
+
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
+            : BaseModelForConditionalGeneration(MODEL_TYPE_PANGU_EMBEDDED, config, runtime_config)
+        {
+            const size_t tensor_ovhd = ggml_tensor_overhead();
+            const size_t num_tensors = 3 + config.num_hidden_layers * 16 + (config.tie_word_embeddings ? -1 : 0);
+            const size_t ctx_size = num_tensors * tensor_ovhd;
+
+            w_ctx_.gctx = GGMLContext({.mem_size = ctx_size, .mem_buffer = nullptr, .no_alloc = true});
+            w_ctx_.dtype = config.dtype;
+
+            transformer = new ModelClass(&w_ctx_, config,
+                                         (0 == config.tie_word_embeddings) ? create_embedding<Embedding>(&w_ctx_, config) : nullptr,
+                                         config.hidden_size, config.num_attention_heads,
+                                         config.intermediate_size, config.num_key_value_heads,
+                                         config.max_length);
+
+            for (int i = 0; i < config.num_hidden_layers; i++)
+            {
+                auto &layer = get_typed_transformer<ModelClass>()->layers[i];
+                layer.attention.freq_base = config.rope_theta;
+            }
+
+            w_ctx_.check_used_mem_size(true);
+        }
+    };
+
+    REGISTER_MODEL_LOADER(PANGU_EMBEDDED, pangu::embedded, 1);
 }
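One detail worth noting across the two files: `tie_word_embeddings` is handled consistently. When it is set, the converter drops `lm_head.weight` from the weight list, while the C++ constructor above passes `nullptr` to the model instead of a separately created `Embedding` and budgets one tensor fewer. A small, hypothetical check of the converter-side behavior (assumes `convert.py` is importable and uses a stand-in config object):

```python
from convert import PanguEmbeddedConverter  # assumes convert.py is on sys.path

class FakeConfig:
    """Minimal stand-in for the loaded HF config (illustration only)."""
    num_hidden_layers = 1
    tie_word_embeddings = True

names = PanguEmbeddedConverter.get_weight_names(FakeConfig())
assert names[0] == "model.embed_tokens.weight"
assert names[-1] == "model.norm.weight"  # "lm_head.weight" was dropped: embeddings are tied
```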

models/pangu.h

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ namespace chatllm::pangu::moe
     class Tokenizer : public BaseTokenizer
     {
     public:
-        Tokenizer(const Config &config);
+        Tokenizer(const BaseConfig &config);
         Tokenizer(const BaseConfig &config, BaseHistoryEncoder *encoder);
 
         size_t load(tokenizer::DataReader *buffer, int n_vocab) override;

src/models_priv.h

Lines changed: 1 addition & 0 deletions
@@ -161,6 +161,7 @@ namespace chatllm
         MODEL_TYPE_ERNIE_MOE = 0x2500,
 
         MODEL_TYPE_PANGU_MOE = 0x2600,
+        MODEL_TYPE_PANGU_EMBEDDED = 0x2601,
 
         MODEL_TYPE_SMOLLM3 = 0x2700,
 
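The numeric model-type code is what ties the converter to this loader: `ModelType.PanguEmbedded = 0x2601` in `convert.py` must stay in sync with `MODEL_TYPE_PANGU_EMBEDDED = 0x2601` here, so that converted files are handed to the loader registered via `REGISTER_MODEL_LOADER(PANGU_EMBEDDED, pangu::embedded, 1)`. A trivial check on the Python side (assumes `convert.py` is importable):

```python
from convert import ModelType  # assumes convert.py is on sys.path

assert ModelType.PanguMoE.value == 0x2600       # matches MODEL_TYPE_PANGU_MOE
assert ModelType.PanguEmbedded.value == 0x2601  # matches MODEL_TYPE_PANGU_EMBEDDED
```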
