@@ -2177,6 +2177,15 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_PANGU_EMBED:
+            {
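+                // only the RMS-norm epsilon is read here; the shared hparams
+                // are loaded before this switch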
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                switch (hparams.n_layer) {
+                    case 26: type = LLM_TYPE_1B; break; // openPangu-Embedded-1B-V1.1
+                    case 34: type = LLM_TYPE_7B; break; // openPangu-Embedded-7B-V1.1
+                    default: type = LLM_TYPE_UNKNOWN;
+                }
+            } break;
         default: throw std::runtime_error("unsupported model architecture");
     }

@@ -6263,6 +6272,50 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.visexp_ffn_up = create_tensor(tn(LLM_TENSOR_VISEXP_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
                 }
             } break;
+        case LLM_ARCH_PANGU_EMBED:
+            {
+                tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+                // output
+                output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+                output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                // if output is NULL, init from the input tok embed
+                if (output == NULL) {
+                    output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                }
+
+                for (int i = 0; i < n_layer; ++i) {
+                    auto & layer = layers[i];
+
+                    layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+
+                    // weight tensors
+                    layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);
+                    layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0);
+                    layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0);
+                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0);
+
+                    // bias tensors
+                    layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd_head_k * n_head}, 0);
+                    layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0);
+                    layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0);
+                    layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, 0);
+
+                    layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
+
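+                    // optional rope scaling factors; the same factor tensors are shared
+                    // across layers, so layers after the first are flagged as duplicates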
+                    if (hparams.rope_scaling_type_train == LLAMA_ROPE_SCALING_TYPE_LONGROPE) {
+                        layer.rope_long  = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                    } else {
+                        layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                    }
+
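+                    // feed-forward network: gate and up project n_embd -> n_ff, down projects back to n_embd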
+                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
+                    layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
+                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
+                }
+            } break;
         default:
             throw std::runtime_error("unknown architecture");
     }
@@ -7260,6 +7313,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_cogvlm>(*this, params);
             } break;
+        case LLM_ARCH_PANGU_EMBED:
+            {
+                llm = std::make_unique<llm_build_pangu_embedded>(*this, params);
+            } break;
         default:
             GGML_ABORT("fatal error");
     }
@@ -7479,6 +7536,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_APERTUS:
         case LLM_ARCH_MINIMAX_M2:
         case LLM_ARCH_COGVLM:
+        case LLM_ARCH_PANGU_EMBED:
             return LLAMA_ROPE_TYPE_NEOX;

         case LLM_ARCH_QWEN2VL: