@@ -8367,7 +8367,6 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
83678367
83688368 for (int il = 0; il < n_layer; ++il) {
83698369 ggml_tensor * inpSA = inpL;
8370-
83718370 // norm
83728371 {
83738372 cur = build_norm(inpL,
@@ -8404,15 +8403,17 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
84048403 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
84058404 Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
84068405
8406+ const float freq_base_l = model.get_rope_freq_base (cparams, il);
8407+ const float freq_scale_l = model.get_rope_freq_scale(cparams, il);
84078408 Qcur = ggml_rope_ext(
84088409 ctx0, Qcur, inp_pos, nullptr,
8409- n_rot, rope_type, n_ctx_orig, freq_base, freq_scale ,
8410+ n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l ,
84108411 ext_factor, attn_factor, beta_fast, beta_slow
84118412 );
84128413
84138414 Kcur = ggml_rope_ext(
84148415 ctx0, Kcur, inp_pos, nullptr,
8415- n_rot, rope_type, n_ctx_orig, freq_base, freq_scale ,
8416+ n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l ,
84168417 ext_factor, attn_factor, beta_fast, beta_slow
84178418 );
84188419
@@ -8435,7 +8436,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
84358436 cb(ffn_inp, "ffn_inp", il);
84368437
84378438 // feed-forward network
8438- bool is_moe_layer = arch == LLM_ARCH_ERNIE4_5_MOE && hparams.n_moe_layer_step > 0 && ( il + 1) % hparams.n_moe_layer_step == 0 ;
8439+ bool is_moe_layer = arch == LLM_ARCH_ERNIE4_5_MOE && hparams.n_moe_layer_step > 0 && il >= hparams.n_moe_layer_step;
84398440
84408441 if (!is_moe_layer) {
84418442 cur = build_norm(ffn_inp,
@@ -16828,6 +16829,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
1682816829 case LLM_ARCH_SMOLLM3:
1682916830 case LLM_ARCH_ARCEE:
1683016831 case LLM_ARCH_ERNIE4_5:
16832+ case LLM_ARCH_ERNIE4_5_MOE:
1683116833 return LLAMA_ROPE_TYPE_NORM;
1683216834
1683316835 // the pairs of head values are offset by n_rot/2
0 commit comments