@@ -6214,30 +6214,12 @@ struct llm_build_modern_bert : public llm_graph_context {
         ggml_tensor * ffn_inp = cur;
         cb(ffn_inp, "ffn_inp", il);
 
-        // feed-forward network
-        ggml_tensor * ffn_up = build_lora_mm(model.layers[il].ffn_up, cur);
-        cb(ffn_up, "ffn_up", il);
-
-        int64_t split_point = ffn_up->ne[0] / 2;
-        ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
-            ctx0, ffn_up, split_point,
-            ffn_up->ne[1], ffn_up->nb[1], 0
-        ));
-        ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
-            ctx0, ffn_up, split_point,
-            ffn_up->ne[1], ffn_up->nb[1],
-            split_point * ggml_element_size(ffn_up)
-        ));
-
-        // Apply activation function
-        output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
-
-        // Element-wise multiplication
-        ggml_tensor * gated = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
-        cb(gated, "ffn_gated", il);
-
-        // Final projection
-        cur = build_lora_mm(model.layers[il].ffn_down, gated);
+        cur = build_ffn(cur,
+                model.layers[il].ffn_up,
+                NULL, NULL, NULL, NULL, NULL,
+                model.layers[il].ffn_down,
+                NULL, NULL, NULL,
+                LLM_FFN_GEGLU, LLM_FFN_SEQ, il);
 
         // attentions bypass the intermediate layer
         cur = ggml_add(ctx0, cur, ffn_inp);
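With `LLM_FFN_GEGLU` the helper treats `ffn_up` as a fused projection and splits its output into an activation half and a gate half, so the manual view/`ggml_gelu`/`ggml_mul` sequence is no longer needed per model. For reference, a minimal sketch of the equivalent computation, mirroring the removed code (plain `ggml_mul_mat` stands in for `build_lora_mm`, and the helper name `geglu_ffn` is illustrative only; the exact split order used by `build_ffn` should be checked against its implementation):

```cpp
#include "ggml.h"

// Sketch of the GEGLU FFN the removed code implemented by hand:
// one fused up projection whose output holds [up | gate] halves.
static ggml_tensor * geglu_ffn(ggml_context * ctx,
                               ggml_tensor * cur,       // input:  ne = [n_embd, n_tokens]
                               ggml_tensor * ffn_up,    // fused weight: ne = [n_embd, 2*n_ff]
                               ggml_tensor * ffn_down)  // weight: ne = [n_ff, n_embd]
{
    // a single matmul produces both halves: ne = [2*n_ff, n_tokens]
    ggml_tensor * up = ggml_mul_mat(ctx, ffn_up, cur);

    const int64_t n_ff = up->ne[0] / 2;

    // first half feeds the activation, second half is the gate
    ggml_tensor * x = ggml_cont(ctx, ggml_view_2d(ctx, up,
            n_ff, up->ne[1], up->nb[1], 0));
    ggml_tensor * g = ggml_cont(ctx, ggml_view_2d(ctx, up,
            n_ff, up->ne[1], up->nb[1], n_ff * ggml_element_size(up)));

    // GEGLU: gelu(x) * g, followed by the down projection
    ggml_tensor * gated = ggml_mul(ctx, ggml_gelu(ctx, x), g);
    return ggml_mul_mat(ctx, ffn_down, gated);
}
```

Folding this into `build_ffn` keeps the fused-projection handling (and the LoRA path via `build_lora_mm`) in one place instead of duplicating it in each model builder.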