@@ -7672,7 +7672,7 @@ struct llm_build_modern_bert : public llm_graph_context {
76727672 ggml_tensor * ffn_up_view = model.layers[il].ffn_up;
76737673
76747674 if (ffn_gate_view == nullptr && ffn_up_view) {
7675-
7675+
76767676 // Case A: weight stored as (2*ffn, hidden) -> split rows into two (ffn x hidden)
76777677 if( ffn_up_view->ne[0] == 2 * n_ff and ffn_up_view->ne[1] == n_embd) {
76787678
@@ -7685,33 +7685,49 @@ struct llm_build_modern_bert : public llm_graph_context {
76857685 ffn_gate_view = ggml_view_2d(ctx0, model.layers[il].ffn_up,
76867686 /*ne0*/ n_ff, /*ne1*/ n_embd,
76877687 /*nb1*/ model.layers[il].ffn_up->nb[1],
7688+
76887689 /*offset_bytes*/ (size_t)n_ff * model.layers[il].ffn_up->nb[1]);
76897690 }
7691+
7692+ /*
76907693 else if ( ffn_up_view->ne[0] == n_embd && ffn_up_view->ne[1] == 2 * n_ff) {
76917694 // top half
7695+ LLAMA_LOG_INFO("Case B:\n");
76927696 ffn_up_view = ggml_view_2d(ctx0, model.layers[il].ffn_up,
76937697 n_embd, n_ff,
76947698 model.layers[il].ffn_up->nb[1],
76957699 0);
7700+
76967701 ffn_up_view = ggml_cont(ctx0, ffn_up_view);
76977702
76987703 ffn_gate_view = ggml_view_2d(ctx0, model.layers[il].ffn_up,
76997704 n_embd, n_ff,
77007705 model.layers[il].ffn_up->nb[1],
7701- n_ff * sizeof(float) );
7706+ n_ff * model.layers[il].ffn_up->nb[0] );
77027707 ffn_gate_view = ggml_cont(ctx0, ffn_gate_view);
77037708 }
7704-
7705- ggml_tensor * ffn_down_view = model.layers[il].ffn_down;
7706- LLAMA_LOG_INFO("ffn shapes: Up: {%lld, %lld}, Gate: {%lld, %lld}, Down: {%lld, %lld}",
7707- ffn_up_view->ne[0], ffn_up_view->ne[1], ffn_gate_view->ne[0], ffn_gate_view->ne[1], ffn_down_view->ne[0], ffn_down_view->ne[1]);
7708-
7709+ */
7710+ //ggml_tensor * ffn_down_view = model.layers[il].ffn_down;
7711+ //LLAMA_LOG_INFO("ffn shapes: Up: {%lld, %lld}, Gate: {%lld, %lld}, Down: {%lld, %lld}\n",
7712+ // ffn_up_view->ne[0], ffn_up_view->ne[1], ffn_gate_view->ne[0], ffn_gate_view->ne[1], ffn_down_view->ne[0], ffn_down_view->ne[1]);
7713+ /*
7714+ ggml_tensor * cur,
7715+ ggml_tensor * up,
7716+ ggml_tensor * up_b,
7717+ ggml_tensor * up_s,
7718+ ggml_tensor * gate,
7719+ ggml_tensor * gate_b,
7720+ ggml_tensor * gate_s,
7721+ ggml_tensor * down,
7722+ ggml_tensor * down_b,
7723+ ggml_tensor * down_s,
7724+ ggml_tensor * act_scales,*/
77097725 mlp_out = build_ffn(
77107726 h,
7711- model.layers[il].ffn_up, /*up_b*/ NULL, /*up_shexp*/ NULL,
7712- ffn_gate_view , /*gate_b*/ NULL, /*gate_shexp*/ NULL,
7727+ model.layers[il].ffn_up, /*up_b*/ NULL, /*up_shexp*/ NULL,
7728+ NULL , /*gate_b*/ NULL, /*gate_shexp*/ NULL,
77137729 model.layers[il].ffn_down, /*down_b*/ NULL, /*down_shexp*/ NULL,
7714- /*expert_scores */ NULL,
7730+ /*act_scales */ NULL,
77157731 LLM_FFN_GEGLU, LLM_FFN_PAR, il
77167732 );
77177733 cb(mlp_out, "ffn_out_geglu", il);
0 commit comments