@@ -15239,7 +15239,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {
1523915239 cb(Kcur, "Kcur-post-rope", il);
1524015240 cb(Vcur, "Vcur-post-rope", il);
1524115241
15242- ggml_tensor * attn_out = build_attn(inp, gf,
15242+ ggml_tensor * attn_out = build_attn(inp->get_attn() , gf,
1524315243 model.layers[il].wo, NULL,
1524415244 Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
1524515245 cb(attn_out, "attn_out", il);
@@ -15334,7 +15334,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {
1533415334 ggml_tensor * conv_states_all = kv_state->get_r_l(il);
1533515335 ggml_tensor * ssm_states_all = kv_state->get_s_l(il);
1533615336
15337- ggml_tensor * conv = build_rs(inp, gf, conv_states_all, hparams.n_embd_r(), n_seqs);
15337+ ggml_tensor * conv = build_rs(inp->get_recr() , gf, conv_states_all, hparams.n_embd_r(), n_seqs);
1533815338 conv = ggml_reshape_3d(ctx0, conv, d_conv - 1, d_inner + 2*n_group*d_state, n_seqs);
1533915339
1534015340 // {n_embd, n_tokens} => {n_embd, n_seq_tokens, n_seqs}
@@ -15407,7 +15407,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {
1540715407 return ggml_ssm_scan(ctx, ssm, x, dt, A, B, C, ids);
1540815408 };
1540915409
15410- ggml_tensor * y_ssm = build_rs(inp, gf, ssm_states_all, hparams.n_embd_s(), ubatch.n_seqs, get_ssm_rows);
15410+ ggml_tensor * y_ssm = build_rs(inp->get_recr() , gf, ssm_states_all, hparams.n_embd_s(), ubatch.n_seqs, get_ssm_rows);
1541115411
1541215412 // store last states
1541315413 ggml_build_forward_expand(gf,
0 commit comments