@@ -9904,7 +9904,9 @@ struct llm_build_mamba : public llm_graph_context {
         // {n_embd, n_tokens}
         inpL = build_inp_embd(model.tok_embd);
 
-        auto * inp = build_rs_inp();
+        auto * rs_inp = build_rs_inp();
+
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
 
         for (int il = 0; il < n_layer; ++il) {
             // norm
@@ -9914,14 +9916,12 @@ struct llm_build_mamba : public llm_graph_context {
             cb(cur, "attn_norm", il);
 
             if (model.arch == LLM_ARCH_MAMBA2) {
-                cur = build_mamba2_layer(inp, gf, cur, model, ubatch, il);
+                cur = build_mamba2_layer(rs_inp, gf, cur, model, ubatch, il);
             } else {
-                cur = build_mamba_layer(inp, gf, cur, model, ubatch, il);
+                cur = build_mamba_layer(rs_inp, gf, cur, model, ubatch, il);
             }
 
             if (il == n_layer - 1) {
-                // skip computing output for unused tokens
-                ggml_tensor * inp_out_ids = build_inp_out_ids();
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
@@ -13550,7 +13550,6 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base {
 };
 
 struct llm_build_granite : public llm_graph_context {
-
     llm_build_granite(
         const llama_model & model,
         const llm_graph_params & params,
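In short, the first two hunks rename the recurrent-state input from `inp` to `rs_inp` and hoist the `build_inp_out_ids()` call out of the per-layer loop, so the output-row index tensor is built once up front; at the last layer, `ggml_get_rows` still keeps only the rows of tokens whose outputs are actually needed. Below is a rough standalone illustration of that row-selection step, as a minimal sketch using plain ggml rather than the llama.cpp graph context; `n_embd`, `n_tokens`, and the single selected id are made-up values.

```cpp
// Minimal standalone sketch (assumption: plain ggml, outside the llama.cpp
// graph context; sizes and the selected id are made up for illustration).
#include "ggml.h"

#include <cstdint>
#include <cstdio>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    const int64_t n_embd   = 4; // made-up embedding size
    const int64_t n_tokens = 8; // made-up number of tokens in the batch

    // hidden states for the whole batch: {n_embd, n_tokens}
    struct ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens);

    // ids of the tokens whose outputs are needed (here: only the last token)
    struct ggml_tensor * inp_out_ids = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
    ((int32_t *) inp_out_ids->data)[0] = (int32_t) (n_tokens - 1);

    // gather only those rows; the resulting node has shape {n_embd, 1}
    struct ggml_tensor * out = ggml_get_rows(ctx, cur, inp_out_ids);

    printf("out shape: %lld x %lld\n", (long long) out->ne[0], (long long) out->ne[1]);

    ggml_free(ctx);
    return 0;
}
```

The second dimension of the result equals the number of requested ids, which is how the graph avoids computing final-layer outputs for unused tokens (the point of the removed comment in the second hunk).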