@@ -9921,7 +9921,7 @@ struct llm_build_mamba : public llm_graph_context {
                 cur = build_mamba_layer(rs_inp, gf, cur, model, ubatch, il);
             }
 
-            if (il == n_layer - 1) {
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
@@ -13785,6 +13785,8 @@ struct llm_build_granite_hybrid : public llm_graph_context {
 
         auto * inp = build_inp_mem_hybrid();
 
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
+
         // Positional embeddings populated if rope enabled
         ggml_tensor * inp_pos = nullptr;
         if (use_rope) {
@@ -13810,9 +13812,7 @@ struct llm_build_granite_hybrid : public llm_graph_context {
                         n_embd_head, use_rope, il);
             }
 
-            if (il == n_layer - 1) {
-                // skip computing output for unused tokens
-                ggml_tensor * inp_out_ids = build_inp_out_ids();
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
             }
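
Both hunks apply the same pattern: build_inp_out_ids() is hoisted above the per-layer loop, and the final-layer row gather is guarded on the tensor being non-null. A minimal sketch of that pattern follows; it is illustrative only, build_layer is a hypothetical stand-in for the real per-layer builders, and build_inp_out_ids() is assumed to return nullptr when no output filtering is needed.

    // hoisted out of the layer loop: built once per graph
    ggml_tensor * inp_out_ids = build_inp_out_ids();

    for (int il = 0; il < n_layer; ++il) {
        cur = build_layer(cur, il); // hypothetical per-layer builder

        // only the last layer gathers the rows of the requested outputs;
        // when inp_out_ids is null, every token's output is kept and the
        // gather is skipped entirely
        if (il == n_layer - 1 && inp_out_ids) {
            cur  = ggml_get_rows(ctx0, cur,  inp_out_ids);
            inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
        }
    }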