
Commit 6958888

fix: Use per-layer sizes in granite build_attention_layer
Also no need to pass in the kv cache since it's already in the inp_attn

Branch: GraniteFour

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 52cd6d1 commit 6958888

File tree: 1 file changed (+3, -3 lines)


src/llama-model.cpp

Lines changed: 3 additions & 3 deletions
@@ -12749,9 +12749,9 @@ struct llm_build_granite : public llm_graph_context {
             self->cb(Vcur, "Vcur", il);
         }

-        Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, self->n_head, self->n_tokens);
-        Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, self->n_head_kv, self->n_tokens);
-        Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, self->n_head_kv, self->n_tokens);
+        Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, self->hparams.n_head(il), self->n_tokens);
+        Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, self->hparams.n_head_kv(il), self->n_tokens);
+        Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, self->hparams.n_head_kv(il), self->n_tokens);

         if (use_rope) {
             ggml_tensor * rope_factors = model.get_rope_factors(self->cparams, il);
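The change swaps the model-wide n_head / n_head_kv values for the per-layer accessors hparams.n_head(il) / hparams.n_head_kv(il) when reshaping Qcur, Kcur, and Vcur. For architectures whose layers are not uniform (the GraniteFour branch targets hybrid models that mix attention and recurrent layers), a single global head count is not sufficient, so each layer's reshape has to read its own sizes. Below is a minimal sketch of the idea behind such per-layer accessors, assuming a fixed maximum layer count with one array entry per layer; the names are illustrative, not the actual llama.cpp definitions.

```cpp
// Illustrative sketch only -- hypothetical names, not the real llama.cpp API.
// Shows the idea behind per-layer accessors like hparams.n_head(il).
#include <array>
#include <cstddef>
#include <cstdint>

struct hparams_sketch {
    static constexpr std::size_t MAX_LAYERS = 512;

    // One entry per layer; layers without attention can simply carry 0 heads.
    std::array<uint32_t, MAX_LAYERS> n_head_arr    {};
    std::array<uint32_t, MAX_LAYERS> n_head_kv_arr {};

    uint32_t n_head   (uint32_t il) const { return n_head_arr[il];    }
    uint32_t n_head_kv(uint32_t il) const { return n_head_kv_arr[il]; }
};
```

With accessors along these lines, the reshape of Qcur/Kcur/Vcur in the hunk above picks up the head counts of the current layer il, so layers with differing (or zero) attention heads no longer share one model-wide value.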

0 commit comments

Comments
 (0)