Skip to content

Commit f95b04a

Browse files
committed
model : fix order kvq -> qkv
ggml-ci
1 parent 2eacb4c commit f95b04a

File tree

4 files changed: +56 additions, −55 deletions

src/llama-context.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2572,9 +2572,9 @@ ggml_tensor * llama_context_kv_self::build_attn(
25722572
ggml_cgraph * gf,
25732573
ggml_tensor * wo,
25742574
ggml_tensor * wo_b,
2575+
ggml_tensor * q_cur,
25752576
ggml_tensor * k_cur,
25762577
ggml_tensor * v_cur,
2577-
ggml_tensor * q_cur,
25782578
int32_t n_tokens,
25792579
float kq_scale,
25802580
int il,
@@ -2617,9 +2617,6 @@ ggml_tensor * llama_context_kv_self::build_attn(
26172617
ggml_build_forward_expand(gf, ggml_cpy(ctx0, v_cur, v_cache_view));
26182618
}
26192619

2620-
const auto & n_embd_head_k = hparams.n_embd_head_k;
2621-
const auto & n_embd_head_v = hparams.n_embd_head_v;
2622-
26232620
// TODO: improve
26242621
bool is_sliding = false;
26252622

@@ -2648,8 +2645,11 @@ ggml_tensor * llama_context_kv_self::build_attn(
26482645

26492646
const auto n_kv = worst_case ? kv_self.size : kv_self.n;
26502647

2651-
const int64_t n_head = hparams.n_head(il);
2652-
const int64_t n_head_kv = hparams.n_head_kv(il);
2648+
const int64_t n_head = hparams.n_head(il);
2649+
const int64_t n_head_kv = hparams.n_head_kv(il);
2650+
2651+
const auto & n_embd_head_k = hparams.n_embd_head_k;
2652+
const auto & n_embd_head_v = hparams.n_embd_head_v;
26532653

26542654
struct ggml_tensor * q = ggml_permute(ctx0, q_cur, 0, 2, 1, 3);
26552655
//cb(q, "q", il);

src/llama-context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,9 +381,9 @@ class llama_context_kv_self : public llama_context {
381381
ggml_cgraph * gf,
382382
ggml_tensor * wo,
383383
ggml_tensor * wo_b,
384+
ggml_tensor * q_cur,
384385
ggml_tensor * k_cur,
385386
ggml_tensor * v_cur,
386-
ggml_tensor * q_cur,
387387
int32_t n_tokens,
388388
float kq_scale,
389389
int il,

src/llama-graph.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ class llama_graph_i {
9393
ggml_cgraph * gf,
9494
ggml_tensor * wo,
9595
ggml_tensor * wo_b,
96+
ggml_tensor * q_cur,
9697
ggml_tensor * k_cur,
9798
ggml_tensor * v_cur,
98-
ggml_tensor * q_cur,
9999
int32_t n_tokens,
100100
float kq_scale,
101101
int il,

0 commit comments

Comments (0)