
Commit 1d801d2

graph : update attn/kv_self names
1 parent 8280645 commit 1d801d2

File tree

src/llama-context.cpp
src/llama-context.h
src/llama-graph.h
src/llama-model.cpp

4 files changed: +17 -17 lines changed

src/llama-context.cpp

Lines changed: 6 additions & 6 deletions

@@ -2491,7 +2491,7 @@ void llama_context_kv_self::kv_self_update() {
 
         ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        build_k_shift(ctx0, gf);
+        build_kv_self_shift(ctx0, gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
@@ -2520,7 +2520,7 @@ void llama_context_kv_self::kv_self_update() {
 
         ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        build_defrag(ctx0, gf);
+        build_kv_self_defrag(ctx0, gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
@@ -2762,7 +2762,7 @@ ggml_tensor * llama_context_kv_self::build_attn_qkv(
     return cur;
 }
 
-ggml_tensor * llama_context_kv_self::build_soft_max_ext(
+ggml_tensor * llama_context_kv_self::build_attn_soft_max(
         ggml_context * ctx0,
         ggml_tensor * kq,
         float kq_scale) {
@@ -2771,7 +2771,7 @@ ggml_tensor * llama_context_kv_self::build_soft_max_ext(
     return ggml_soft_max_ext(ctx0, kq, inp_KQ_mask_cnv, kq_scale, hparams.f_max_alibi_bias);
 }
 
-void llama_context_kv_self::build_k_shift(
+void llama_context_kv_self::build_kv_self_shift(
        ggml_context * ctx0,
        ggml_cgraph * graph) {
     const auto & n_ctx = cparams.n_ctx;
@@ -2843,7 +2843,7 @@ void llama_context_kv_self::build_k_shift(
     }
 }
 
-void llama_context_kv_self::build_defrag(
+void llama_context_kv_self::build_kv_self_defrag(
        ggml_context * ctx0,
        ggml_cgraph * graph) {
     const auto & hparams = model.hparams;
@@ -2860,7 +2860,7 @@ void llama_context_kv_self::build_defrag(
     // number of cells moved
     uint32_t n_moves = 0;
 
-    // each move requires 6*n_layer tensors (see build_defrag)
+    // each move requires 6*n_layer tensors (see build_kv_self_defrag)
     // - source view, destination view, copy operation
     // - x2 for keys and values
     //const uint32_t max_moves = model.max_nodes()/(6*n_layer);
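The comment in the last hunk encodes the defrag graph budget: moving one KV-cache cell adds a source view, a destination view, and a copy operation, for both keys and values, in every layer, hence 6*n_layer graph nodes per move. A minimal standalone sketch of that arithmetic follows; the concrete numbers (node cap, layer count) are illustrative assumptions, not values from this commit:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // assumed graph-size cap, standing in for model.max_nodes()
        const uint32_t max_nodes = 8192;
        // assumed layer count, e.g. a 7B-class model
        const uint32_t n_layer = 32;

        // source view + destination view + copy op, x2 for K and V, per layer
        const uint32_t nodes_per_move = 6*n_layer;
        const uint32_t max_moves = max_nodes/nodes_per_move;

        // with these numbers: 8192/192 = 42 moves fit in one defrag graph
        printf("nodes per move: %u, max moves: %u\n", nodes_per_move, max_moves);
        return 0;
    }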

src/llama-context.h

Lines changed: 3 additions & 3 deletions

@@ -379,17 +379,17 @@ class llama_context_kv_self : public llama_context {
            int il,
            bool worst_case) override;
 
-    virtual ggml_tensor * build_soft_max_ext(
+    virtual ggml_tensor * build_attn_soft_max(
            ggml_context * ctx0,
            ggml_tensor * kq,
            float kq_scale) override;
 
-    virtual void build_k_shift(
+    virtual void build_kv_self_shift(
            ggml_context * ctx0,
            ggml_cgraph * graph) override;
 
     // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-    virtual void build_defrag(
+    virtual void build_kv_self_defrag(
            ggml_context * ctx0,
            ggml_cgraph * graph) override;
 

src/llama-graph.h

Lines changed: 3 additions & 3 deletions

@@ -92,17 +92,17 @@ class llama_graph_i {
            int il,
            bool worst_case) = 0;
 
-    virtual ggml_tensor * build_soft_max_ext(
+    virtual ggml_tensor * build_attn_soft_max(
            ggml_context * ctx0,
            ggml_tensor * kq,
            float kq_scale) = 0;
 
-    virtual void build_k_shift(
+    virtual void build_kv_self_shift(
            ggml_context * ctx0,
            ggml_cgraph * graph) = 0;
 
     // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-    virtual void build_defrag(
+    virtual void build_kv_self_defrag(
            ggml_context * ctx0,
            ggml_cgraph * graph) = 0;
 

src/llama-model.cpp

Lines changed: 5 additions & 5 deletions

@@ -4251,18 +4251,18 @@ struct llm_build_context {
         return cur;
     }
 
-    struct ggml_cgraph * build_k_shift() {
+    struct ggml_cgraph * build_kv_self_shift() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        lgf.build_k_shift(ctx0, gf);
+        lgf.build_kv_self_shift(ctx0, gf);
 
         return gf;
     }
 
-    struct ggml_cgraph * build_defrag() {
+    struct ggml_cgraph * build_kv_self_defrag() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        lgf.build_defrag(ctx0, gf);
+        lgf.build_kv_self_defrag(ctx0, gf);
 
         return gf;
     }
@@ -5638,7 +5638,7 @@ struct llm_build_context {
         cb(kq, "kq", il);
 
         //kq = ggml_soft_max_ext(ctx0, kq, KQ_mask, 1.0f/sqrtf(float(n_embd_head)), hparams.f_max_alibi_bias);
-        kq = lgf.build_soft_max_ext(ctx0, kq, 1.0f/sqrtf(float(n_embd_head)));
+        kq = lgf.build_attn_soft_max(ctx0, kq, 1.0f/sqrtf(float(n_embd_head)));
         cb(kq, "kq_soft_max_ext", il);
 
         struct ggml_tensor * v = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_embd_gqa, n_tokens)));

0 commit comments
