
Commit 1d801d2

graph : update attn/kv_self names
1 parent 8280645 commit 1d801d2

File tree

src/llama-context.cpp
src/llama-context.h
src/llama-graph.h
src/llama-model.cpp

4 files changed: +17 -17 lines changed

src/llama-context.cpp

Lines changed: 6 additions & 6 deletions

@@ -2491,7 +2491,7 @@ void llama_context_kv_self::kv_self_update() {
 
         ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        build_k_shift(ctx0, gf);
+        build_kv_self_shift(ctx0, gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
@@ -2520,7 +2520,7 @@ void llama_context_kv_self::kv_self_update() {
 
         ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        build_defrag(ctx0, gf);
+        build_kv_self_defrag(ctx0, gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
@@ -2762,7 +2762,7 @@ ggml_tensor * llama_context_kv_self::build_attn_qkv(
     return cur;
 }
 
-ggml_tensor * llama_context_kv_self::build_soft_max_ext(
+ggml_tensor * llama_context_kv_self::build_attn_soft_max(
         ggml_context * ctx0,
         ggml_tensor * kq,
         float kq_scale) {
@@ -2771,7 +2771,7 @@ ggml_tensor * llama_context_kv_self::build_soft_max_ext(
     return ggml_soft_max_ext(ctx0, kq, inp_KQ_mask_cnv, kq_scale, hparams.f_max_alibi_bias);
 }
 
-void llama_context_kv_self::build_k_shift(
+void llama_context_kv_self::build_kv_self_shift(
        ggml_context * ctx0,
        ggml_cgraph * graph) {
     const auto & n_ctx = cparams.n_ctx;
@@ -2843,7 +2843,7 @@ void llama_context_kv_self::build_k_shift(
     }
 }
 
-void llama_context_kv_self::build_defrag(
+void llama_context_kv_self::build_kv_self_defrag(
        ggml_context * ctx0,
        ggml_cgraph * graph) {
     const auto & hparams = model.hparams;
@@ -2860,7 +2860,7 @@ void llama_context_kv_self::build_defrag(
     // number of cells moved
     uint32_t n_moves = 0;
 
-    // each move requires 6*n_layer tensors (see build_defrag)
+    // each move requires 6*n_layer tensors (see build_kv_self_defrag)
     // - source view, destination view, copy operation
     // - x2 for keys and values
     //const uint32_t max_moves = model.max_nodes()/(6*n_layer);
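The comment in the last hunk encodes the defrag graph budget: moving one KV-cache cell adds a source view, a destination view, and a copy operation, for both keys and values, in every layer, hence 6*n_layer graph nodes per move. A minimal standalone sketch of that arithmetic follows; the concrete numbers (node cap, layer count) are illustrative assumptions, not values from this commit:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // assumed graph-size cap, standing in for model.max_nodes()
        const uint32_t max_nodes = 8192;
        // assumed layer count, e.g. a 7B-class model
        const uint32_t n_layer = 32;

        // source view + destination view + copy op, x2 for K and V, per layer
        const uint32_t nodes_per_move = 6*n_layer;
        const uint32_t max_moves = max_nodes/nodes_per_move;

        // with these numbers: 8192/192 = 42 moves fit in one defrag graph
        printf("nodes per move: %u, max moves: %u\n", nodes_per_move, max_moves);
        return 0;
    }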

src/llama-context.h

Lines changed: 3 additions & 3 deletions

@@ -379,17 +379,17 @@ class llama_context_kv_self : public llama_context {
            int il,
            bool worst_case) override;
 
-    virtual ggml_tensor * build_soft_max_ext(
+    virtual ggml_tensor * build_attn_soft_max(
            ggml_context * ctx0,
            ggml_tensor * kq,
            float kq_scale) override;
 
-    virtual void build_k_shift(
+    virtual void build_kv_self_shift(
            ggml_context * ctx0,
            ggml_cgraph * graph) override;
 
     // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-    virtual void build_defrag(
+    virtual void build_kv_self_defrag(
            ggml_context * ctx0,
            ggml_cgraph * graph) override;
 

src/llama-graph.h

Lines changed: 3 additions & 3 deletions

@@ -92,17 +92,17 @@ class llama_graph_i {
            int il,
            bool worst_case) = 0;
 
-    virtual ggml_tensor * build_soft_max_ext(
+    virtual ggml_tensor * build_attn_soft_max(
            ggml_context * ctx0,
            ggml_tensor * kq,
            float kq_scale) = 0;
 
-    virtual void build_k_shift(
+    virtual void build_kv_self_shift(
            ggml_context * ctx0,
            ggml_cgraph * graph) = 0;
 
     // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
-    virtual void build_defrag(
+    virtual void build_kv_self_defrag(
            ggml_context * ctx0,
            ggml_cgraph * graph) = 0;
 

src/llama-model.cpp

Lines changed: 5 additions & 5 deletions

@@ -4251,18 +4251,18 @@ struct llm_build_context {
         return cur;
     }
 
-    struct ggml_cgraph * build_k_shift() {
+    struct ggml_cgraph * build_kv_self_shift() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        lgf.build_k_shift(ctx0, gf);
+        lgf.build_kv_self_shift(ctx0, gf);
 
         return gf;
     }
 
-    struct ggml_cgraph * build_defrag() {
+    struct ggml_cgraph * build_kv_self_defrag() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
 
-        lgf.build_defrag(ctx0, gf);
+        lgf.build_kv_self_defrag(ctx0, gf);
 
         return gf;
     }
@@ -5638,7 +5638,7 @@ struct llm_build_context {
         cb(kq, "kq", il);
 
         //kq = ggml_soft_max_ext(ctx0, kq, KQ_mask, 1.0f/sqrtf(float(n_embd_head)), hparams.f_max_alibi_bias);
-        kq = lgf.build_soft_max_ext(ctx0, kq, 1.0f/sqrtf(float(n_embd_head)));
+        kq = lgf.build_attn_soft_max(ctx0, kq, 1.0f/sqrtf(float(n_embd_head)));
         cb(kq, "kq_soft_max_ext", il);
 
         struct ggml_tensor * v = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_embd_gqa, n_tokens)));

0 commit comments
