@@ -27,7 +27,7 @@ llama_memory_hybrid::llama_memory_hybrid(
     bool offload,
     /* layer filters */
     layer_filter_cb && filter_attn,
-    layer_filter_cb && filter_recurrent) :
+    layer_filter_cb && filter_recr) :
     hparams(model.hparams),
     mem_attn(new llama_kv_cache_unified(
         model,
@@ -44,11 +44,11 @@ llama_memory_hybrid::llama_memory_hybrid(
         n_swa,
         swa_type
     )),
-    mem_recurrent(new llama_memory_recurrent(
+    mem_recr(new llama_memory_recurrent(
         model,
-        filter_recurrent == nullptr ?
+        filter_recr == nullptr ?
             [&](int32_t il) { return model.hparams.recurrent_layer(il); }
-            : filter_recurrent,
+            : filter_recr,
         type_r,
         type_s,
         offload,
@@ -77,7 +77,7 @@ llama_memory_state_ptr llama_memory_hybrid::init_batch(const llama_batch & batch
     }

     // prepare the recurrent batches first
-    if (!mem_recurrent->prepare(ubatches)) {
+    if (!mem_recr->prepare(ubatches)) {
         // TODO: will the recurrent cache be in an undefined state at this point?
         LLAMA_LOG_ERROR("%s: failed to prepare recurrent ubatches\n", __func__);
         return std::make_unique<llama_memory_hybrid_state>(LLAMA_MEMORY_STATUS_FAILED_PREPARE);
@@ -108,82 +108,82 @@ bool llama_memory_hybrid::get_can_shift() const {
 }

 void llama_memory_hybrid::clear(bool data) {
-    mem_attn     ->clear(data);
-    mem_recurrent->clear(data);
+    mem_attn->clear(data);
+    mem_recr->clear(data);
 }

 bool llama_memory_hybrid::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
     // Try removing from the recurrent cache first since it may fail. If it does
     // fail, the cache will not have been mutated.
-    if (!mem_recurrent->seq_rm(seq_id, p0, p1)) {
+    if (!mem_recr->seq_rm(seq_id, p0, p1)) {
         return false;
     }
     return mem_attn->seq_rm(seq_id, p0, p1);
 }

 void llama_memory_hybrid::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) {
-    mem_attn     ->seq_cp(seq_id_src, seq_id_dst, p0, p1);
-    mem_recurrent->seq_cp(seq_id_src, seq_id_dst, p0, p1);
+    mem_attn->seq_cp(seq_id_src, seq_id_dst, p0, p1);
+    mem_recr->seq_cp(seq_id_src, seq_id_dst, p0, p1);
 }

 void llama_memory_hybrid::seq_keep(llama_seq_id seq_id) {
-    mem_attn     ->seq_keep(seq_id);
-    mem_recurrent->seq_keep(seq_id);
+    mem_attn->seq_keep(seq_id);
+    mem_recr->seq_keep(seq_id);
 }

 void llama_memory_hybrid::seq_add(llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) {
     mem_attn->seq_add(seq_id, p0, p1, shift);
-    mem_recurrent->seq_add(seq_id, p0, p1, shift);
+    mem_recr->seq_add(seq_id, p0, p1, shift);
 }

 void llama_memory_hybrid::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
-    mem_attn     ->seq_div(seq_id, p0, p1, d);
-    mem_recurrent->seq_div(seq_id, p0, p1, d);
+    mem_attn->seq_div(seq_id, p0, p1, d);
+    mem_recr->seq_div(seq_id, p0, p1, d);
 }

 llama_pos llama_memory_hybrid::seq_pos_min(llama_seq_id seq_id) const {
     // the min of the total cache is the max of the two caches' min values
-    return std::max(mem_attn->seq_pos_min(seq_id), mem_recurrent->seq_pos_min(seq_id));
+    return std::max(mem_attn->seq_pos_min(seq_id), mem_recr->seq_pos_min(seq_id));
 }

 llama_pos llama_memory_hybrid::seq_pos_max(llama_seq_id seq_id) const {
     // the max of the total cache is the min of the two caches' max values
-    return std::min(mem_attn->seq_pos_max(seq_id), mem_recurrent->seq_pos_max(seq_id));
+    return std::min(mem_attn->seq_pos_max(seq_id), mem_recr->seq_pos_max(seq_id));
 }

 void llama_memory_hybrid::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
-    mem_attn     ->state_write(io, seq_id);
-    mem_recurrent->state_write(io, seq_id);
+    mem_attn->state_write(io, seq_id);
+    mem_recr->state_write(io, seq_id);
 }

 void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id) {
-    mem_attn     ->state_read(io, seq_id);
-    mem_recurrent->state_read(io, seq_id);
+    mem_attn->state_read(io, seq_id);
+    mem_recr->state_read(io, seq_id);
 }

 llama_kv_cache_unified * llama_memory_hybrid::get_mem_attn() const {
     return mem_attn.get();
 }

-llama_memory_recurrent * llama_memory_hybrid::get_mem_recurrent() const {
-    return mem_recurrent.get();
+llama_memory_recurrent * llama_memory_hybrid::get_mem_recr() const {
+    return mem_recr.get();
 }

 llama_memory_hybrid_state::llama_memory_hybrid_state(llama_memory_status status) : status(status) {}

 llama_memory_hybrid_state::llama_memory_hybrid_state(llama_memory_hybrid * mem) :
-    state_attn     (mem->get_mem_attn     ()->init_full()),
-    state_recurrent(mem->get_mem_recurrent()->init_full()),
-    status(llama_memory_status_combine(state_attn->get_status(), state_recurrent->get_status())) {
+    state_attn(mem->get_mem_attn()->init_full()),
+    state_recr(mem->get_mem_recr()->init_full()),
+    status(llama_memory_status_combine(state_attn->get_status(), state_recr->get_status())) {
 }

 llama_memory_hybrid_state::llama_memory_hybrid_state(
         llama_memory_hybrid * mem,
         llama_context * lctx,
         bool optimize) :
-    state_attn     (mem->get_mem_attn     ()->init_update(lctx, optimize)),
-    state_recurrent(mem->get_mem_recurrent()->init_update(lctx, optimize)),
-    status(llama_memory_status_combine(state_attn->get_status(), state_recurrent->get_status())) {
+    state_attn(mem->get_mem_attn()->init_update(lctx, optimize)),
+    state_recr(mem->get_mem_recr()->init_update(lctx, optimize)),
+    status(llama_memory_status_combine(state_attn->get_status(), state_recr->get_status())) {
 }

 llama_memory_hybrid_state::llama_memory_hybrid_state(
@@ -194,16 +194,16 @@ llama_memory_hybrid_state::llama_memory_hybrid_state(
     sbatch(std::move(sbatch)),
     ubatches(std::move(ubatches)),
     // note: here we copy the ubatches. not sure if this is ideal
-    state_attn     (new llama_kv_cache_unified_state(mem->get_mem_attn(), {}, std::move(heads_attn), this->ubatches)),
-    state_recurrent(new llama_memory_recurrent_state(mem->get_mem_recurrent(), {}, this->ubatches)),
+    state_attn(new llama_kv_cache_unified_state(mem->get_mem_attn(), {}, std::move(heads_attn), this->ubatches)),
+    state_recr(new llama_memory_recurrent_state(mem->get_mem_recr(), {}, this->ubatches)),
     status(LLAMA_MEMORY_STATUS_SUCCESS) {
 }

 bool llama_memory_hybrid_state::next() {
     assert(status == LLAMA_MEMORY_STATUS_SUCCESS);

-    state_attn     ->next();
-    state_recurrent->next();
+    state_attn->next();
+    state_recr->next();

     if (++i_next >= ubatches.size()) {
         return false;
@@ -217,8 +217,8 @@ bool llama_memory_hybrid_state::apply() {

     bool res = true;

-    res = res & state_attn     ->apply();
-    res = res & state_recurrent->apply();
+    res = res & state_attn->apply();
+    res = res & state_recr->apply();

     return res;
 }
@@ -242,6 +242,6 @@ const llama_kv_cache_unified_state * llama_memory_hybrid_state::get_state_attn()
     return static_cast<const llama_kv_cache_unified_state *>(state_attn.get());
 }

-const llama_memory_recurrent_state * llama_memory_hybrid_state::get_state_recurrent() const {
-    return static_cast<const llama_memory_recurrent_state *>(state_recurrent.get());
+const llama_memory_recurrent_state * llama_memory_hybrid_state::get_state_recr() const {
+    return static_cast<const llama_memory_recurrent_state *>(state_recr.get());
 }