Commit f8bcfe0

fix: Fix indexing into k_l for recurrent cache with filter

Branch: HybridCache
Signed-off-by: Gabe Goodhart <[email protected]>

1 parent: a02e242

File tree

1 file changed (+2 −2 lines)


src/llama-kv-cache.cpp (2 additions, 2 deletions)

@@ -1906,8 +1906,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
         ggml_tensor * v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*kv_size);
         ggml_format_name(k, "cache_k_l%d", i);
         ggml_format_name(v, "cache_v_l%d", i);
-        k_l.push_back(k);
-        v_l.push_back(v);
+        k_l[i] = k;
+        v_l[i] = v;
     }

     // allocate tensors and initialize the buffers to avoid NaNs in the padding

Comments (0)