kv-cache : fix reserve graph out-of-bounds access

ggerganov · ggerganov · commit 7f14ac19e807 · 2025-05-14T22:00:40.000+03:00
ggml-ci
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
@@ -441,6 +441,7 @@ void llama_kv_cache_unified::defrag_sched(float thold) {
 
 void llama_kv_cache_unified::set_full() {
     n = size;
+    head = 0;
 }
 
 llama_sbatch llama_kv_cache_unified::sbatch_init(
@@ -1712,6 +1713,7 @@ void llama_kv_cache_recurrent::defrag_sched(float thold) {
 
 void llama_kv_cache_recurrent::set_full() {
     n = size;
+    head = 0;
 }
 
 llama_sbatch llama_kv_cache_recurrent::sbatch_init(
diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h
@@ -171,11 +171,8 @@ class llama_kv_cache_unified : public llama_kv_cache {
     void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
     void state_read (llama_io_read_i  & io, llama_seq_id seq_id = -1) override;
 
-    // Note: The value of head isn't only used to optimize searching
-    // for a free KV slot. llama_decode_impl also uses it, so it
-    // cannot be freely changed after a slot has been allocated.
-    uint32_t head = 0;
-    uint32_t size = 0;
+    uint32_t head = 0; // the location where the batch will be placed in the cache (see find_slot())
+    uint32_t size = 0; // total number of cells, shared across all sequences
     uint32_t used = 0; // used cells (i.e. at least one seq_id)
 
     // computed before each graph build

Original file line number	Diff line number	Diff line change
`@@ -441,6 +441,7 @@ void llama_kv_cache_unified::defrag_sched(float thold) {`
`441`	`441`
`442`	`442`	`void llama_kv_cache_unified::set_full() {`
`443`	`443`	`n = size;`
	`444`	`+ head = 0;`
`444`	`445`	`}`
`445`	`446`
`446`	`447`	`llama_sbatch llama_kv_cache_unified::sbatch_init(`
`@@ -1712,6 +1713,7 @@ void llama_kv_cache_recurrent::defrag_sched(float thold) {`
`1712`	`1713`
`1713`	`1714`	`void llama_kv_cache_recurrent::set_full() {`
`1714`	`1715`	`n = size;`
	`1716`	`+ head = 0;`
`1715`	`1717`	`}`
`1716`	`1718`
`1717`	`1719`	`llama_sbatch llama_kv_cache_recurrent::sbatch_init(`