File tree: 2 files changed, +4 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,7 @@ void llama_kv_cache_unified::defrag_sched(float thold) {
441441
442442void llama_kv_cache_unified::set_full () {
443443 n = size;
444+ head = 0 ;
444445}
445446
446447llama_sbatch llama_kv_cache_unified::sbatch_init (
@@ -1712,6 +1713,7 @@ void llama_kv_cache_recurrent::defrag_sched(float thold) {
17121713
17131714void llama_kv_cache_recurrent::set_full () {
17141715 n = size;
1716+ head = 0 ;
17151717}
17161718
17171719llama_sbatch llama_kv_cache_recurrent::sbatch_init (
Original file line number | Diff line number | Diff line change
@@ -171,11 +171,8 @@ class llama_kv_cache_unified : public llama_kv_cache {
171171 void state_write (llama_io_write_i & io, llama_seq_id seq_id = -1 ) const override ;
172172 void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1 ) override ;
173173
174- // Note: The value of head isn't only used to optimize searching
175- // for a free KV slot. llama_decode_impl also uses it, so it
176- // cannot be freely changed after a slot has been allocated.
177- uint32_t head = 0 ;
178- uint32_t size = 0 ;
174+ uint32_t head = 0 ; // the location where the batch will be placed in the cache (see find_slot())
175+ uint32_t size = 0 ; // total number of cells, shared across all sequences
179176 uint32_t used = 0 ; // used cells (i.e. at least one seq_id)
180177
181178 // computed before each graph build
You can’t perform that action at this time.
0 commit comments