@@ -223,12 +223,7 @@ void llama_kv_cache_unified::clear(bool data) {
223223}
224224
225225bool  llama_kv_cache_unified::seq_rm (llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
226-     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
227- 
228-     auto  & cells = v_cells[seq_to_stream[seq_id]];
229-     auto  & head  = v_heads[seq_to_stream[seq_id]];
230- 
231-     uint32_t  new_head = cells.size ();
226+     GGML_ASSERT (seq_id == -1  || (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ()));
232227
233228    if  (p0 < 0 ) {
234229        p0 = 0 ;
@@ -239,6 +234,11 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
239234    }
240235
241236    if  (seq_id >= 0 ) {
237+         auto  & cells = v_cells[seq_to_stream[seq_id]];
238+         auto  & head  = v_heads[seq_to_stream[seq_id]];
239+ 
240+         uint32_t  new_head = cells.size ();
241+ 
242242        for  (uint32_t  i = 0 ; i < cells.size (); ++i) {
243243            if  (!cells.pos_in (i, p0, p1)) {
244244                continue ;
@@ -250,24 +250,36 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
250250                }
251251            }
252252        }
253+ 
254+         //  If we freed up a slot, set head to it so searching can start there.
255+         if  (new_head != cells.size () && new_head < head) {
256+             head = new_head;
257+         }
253258    } else  {
254259        //  match any sequence
255-         for  (uint32_t  i = 0 ; i < cells.size (); ++i) {
256-             if  (!cells.pos_in (i, p0, p1)) {
257-                 continue ;
258-             }
260+         for  (uint32_t  s = 0 ; s < n_stream; ++s) {
261+             auto  & cells = v_cells[s];
262+             auto  & head  = v_heads[s];
259263
260-             cells.rm (i );
264+             uint32_t  new_head =  cells.size ( );
261265
262-             if  (new_head == cells.size ()) {
263-                 new_head = i;
266+             for  (uint32_t  i = 0 ; i < cells.size (); ++i) {
267+                 if  (!cells.pos_in (i, p0, p1)) {
268+                     continue ;
269+                 }
270+ 
271+                 cells.rm (i);
272+ 
273+                 if  (new_head == cells.size ()) {
274+                     new_head = i;
275+                 }
264276            }
265-         }
266-     }
267277
268-     //  If we freed up a slot, set head to it so searching can start there.
269-     if  (new_head != cells.size () && new_head < head) {
270-         head = new_head;
278+             //  If we freed up a slot, set head to it so searching can start there.
279+             if  (new_head != cells.size () && new_head < head) {
280+                 head = new_head;
281+             }
282+         }
271283    }
272284
273285    return  true ;
0 commit comments