ggml-org · leok7v · Aug 9, 2025 · Copilot · Aug 9, 2025 · Copilot
@@ -429,7 +429,7 @@ int main(int argc, char ** argv) {
 
         // KV cache management
         // if no verification token matched, we simply remove all cells from this batch -> no fragmentation
-        llama_memory_seq_rm(mem, -1, n_past, -1);
+        llama_memory_seq_rm(mem, 0, n_past, -1);
-        llama_memory_seq_rm(mem, 0, n_past, -1);
+        llama_memory_seq_rm(mem, -1, n_past, -1);
-        llama_memory_seq_rm(mem, 0, n_past, -1);
+        llama_memory_seq_rm(mem, -1, n_past, -1);
 
         if (seq_id_best != 0) {
             // if a verification token matched, we keep the best sequence and remove the rest

@@ -354,7 +354,7 @@ int main(int argc, char ** argv) {
         }
 
         // remove any "future" tokens that we might have inherited from the previous session
-        llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);
+        llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1);
-        llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1);
+        llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);
-        llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1);
+        llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);
     }
 
     LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",