fix: llama_memory_seq_rm(mem, -1, ...)

leok7v · leok7v · commit 843f453d2c64 · 2025-08-09T14:33:01.000-07:00
because passing seq_id = -1 violates the following assertion:

GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size());

in llama_kv_cache_unified::seq_rm
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
@@ -429,7 +429,7 @@ int main(int argc, char ** argv) {
 
         // KV cache management
         // if no verification token matched, we simply remove all cells from this batch -> no fragmentation
-        llama_memory_seq_rm(mem, -1, n_past, -1);
+        llama_memory_seq_rm(mem, 0, n_past, -1);
 
         if (seq_id_best != 0) {
             // if a verification token matched, we keep the best sequence and remove the rest
diff --git a/tools/main/main.cpp b/tools/main/main.cpp
@@ -354,7 +354,7 @@ int main(int argc, char ** argv) {
         }
 
         // remove any "future" tokens that we might have inherited from the previous session
-        llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);
+        llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1);
     }
 
     LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",

Original file line number	Diff line number	Diff line change
`@@ -354,7 +354,7 @@ int main(int argc, char ** argv) {`
`354`	`354`	`}`
`355`	`355`
`356`	`356`	`// remove any "future" tokens that we might have inherited from the previous session`
`357`		`- llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);`
	`357`	`+ llama_memory_seq_rm(mem, 0, n_matching_session_tokens, -1);`
`358`	`358`	`}`
`359`	`359`
`360`	`360`	`LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",`