fix(main): Check the output of seq_rm for prefix matching

gabe-l-hart · gabe-l-hart · commit 24aff7fa7989 · 2025-11-04T13:44:28.000-07:00
The prefix-matching step explicitly attempts to remove the tokens at the end of the sequence that don't match. A recurrent cache cannot perform this operation because its state is updated in place, so if the removal fails, we need to clear the whole cache. #16768 Branch: HybridContextShift-16768 Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/tools/main/main.cpp b/tools/main/main.cpp
@@ -354,7 +354,10 @@ int main(int argc, char ** argv) {
         }
 
         // remove any "future" tokens that we might have inherited from the previous session
-        llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);
+        if (!llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1)) {
+            LOG_INF("%s: unable to resuse common prefix\n", __func__);
+            llama_memory_seq_rm(mem, -1, -1, -1);
+        }
     }
 
     LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",

Original file line number	Diff line number	Diff line change
`@@ -354,7 +354,10 @@ int main(int argc, char ** argv) {`
`354`	`354`	`}`
`355`	`355`
`356`	`356`	`// remove any "future" tokens that we might have inherited from the previous session`
`357`		`- llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1);`
	`357`	`+ if (!llama_memory_seq_rm(mem, -1, n_matching_session_tokens, -1)) {`
	`358`	`+ LOG_INF("%s: unable to resuse common prefix\n", __func__);`
	`359`	`+ llama_memory_seq_rm(mem, -1, -1, -1);`
	`360`	`+ }`
`358`	`361`	`}`
`359`	`362`
`360`	`363`	`LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",`