Commit 7e60d1c

cont : fix context shift
1 parent 41ebbfd commit 7e60d1c

tools/server/server.cpp

Lines changed: 15 additions & 1 deletion
@@ -3577,7 +3577,21 @@ struct server_context {
             llama_memory_seq_rm (llama_get_memory(ctx), slot.id, n_keep            , n_keep + n_discard);
             llama_memory_seq_add(llama_get_memory(ctx), slot.id, n_keep + n_discard, slot.prompt.n_tokens(), -n_discard);
 
-            slot.prompt.tokens.keep_first(slot.prompt.tokens.size() - n_discard);
+            // add generated tokens to cache
+            // ref: https://github.com/ggml-org/llama.cpp/pull/16818#discussion_r2473269481
+            {
+                GGML_ASSERT(!slot.prompt.tokens.has_mtmd);
+
+                llama_tokens new_tokens = slot.prompt.tokens.get_text_tokens(); // copy
+                for (size_t i = n_keep + n_discard; i < new_tokens.size(); i++) {
+                    new_tokens[i - n_discard] = new_tokens[i];
+                }
+
+                new_tokens.resize(slot.prompt.tokens.size() - n_discard);
+
+                slot.prompt.tokens.clear();
+                slot.prompt.tokens.insert(new_tokens);
+            }
 
             slot.truncated = true;
         }
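
The added block rebuilds the in-memory prompt cache to match the KV-cache shift performed by llama_memory_seq_rm / llama_memory_seq_add: it copies the text tokens, moves everything after the discarded window down by n_discard positions, truncates the tail, and re-inserts the result. The previous keep_first(...) call dropped tokens from the end of the cache, whereas the KV-cache operations discard the middle range [n_keep, n_keep + n_discard), which appears to be the mismatch this commit fixes. The sketch below isolates that copy / shift / resize step; it is a standalone illustration, not llama.cpp code, with std::vector<int> standing in for llama_tokens and shift_tokens as a hypothetical helper.

```cpp
// Minimal, self-contained sketch of the compaction step (not llama.cpp code):
// std::vector<int> stands in for llama_tokens, and shift_tokens is a
// hypothetical helper mirroring the copy / shift / resize sequence above.
#include <cassert>
#include <cstdio>
#include <vector>

// Remove n_discard tokens starting at position n_keep and close the gap by
// shifting the tail of the sequence down.
static std::vector<int> shift_tokens(const std::vector<int> & tokens, size_t n_keep, size_t n_discard) {
    assert(n_keep + n_discard <= tokens.size());

    std::vector<int> new_tokens = tokens; // copy, like get_text_tokens()

    for (size_t i = n_keep + n_discard; i < new_tokens.size(); i++) {
        new_tokens[i - n_discard] = new_tokens[i];
    }

    new_tokens.resize(tokens.size() - n_discard);

    return new_tokens;
}

int main() {
    // keep the first 2 tokens, discard the next 3, keep the rest
    const std::vector<int> cache  = {1, 2, 3, 4, 5, 6, 7, 8};
    const std::vector<int> result = shift_tokens(cache, /*n_keep=*/2, /*n_discard=*/3);

    for (const int t : result) {
        printf("%d ", t); // prints: 1 2 6 7 8
    }
    printf("\n");

    return 0;
}
```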
