From bcf14fd4c0fd9f9f143c0df18159e9a324d787d4 Mon Sep 17 00:00:00 2001 From: Vinkal Chudgar Date: Mon, 22 Sep 2025 20:15:36 +0000 Subject: [PATCH 1/3] tools/main: llama-cli: prevent spurious assistant token (#13402) During prompt ingestion, prompt tokens are accepted into the sampler history (for repetition penalties). The conversation-mode path then appended `common_sampler_last(smpl)` to `assistant_ss` before any new token was sampled. At that point, "last" was a prompt-side token (e.g., an input prefix), so the assistant chat message began with an extra piece. Fix: append to `assistant_ss` only for a newly sampled (non-EOG) token. This affects only chat message assembly (`assistant_ss` / `chat_msgs` / `common_chat_format_single`); terminal stdout is unchanged. Sampling order/logits are unchanged. Fixes #13402. Signed-off-by: Vinkal Chudgar --- tools/main/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 083fc0cf26c93..60d8919fd02bf 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -706,6 +706,10 @@ int main(int argc, char ** argv) { // LOG_DBG("last: %s\n", string_from(ctx, smpl->prev.to_vector()).c_str()); embd.push_back(id); + + if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) { + assistant_ss << common_token_to_piece(ctx, id, false); + } // echo this to console input_echo = true; @@ -826,9 +830,6 @@ int main(int argc, char ** argv) { // if current token is not EOG, we add it to current assistant message if (params.conversation_mode && !waiting_for_first_input) { - const auto id = common_sampler_last(smpl); - assistant_ss << common_token_to_piece(ctx, id, false); - if (!prompt.empty()) { prompt.clear(); is_interacting = false; From 2f16ea660d11d62a5e44e1c40abbddefe3434096 Mon Sep 17 00:00:00 2001 From: Vinkal Date: Wed, 24 Sep 2025 11:27:57 +0530 Subject: [PATCH 2/3] Update tools/main/main.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sigbjørn Skjæret --- tools/main/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 60d8919fd02bf..2cecaf471bcd2 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -706,7 +706,7 @@ int main(int argc, char ** argv) { // LOG_DBG("last: %s\n", string_from(ctx, smpl->prev.to_vector()).c_str()); embd.push_back(id); - + if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) { assistant_ss << common_token_to_piece(ctx, id, false); } From 61b76c3a1a7443d0bf7c186e79002788e3cf10fd Mon Sep 17 00:00:00 2001 From: Vinkal Chudgar Date: Sun, 28 Sep 2025 10:43:30 +0000 Subject: [PATCH 3/3] tools/main: remove outdated comment Signed-off-by: Vinkal Chudgar --- tools/main/main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 2cecaf471bcd2..498e00e3a5e58 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -828,7 +828,6 @@ int main(int argc, char ** argv) { } } - // if current token is not EOG, we add it to current assistant message if (params.conversation_mode && !waiting_for_first_input) { if (!prompt.empty()) { prompt.clear();