diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 083fc0cf26c93..498e00e3a5e58 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -707,6 +707,10 @@ int main(int argc, char ** argv) { embd.push_back(id); + if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) { + assistant_ss << common_token_to_piece(ctx, id, false); + } + // echo this to console input_echo = true; @@ -824,11 +828,7 @@ int main(int argc, char ** argv) { } } - // if current token is not EOG, we add it to current assistant message if (params.conversation_mode && !waiting_for_first_input) { - const auto id = common_sampler_last(smpl); - assistant_ss << common_token_to_piece(ctx, id, false); - if (!prompt.empty()) { prompt.clear(); is_interacting = false;