@@ -40,7 +40,6 @@ static std::vector<llama_token> * g_input_tokens;
 static std::ostringstream * g_output_ss;
 static std::vector<llama_token> * g_output_tokens;
 static bool is_interacting  = false;
-static bool need_insert_eot = false;

 static void print_usage(int argc, char ** argv) {
     (void) argc;
@@ -70,7 +69,6 @@ static void sigint_handler(int signo) {
            g_chat->interrupt();
        } else if (!is_interacting && g_params->interactive) {
            is_interacting  = true;
-           need_insert_eot = true;
        } else {
            console::cleanup();
            LOG("\n");
@@ -763,26 +761,16 @@ int main(int argc, char ** argv) {
             }
         }

-        // if current token is not EOG, we add it to current assistant message
-        if (params.conversation) {
-            const auto id = common_sampler_last(smpl);
-            assistant_ss << common_token_to_piece(ctx, id, false);
-        }
-
         if (n_past > 0 && is_interacting) {
             LOG_DBG("waiting for user input\n");

-            if (params.conversation) {
-                LOG("\n> ");
-            }
-
             if (params.input_prefix_bos) {
                 LOG_DBG("adding input prefix BOS token\n");
                 embd_inp.push_back(llama_vocab_bos(vocab));
             }

             std::string buffer;
-            if (!params.input_prefix.empty() && !params.conversation) {
+            if (!params.input_prefix.empty()) {
                 LOG_DBG("appending input prefix: '%s'\n", params.input_prefix.c_str());
                 LOG("%s", params.input_prefix.c_str());
             }
@@ -806,7 +794,7 @@ int main(int argc, char ** argv) {
             // Entering a empty line lets the user pass control back
             if (buffer.length() > 1) {
                 // append input suffix if any
-                if (!params.input_suffix.empty() && !params.conversation) {
+                if (!params.input_suffix.empty()) {
                     LOG_DBG("appending input suffix: '%s'\n", params.input_suffix.c_str());
                     LOG("%s", params.input_suffix.c_str());
                 }
@@ -819,22 +807,14 @@ int main(int argc, char ** argv) {
                     string_process_escapes(buffer);
                 }

-                bool format_chat = params.conversation && params.enable_chat_template;
                 std::string user_inp = std::move(buffer);
                 // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
                 const auto line_pfx = common_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = common_tokenize(ctx, user_inp, false, format_chat);
+                const auto line_inp = common_tokenize(ctx, user_inp, false, true);
                 const auto line_sfx = common_tokenize(ctx, params.input_suffix, false, true);

                 LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());

-                // if user stop generation mid-way, we must add EOT to finish model's last response
-                if (need_insert_eot && format_chat) {
-                    llama_token eot = llama_vocab_eot(vocab);
-                    embd_inp.push_back(eot == LLAMA_TOKEN_NULL ? llama_vocab_eos(vocab) : eot);
-                    need_insert_eot = false;
-                }
-
                 embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
                 embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
                 embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());