 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif

-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
 static llama_context           ** g_ctx;
 static llama_model             ** g_model;
 static common_sampler          ** g_smpl;
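Note: removing the hard-coded `DEFAULT_SYSTEM_MESSAGE` means an empty `--prompt` in conversation mode no longer falls back to injecting "You are a helpful assistant" as the system message; as the hunks below show, the new `waiting_for_first_input` flag instead defers generation until the user provides the first message.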
@@ -263,6 +261,7 @@ int main(int argc, char ** argv) {

     std::vector<llama_token> embd_inp;

+    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.prompt.empty();
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -274,9 +273,9 @@ int main(int argc, char ** argv) {
     };

     {
-        auto prompt = (params.conversation_mode && params.enable_chat_template)
-            // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.prompt)
+        auto prompt = (params.enable_chat_template && !params.prompt.empty())
+            // format the user prompt or system prompt if in conversation mode
+            ? chat_add_and_format(params.conversation_mode ? "system" : "user", params.prompt)
             // otherwise use the prompt as is
             : params.prompt;
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
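To make the new routing concrete, here is a minimal, self-contained sketch of the decision the rewritten ternary makes. `format_with_template` is a hypothetical stand-in for `chat_add_and_format` (which formats via the model's chat template); it exists only to keep the example compilable.

```cpp
#include <string>

// Hypothetical stand-in for chat_add_and_format(); the real formatting is
// done by the chat-template machinery in common/.
static std::string format_with_template(const std::string & role, const std::string & content) {
    return "<|" + role + "|>\n" + content; // placeholder, illustration only
}

// With a chat template and a non-empty prompt, conversation mode treats
// --prompt as the system message, while non-conversation runs format it as
// the first user turn. Without a template, or with an empty prompt, the
// prompt is used verbatim.
static std::string route_prompt(bool conversation_mode, bool enable_chat_template, const std::string & prompt) {
    return (enable_chat_template && !prompt.empty())
        ? format_with_template(conversation_mode ? "system" : "user", prompt)
        : prompt;
}
```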
@@ -292,7 +291,7 @@ int main(int argc, char ** argv) {
     }

     // Should not run without any tokens
-    if (embd_inp.empty()) {
+    if (!params.conversation_mode && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -773,7 +772,7 @@ int main(int argc, char ** argv) {
             }

             // deal with end of generation tokens in interactive mode
-            if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+            if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
                 LOG_DBG("found an EOG token\n");

                 if (params.interactive) {
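The added guard matters because, before the first user input, no token has actually been sampled, so `common_sampler_last(smpl)` cannot hold a genuine end-of-generation token. A condensed sketch of the check, using the same llama.cpp calls as the hunk above:

```cpp
// EOG should only be honored once real generation has happened;
// waiting_for_first_input suppresses the check on the very first pass.
static bool is_real_eog(const llama_vocab * vocab, common_sampler * smpl,
                        bool waiting_for_first_input) {
    return !waiting_for_first_input
        && llama_vocab_is_eog(vocab, common_sampler_last(smpl));
}
```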
@@ -793,12 +792,12 @@ int main(int argc, char ** argv) {
             }

             // if current token is not EOG, we add it to current assistant message
-            if (params.conversation_mode) {
+            if (params.conversation_mode && !waiting_for_first_input) {
                 const auto id = common_sampler_last(smpl);
                 assistant_ss << common_token_to_piece(ctx, id, false);
             }

-            if (n_past > 0 && is_interacting) {
+            if ((waiting_for_first_input || n_past > 0) && is_interacting) {
                 LOG_DBG("waiting for user input\n");

                 if (params.conversation_mode) {
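The same reasoning drives both changes in this hunk: while waiting for the first input, the sampler's last token is not part of any assistant reply, so it must not be appended to `assistant_ss`; and rewriting the condition as `(waiting_for_first_input || n_past > 0)` lets the loop reach the "waiting for user input" branch even though nothing has been evaluated yet (`n_past == 0`).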
@@ -888,11 +887,12 @@ int main(int argc, char ** argv) {
                 input_echo = false; // do not echo this again
             }

-            if (n_past > 0) {
+            if (waiting_for_first_input || n_past > 0) {
                 if (is_interacting) {
                     common_sampler_reset(smpl);
                 }
                 is_interacting = false;
+                waiting_for_first_input = false;
             }
         }

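Taken together, the flag acts as a one-shot substitute for `n_past > 0`: set once at startup (conversation mode, chat template enabled, empty prompt), it steers the very first loop iteration to user input and then disarms itself. A toy, self-contained demo of that lifecycle (illustrative only, not the real main loop):

```cpp
#include <cstdio>

int main() {
    bool waiting_for_first_input = true;  // conversation mode with empty --prompt
    bool is_interacting          = true;  // conversation mode starts interactive
    int  n_past                  = 0;     // no tokens evaluated yet

    for (int step = 0; step < 3; ++step) {
        if ((waiting_for_first_input || n_past > 0) && is_interacting) {
            std::printf("step %d: waiting for user input\n", step); // only step 0 here
        }
        if (waiting_for_first_input || n_past > 0) {
            is_interacting          = false;
            waiting_for_first_input = false; // one-shot: cleared after first use
        }
        n_past += 4; // pretend some tokens were evaluated
    }
    return 0;
}
```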