3131#pragma  warning(disable: 4244 4267) //  possible loss of data
3232#endif 
3333
34- static  const  char  * DEFAULT_SYSTEM_MESSAGE = " You are a helpful assistant"  ;
35- 
3634static  llama_context           ** g_ctx;
3735static  llama_model             ** g_model;
3836static  common_sampler          ** g_smpl;
@@ -267,6 +265,7 @@ int main(int argc, char ** argv) {
267265
268266    std::vector<llama_token> embd_inp;
269267
268+     bool  waiting_for_first_input = params.conversation_mode  && params.enable_chat_template  && params.system_prompt .empty ();
270269    auto  chat_add_and_format = [&chat_msgs, &chat_templates](const  std::string & role, const  std::string & content) {
271270        common_chat_msg new_msg;
272271        new_msg.role  = role;
@@ -278,11 +277,20 @@ int main(int argc, char ** argv) {
278277    };
279278
280279    {
281-         auto  prompt = (params.conversation_mode  && params.enable_chat_template )
282-             //  format the system prompt in conversation mode (fallback to default if empty)
283-             ? chat_add_and_format (" system"  , params.system_prompt .empty () ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt )
280+         std::string prompt;
281+ 
282+         if  (params.conversation_mode  && params.enable_chat_template ) {
283+             //  format the system prompt in conversation mode (will use template default if empty)
284+             prompt = params.system_prompt ;
285+ 
286+             if  (!prompt.empty ()) {
287+                 prompt = chat_add_and_format (" system"  , prompt);
288+             }
289+         } else  {
284290            //  otherwise use the prompt as is
285-             : params.prompt ;
291+             prompt = params.prompt ;
292+         }
293+ 
286294        if  (params.interactive_first  || !params.prompt .empty () || session_tokens.empty ()) {
287295            LOG_DBG (" tokenize the prompt\n "  );
288296            embd_inp = common_tokenize (ctx, prompt, true , true );
@@ -296,7 +304,7 @@ int main(int argc, char ** argv) {
296304    }
297305
298306    //  Should not run without any tokens
299-     if  (embd_inp.empty ()) {
307+     if  (!params. conversation_mode  &&  embd_inp.empty ()) {
300308        if  (add_bos) {
301309            embd_inp.push_back (llama_vocab_bos (vocab));
302310            LOG_WRN (" embd_inp was considered empty and bos was added: %s\n "  , string_from (ctx, embd_inp).c_str ());
@@ -777,7 +785,7 @@ int main(int argc, char ** argv) {
777785            }
778786
779787            //  deal with end of generation tokens in interactive mode
780-             if  (llama_vocab_is_eog (vocab, common_sampler_last (smpl))) {
788+             if  (!waiting_for_first_input &&  llama_vocab_is_eog (vocab, common_sampler_last (smpl))) {
781789                LOG_DBG (" found an EOG token\n "  );
782790
783791                if  (params.interactive ) {
@@ -797,12 +805,12 @@ int main(int argc, char ** argv) {
797805            }
798806
799807            //  if current token is not EOG, we add it to current assistant message
800-             if  (params.conversation_mode ) {
808+             if  (params.conversation_mode  && !waiting_for_first_input ) {
801809                const  auto  id = common_sampler_last (smpl);
802810                assistant_ss << common_token_to_piece (ctx, id, false );
803811            }
804812
805-             if  (n_past > 0  && is_interacting) {
813+             if  (( n_past > 0  || waiting_for_first_input)  && is_interacting) {
806814                LOG_DBG (" waiting for user input\n "  );
807815
808816                if  (params.conversation_mode ) {
@@ -892,11 +900,12 @@ int main(int argc, char ** argv) {
892900                input_echo = false ; //  do not echo this again
893901            }
894902
895-             if  (n_past > 0 ) {
903+             if  (n_past > 0  || waiting_for_first_input ) {
896904                if  (is_interacting) {
897905                    common_sampler_reset (smpl);
898906                }
899907                is_interacting = false ;
908+                 waiting_for_first_input = false ;
900909            }
901910        }
902911
0 commit comments