 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
 static llama_context ** g_ctx;
 static llama_model ** g_model;
 static common_sampler ** g_smpl;
@@ -267,6 +265,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -278,11 +277,20 @@ int main(int argc, char ** argv) {
     };
 
     {
-        auto prompt = (params.conversation_mode && params.enable_chat_template)
-            // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
+        std::string prompt;
+
+        if (params.conversation_mode && params.enable_chat_template) {
+            // format the system prompt in conversation mode (will use template default if empty)
+            prompt = params.system_prompt;
+
+            if (!prompt.empty()) {
+                prompt = chat_add_and_format("system", prompt);
+            }
+        } else {
             // otherwise use the prompt as is
-            : params.prompt;
+            prompt = params.prompt;
+        }
+
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
             embd_inp = common_tokenize(ctx, prompt, true, true);
@@ -296,7 +304,7 @@ int main(int argc, char ** argv) {
     }
 
     // Should not run without any tokens
-    if (embd_inp.empty()) {
+    if (!params.conversation_mode && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -777,7 +785,7 @@ int main(int argc, char ** argv) {
             }
 
             // deal with end of generation tokens in interactive mode
-            if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+            if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
                 LOG_DBG("found an EOG token\n");
 
                 if (params.interactive) {
@@ -797,12 +805,12 @@ int main(int argc, char ** argv) {
             }
 
             // if current token is not EOG, we add it to current assistant message
-            if (params.conversation_mode) {
+            if (params.conversation_mode && !waiting_for_first_input) {
                 const auto id = common_sampler_last(smpl);
                 assistant_ss << common_token_to_piece(ctx, id, false);
             }
 
-            if (n_past > 0 && is_interacting) {
+            if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
                 LOG_DBG("waiting for user input\n");
 
                 if (params.conversation_mode) {
@@ -892,11 +900,12 @@ int main(int argc, char ** argv) {
                 input_echo = false; // do not echo this again
             }
 
-            if (n_past > 0) {
+            if (n_past > 0 || waiting_for_first_input) {
                 if (is_interacting) {
                     common_sampler_reset(smpl);
                 }
                 is_interacting = false;
+                waiting_for_first_input = false;
             }
         }
 
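Taken together, the hunks above add a small two-state machine: when conversation mode is active with an empty system prompt, there is nothing to evaluate before the user types, so `waiting_for_first_input` suppresses the stale EOG check and the assistant-message append, lets the "waiting for user input" path fire even though `n_past == 0`, and is cleared once after the first interaction. A minimal standalone sketch of that flow, assuming plain C++ with illustrative stand-ins (no real llama.cpp calls; the token strings and loop are hypothetical):

```cpp
// Illustrative sketch only -- mirrors the three conditions this patch adds.
#include <iostream>
#include <string>

int main() {
    const bool conversation_mode    = true;
    const bool enable_chat_template = true;
    const std::string system_prompt = "";   // empty: defer to the template's default

    // same predicate as the patch: with no system prompt there is nothing
    // to evaluate before the user speaks
    bool waiting_for_first_input =
        conversation_mode && enable_chat_template && system_prompt.empty();

    int  n_past         = 0;        // number of tokens evaluated so far
    bool is_interacting = true;     // interactive-first style startup
    std::string last    = "<eog>";  // sampler state is meaningless before any eval

    for (int step = 0; step < 2; ++step) {
        // gate 1: don't treat the sampler's pre-eval state as a real EOG token
        if (!waiting_for_first_input && last == "<eog>") {
            std::cout << "found an EOG token\n";
        }

        // gate 2: prompt the user even though n_past is still 0
        if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
            std::cout << "waiting for user input\n";
            n_past += 4;  // pretend the user's tokens were evaluated
        }

        // gate 3: leave the waiting state exactly once, after the first input
        if (n_past > 0 || waiting_for_first_input) {
            is_interacting          = false;
            waiting_for_first_input = false;
        }
        last = "token";
    }
    return 0;
}
```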