@@ -45,8 +45,8 @@ static void print_usage(int argc, char ** argv) {
     (void) argc;
 
     LOG("\nexample usage:\n");
-    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
-    LOG("\n  chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
+    LOG("\n  text generation:     %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]);
+    LOG("\n  chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]);
     LOG("\n");
 }
 
@@ -217,8 +217,8 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
-            if (!params.prompt.empty()) {
-                LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
+            if (!params.prompt.empty() && params.system_prompt.empty()) {
+                LOG_WRN("*** User-specified prompt will pre-start conversation, did you mean to set --system-prompt (-sys) instead?\n");
             }
 
             LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
@@ -265,7 +265,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
+    bool waiting_for_first_input = false;
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -276,22 +276,34 @@ int main(int argc, char ** argv) {
         return formatted;
     };
 
+    std::string prompt;
     {
-        std::string prompt;
-
         if (params.conversation_mode && params.enable_chat_template) {
-            // format the system prompt in conversation mode (will use template default if empty)
-            prompt = params.system_prompt;
+            if (!params.system_prompt.empty()) {
+                // format the system prompt (will use template default if empty)
+                chat_add_and_format("system", params.system_prompt);
+            }
+
+            if (!params.prompt.empty()) {
+                // format and append the user prompt
+                chat_add_and_format("user", params.prompt);
+            } else {
+                waiting_for_first_input = true;
+            }
 
-            if (!prompt.empty()) {
-                prompt = chat_add_and_format("system", prompt);
+            if (!params.system_prompt.empty() || !params.prompt.empty()) {
+                common_chat_templates_inputs inputs;
+                inputs.messages = chat_msgs;
+                inputs.add_generation_prompt = !params.prompt.empty();
+
+                prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
             }
         } else {
             // otherwise use the prompt as is
             prompt = params.prompt;
         }
 
-        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
+        if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
             embd_inp = common_tokenize(ctx, prompt, true, true);
         } else {
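
Condensed, the prompt construction introduced in this hunk works roughly as follows (a minimal sketch built only from the common_chat_* helpers and params fields that appear in the diff; the add_msg lambda is illustrative, not part of the change):

    // build the initial conversation from -sys and -p, then render it through the chat template
    std::vector<common_chat_msg> msgs;
    auto add_msg = [&](const std::string & role, const std::string & content) {
        common_chat_msg msg;
        msg.role    = role;
        msg.content = content;
        msgs.push_back(msg);
    };

    if (!params.system_prompt.empty()) { add_msg("system", params.system_prompt); }
    if (!params.prompt.empty())        { add_msg("user",   params.prompt);        }

    std::string prompt;
    if (!msgs.empty()) {
        common_chat_templates_inputs inputs;
        inputs.messages              = msgs;
        // only open an assistant turn when a user prompt was preloaded with -p
        inputs.add_generation_prompt = !params.prompt.empty();

        prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
    }
    // with no -p, prompt stays empty, waiting_for_first_input is set,
    // and the first user turn is read interactively as before
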
@@ -304,7 +316,7 @@ int main(int argc, char ** argv) {
     }
 
     // Should not run without any tokens
-    if (!params.conversation_mode && embd_inp.empty()) {
+    if (!waiting_for_first_input && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -364,7 +376,12 @@ int main(int argc, char ** argv) {
     }
 
     if (params.conversation_mode) {
-        params.interactive_first = true;
+        if (params.single_turn && !params.prompt.empty()) {
+            params.interactive = false;
+            params.interactive_first = false;
+        } else {
+            params.interactive_first = true;
+        }
     }
 
     // enable interactive mode if interactive start is specified
@@ -808,6 +825,11 @@ int main(int argc, char ** argv) {
             if (params.conversation_mode && !waiting_for_first_input) {
                 const auto id = common_sampler_last(smpl);
                 assistant_ss << common_token_to_piece(ctx, id, false);
+
+                if (!prompt.empty()) {
+                    prompt.clear();
+                    is_interacting = false;
+                }
             }
 
             if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
@@ -905,6 +927,11 @@ int main(int argc, char ** argv) {
                     common_sampler_reset(smpl);
                 }
                 is_interacting = false;
+
+                if (waiting_for_first_input && params.single_turn) {
+                    params.interactive = false;
+                    params.interactive_first = false;
+                }
                 waiting_for_first_input = false;
             }
         }