@@ -365,6 +365,8 @@ int main(int argc, char ** argv) {
365365 chat_formatter chat_add_and_format (chat_msgs, chat_templates, params);
366366
367367 std::string prompt;
368+ std::string system_remaining; // system-message content not yet matched against printed tokens; a matched prefix is erased as each token is printed
369+ std::string prompt_remaining; // user-message content not yet matched against printed tokens; consumed the same way as system_remaining
368370 {
369371 if (params.conversation_mode && params.enable_chat_template ) {
370372 if (!params.system_prompt .empty ()) {
@@ -400,6 +402,19 @@ int main(int argc, char ** argv) {
400402 LOG_DBG (" tokens: %s\n " , string_from (ctx, embd_inp).c_str ());
401403 }
402404
405+ // Set up content tracking to skip template markup during display: remember the raw system/user message text so that only tokens reproducing that text are printed
406+ bool skip_template_markup = false ; // stays false unless chat-template conversation mode is active and there is message content to track
407+ if (params.conversation_mode && params.enable_chat_template ) {
408+ for (const auto & msg : chat_msgs) { // every message is visited; a later message of the same role overwrites the earlier one
409+ if (msg.role == " system" ) { // NOTE(review): the role literals here carry a leading space - confirm against the upstream source, this may be a capture artifact
410+ system_remaining = msg.content ;
411+ } else if (msg.role == " user" ) {
412+ prompt_remaining = msg.content ;
413+ }
414+ }
415+ skip_template_markup = !system_remaining.empty () || !prompt_remaining.empty (); // enable filtering only when at least one of the two strings is non-empty
416+ }
417+
403418 // Should not run without any tokens
404419 if (!waiting_for_first_input && embd_inp.empty ()) {
405420 if (add_bos) {
@@ -833,7 +848,29 @@ int main(int argc, char ** argv) {
833848 const std::string token_str = common_token_to_piece (ctx, id, params.special );
834849
835850 if (!chat_add_and_format.get_partial_formatter () || assistant_ss.str ().empty ()) { // print path taken when there is no partial formatter or assistant_ss is still empty
836- LOG (" %s" , token_str.c_str ());
851+ if (skip_template_markup) { // filtering mode: a token is printed only if it is a prefix of the remaining system or user content
852+ if (!token_str.empty () && !system_remaining.empty () && // system content is tried first
853+ system_remaining.compare (0 , token_str.length (), token_str) == 0 ) { // true when the first token_str.length() chars of system_remaining equal token_str (no match if fewer chars remain)
854+
855+ system_remaining.erase (0 , token_str.length ()); // drop the matched prefix so the next token is compared against what follows
856+ LOG (" %s" , token_str.c_str ());
857+ if (system_remaining.empty ()) { // whole system message has been printed
858+ LOG (" \n " ); // terminate the printed system message with a newline
859+ }
860+
861+ } else if (!token_str.empty () && !prompt_remaining.empty () && // otherwise try the user content with the same prefix test
862+ prompt_remaining.compare (0 , token_str.length (), token_str) == 0 ) {
863+
864+ prompt_remaining.erase (0 , token_str.length ()); // drop the matched prefix of the user content
865+ LOG (" %s" , token_str.c_str ());
866+ if (prompt_remaining.empty ()) { // whole user message has been printed
867+ LOG (" \n " ); // terminate the printed user message with a newline
868+ }
869+ } // a token matching neither string is not printed at all
870+
871+ } else { // filtering disabled: print every token piece as-is
872+ LOG (" %s" , token_str.c_str ());
873+ }
837874 }
838875
839876 // Record Displayed Tokens To Log
@@ -853,6 +890,7 @@ int main(int argc, char ** argv) {
853890 if (input_echo && (int ) embd_inp.size () == n_consumed) { // presumably: every queued input token has been consumed - confirm how n_consumed is advanced
854891 console::set_display (console::reset);
855892 display = true ;
893+ skip_template_markup = false ; // system & prompt processing complete: stop filtering so code guarded by this flag prints tokens unconditionally from here on
856894 }
857895
858896 // if not currently processing queued inputs;
0 commit comments