From d230722d8ed16d6a04360db026db8cbd7deceec0 Mon Sep 17 00:00:00 2001 From: Mason M Date: Wed, 15 Oct 2025 22:16:15 -0300 Subject: [PATCH 1/4] Add partial formatter --- tools/main/main.cpp | 118 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 11 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 498e00e3a5e58..b31527cbc9aee 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -83,6 +83,102 @@ static void sigint_handler(int signo) { } #endif +class partial_formatter { +public: + enum output_type { + CONTENT, + REASONING, + }; + + struct output { + std::string formatted; + output_type type; + }; + + partial_formatter(const common_chat_syntax & syntax) : syntax_(syntax), had_reasoning_(false) {} + + std::vector<output> operator()(const std::string & accumulated) { + common_chat_msg next = common_chat_parse(accumulated, true, syntax_); + + auto diffs = common_chat_msg_diff::compute_diffs(previous_, next); + std::vector<output> result; + for (const auto & diff : diffs) { + if (!diff.reasoning_content_delta.empty()) { + result.push_back({diff.reasoning_content_delta, REASONING}); + had_reasoning_ = true; + } + if (!diff.content_delta.empty()) { + if (had_reasoning_) { + result.push_back({"\n", REASONING}); + had_reasoning_ = false; + } + result.push_back({diff.content_delta, CONTENT}); + } + } + previous_ = next; + return result; + } + +private: + common_chat_syntax syntax_; + common_chat_msg previous_; + bool had_reasoning_; +}; + +class chat_formatter { +public: + chat_formatter( + std::vector<common_chat_msg> & chat_msgs, + const common_chat_templates_ptr & chat_templates, + const common_params & params) + : chat_msgs_(chat_msgs), + chat_templates_(chat_templates), + params_(params) {} + + std::string operator()(const std::string & role, const std::string & content) { + common_chat_msg new_msg; + new_msg.role = role; + new_msg.content = content; + chat_msgs_.push_back(new_msg); + + common_chat_templates_inputs cinputs; + 
cinputs.use_jinja = params_.use_jinja; + cinputs.messages = chat_msgs_; + cinputs.add_generation_prompt = (role == "user"); + cinputs.reasoning_format = params_.reasoning_format; + + cinputs.enable_thinking = + params_.use_jinja && params_.reasoning_budget != 0 && + common_chat_templates_support_enable_thinking(chat_templates_.get()); + + common_chat_params cparams = common_chat_templates_apply(chat_templates_.get(), cinputs); + + if (!partial_formatter_ptr_ && params_.reasoning_format != COMMON_REASONING_FORMAT_NONE) { + common_chat_syntax chat_syntax; + chat_syntax.format = cparams.format; + chat_syntax.reasoning_format = params_.reasoning_format; + chat_syntax.thinking_forced_open = cparams.thinking_forced_open; + chat_syntax.parse_tool_calls = false; + partial_formatter_ptr_ = std::make_unique<partial_formatter>(chat_syntax); + } + + std::string formatted = cparams.prompt.substr(formatted_cumulative_.size()); + formatted_cumulative_ = cparams.prompt; + + LOG_DBG("formatted: '%s'\n", formatted.c_str()); + return formatted; + } + + partial_formatter * get_partial_formatter() { return partial_formatter_ptr_.get(); } + +private: + std::vector<common_chat_msg> & chat_msgs_; + const common_chat_templates_ptr & chat_templates_; + const common_params & params_; + std::unique_ptr<partial_formatter> partial_formatter_ptr_; + std::string formatted_cumulative_; +}; + int main(int argc, char ** argv) { common_params params; g_params = &params; @@ -265,15 +361,7 @@ int main(int argc, char ** argv) { std::vector<llama_token> embd_inp; bool waiting_for_first_input = false; - auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) { common_chat_msg new_msg; new_msg.role = role; new_msg.content = content; auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja); chat_msgs.push_back(new_msg); LOG_DBG("formatted: '%s'\n", formatted.c_str()); return formatted; }; + chat_formatter chat_add_and_format(chat_msgs, 
chat_templates, params); std::string prompt; { @@ -709,6 +797,13 @@ int main(int argc, char ** argv) { if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) { assistant_ss << common_token_to_piece(ctx, id, false); + + if (auto * formatter = chat_add_and_format.get_partial_formatter()) { + auto outputs = (*formatter)(assistant_ss.str()); + for (const auto & out : outputs) { + LOG("%s", out.formatted.c_str()); + } + } } // echo this to console @@ -740,8 +835,9 @@ int main(int argc, char ** argv) { for (auto id : embd) { const std::string token_str = common_token_to_piece(ctx, id, params.special); - // Console/Stream Output - LOG("%s", token_str.c_str()); + if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { + LOG("%s", token_str.c_str()); + } // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check From 3d941129a72c9014b213781daffecdb9fdc47979 Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 16 Oct 2025 09:13:46 -0300 Subject: [PATCH 2/4] Remove extra call to common_chat_templates_apply --- tools/main/main.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index b31527cbc9aee..684dd6dca74d7 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -170,6 +170,7 @@ class chat_formatter { } partial_formatter * get_partial_formatter() { return partial_formatter_ptr_.get(); } + const std::string & get_full_prompt() const { return formatted_cumulative_; } private: std::vector<common_chat_msg> & chat_msgs_; const common_chat_templates_ptr & chat_templates_; const common_params & params_; @@ -379,13 +380,9 @@ int main(int argc, char ** argv) { } if (!params.system_prompt.empty() || !params.prompt.empty()) { - common_chat_templates_inputs inputs; - inputs.use_jinja = g_params->use_jinja; - inputs.messages = chat_msgs; - inputs.add_generation_prompt = !params.prompt.empty(); - - prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt; + prompt = 
chat_add_and_format.get_full_prompt(); } + } else { // otherwise use the prompt as is prompt = params.prompt; From a7771c1b429f5893e190d2d01f9ef737cfe8063f Mon Sep 17 00:00:00 2001 From: Mason M Date: Thu, 16 Oct 2025 10:40:59 -0300 Subject: [PATCH 3/4] Suppress template markup in system & prompt display --- tools/main/main.cpp | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 684dd6dca74d7..ec8bc4b7b3438 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -365,6 +365,8 @@ int main(int argc, char ** argv) { chat_formatter chat_add_and_format(chat_msgs, chat_templates, params); std::string prompt; + std::string system_remaining; + std::string prompt_remaining; { if (params.conversation_mode && params.enable_chat_template) { if (!params.system_prompt.empty()) { @@ -400,6 +402,19 @@ int main(int argc, char ** argv) { LOG_DBG("tokens: %s\n", string_from(ctx, embd_inp).c_str()); } + // Set up content tracking to skip template markup during display + bool skip_template_markup = false; + if (params.conversation_mode && params.enable_chat_template) { + for (const auto & msg : chat_msgs) { + if (msg.role == "system") { + system_remaining = msg.content; + } else if (msg.role == "user") { + prompt_remaining = msg.content; + } + } + skip_template_markup = !system_remaining.empty() || !prompt_remaining.empty(); + } + // Should not run without any tokens if (!waiting_for_first_input && embd_inp.empty()) { if (add_bos) { @@ -833,7 +848,29 @@ int main(int argc, char ** argv) { const std::string token_str = common_token_to_piece(ctx, id, params.special); if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { - LOG("%s", token_str.c_str()); + if (skip_template_markup) { + if (!token_str.empty() && !system_remaining.empty() && + system_remaining.compare(0, token_str.length(), token_str) == 0) { + + system_remaining.erase(0, token_str.length()); 
+ LOG("%s", token_str.c_str()); + if (system_remaining.empty()) { + LOG("\n"); + } + + } else if (!token_str.empty() && !prompt_remaining.empty() && + prompt_remaining.compare(0, token_str.length(), token_str) == 0) { + + prompt_remaining.erase(0, token_str.length()); + LOG("%s", token_str.c_str()); + if (prompt_remaining.empty()) { + LOG("\n"); + } + } + + } else { + LOG("%s", token_str.c_str()); + } } // Record Displayed Tokens To Log @@ -853,6 +890,7 @@ int main(int argc, char ** argv) { if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); display = true; + skip_template_markup = false; // system & prompt processing complete } // if not currently processing queued inputs; From 8694fa3f8b1fe8a444df114c0aaec55da0192de8 Mon Sep 17 00:00:00 2001 From: Mason M Date: Fri, 17 Oct 2025 12:01:51 -0300 Subject: [PATCH 4/4] Track system/user prompt position --- tools/main/main.cpp | 82 +++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/tools/main/main.cpp b/tools/main/main.cpp index ec8bc4b7b3438..a539f1f111dac 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -83,6 +83,33 @@ static void sigint_handler(int signo) { } #endif +class template_content_tracker { +public: + template_content_tracker() : pos_(0), start_(std::string::npos), end_(std::string::npos) {} + + void set_range(size_t start, size_t end) { + start_ = start; + end_ = end; + } + + bool should_display(size_t pos) const { + return start_ != std::string::npos && pos >= start_ && pos < end_; + } + + bool at_end(size_t pos) const { + return start_ != std::string::npos && pos >= end_; + } + + bool is_active() const { + return start_ != std::string::npos; + } + +private: + size_t pos_; + size_t start_; + size_t end_; +}; + class partial_formatter { public: enum output_type { @@ -365,8 +392,6 @@ int main(int argc, char ** argv) { chat_formatter chat_add_and_format(chat_msgs, chat_templates, params); 
std::string prompt; - std::string system_remaining; - std::string prompt_remaining; { if (params.conversation_mode && params.enable_chat_template) { if (!params.system_prompt.empty()) { @@ -403,16 +428,27 @@ int main(int argc, char ** argv) { } // Set up content tracking to skip template markup during display - bool skip_template_markup = false; + size_t prompt_pos = 0; + template_content_tracker system_tracker; + template_content_tracker prompt_tracker; + if (params.conversation_mode && params.enable_chat_template) { + size_t search_pos = 0; for (const auto & msg : chat_msgs) { if (msg.role == "system") { - system_remaining = msg.content; + size_t content_start = prompt.find(msg.content, search_pos); + if (content_start != std::string::npos) { + system_tracker.set_range(content_start, content_start + msg.content.length()); + search_pos = content_start + msg.content.length(); + } } else if (msg.role == "user") { - prompt_remaining = msg.content; + size_t content_start = prompt.find(msg.content, search_pos); + if (content_start != std::string::npos) { + prompt_tracker.set_range(content_start, content_start + msg.content.length()); + search_pos = content_start + msg.content.length(); + } } } - skip_template_markup = !system_remaining.empty() || !prompt_remaining.empty(); } // Should not run without any tokens @@ -848,29 +884,26 @@ int main(int argc, char ** argv) { const std::string token_str = common_token_to_piece(ctx, id, params.special); if (!chat_add_and_format.get_partial_formatter() || assistant_ss.str().empty()) { - if (skip_template_markup) { - if (!token_str.empty() && !system_remaining.empty() && - system_remaining.compare(0, token_str.length(), token_str) == 0) { - - system_remaining.erase(0, token_str.length()); - LOG("%s", token_str.c_str()); - if (system_remaining.empty()) { - LOG("\n"); - } - - } else if (!token_str.empty() && !prompt_remaining.empty() && - prompt_remaining.compare(0, token_str.length(), token_str) == 0) { + bool always_display = 
!system_tracker.is_active() && !prompt_tracker.is_active(); + if (always_display) { + LOG("%s", token_str.c_str()); - prompt_remaining.erase(0, token_str.length()); - LOG("%s", token_str.c_str()); - if (prompt_remaining.empty()) { - LOG("\n"); - } + } else if (system_tracker.should_display(prompt_pos)) { + LOG("%s", token_str.c_str()); + size_t next_pos = prompt_pos + token_str.length(); + if (system_tracker.at_end(next_pos)) { + LOG("\n"); } - } else { + } else if (prompt_tracker.should_display(prompt_pos)) { LOG("%s", token_str.c_str()); + size_t next_pos = prompt_pos + token_str.length(); + if (prompt_tracker.at_end(next_pos)) { + LOG("\n"); + } } + + prompt_pos += token_str.length(); } // Record Displayed Tokens To Log @@ -890,7 +923,6 @@ int main(int argc, char ** argv) { if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); display = true; - skip_template_markup = false; // system & prompt processing complete } // if not currently processing queued inputs;