@@ -1648,7 +1648,7 @@ std::string common_get_builtin_chat_template(const struct llama_model * model) {
 
 bool common_chat_verify_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
-    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
     return res >= 0;
 }
 
@@ -1659,35 +1659,34 @@ std::string common_chat_apply_template(const struct llama_model * model,
     int alloc_size = 0;
     bool fallback = false; // indicate if we must fallback to default chatml
     std::vector<llama_chat_message> chat;
-    for (auto & msg : msgs) {
+    for (const auto & msg : msgs) {
         chat.push_back({msg.role.c_str(), msg.content.c_str()});
         alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
     }
 
-    const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
+    const char * ptr_tmpl = tmpl.empty() ? llama_model_chat_template(model) : tmpl.c_str();
     std::vector<char> buf(alloc_size);
 
     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+    int32_t res = llama_chat_apply_template(ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
 
     // error: chat template is not supported
     if (res < 0) {
         if (ptr_tmpl != nullptr) {
             // if the custom "tmpl" is not supported, we throw an error
             // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
             throw std::runtime_error("this custom template is not supported");
-        } else {
-            // If the built-in template is not supported, we default to chatml
-            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size());
-            fallback = true;
         }
+
+        // If the built-in template is not supported, we default to chatml
+        res = llama_chat_apply_template("chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+        fallback = true;
     }
 
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
        buf.resize(res);
        res = llama_chat_apply_template(
-            fallback ? nullptr : model,
             fallback ? "chatml" : ptr_tmpl,
             chat.data(), chat.size(), add_ass, buf.data(), buf.size());
     }
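For context, a minimal sketch of what a direct caller looks like after this change: the template string is obtained via llama_model_chat_template(model) and passed to llama_chat_apply_template(), which no longer takes the model pointer. Only those two calls come from the patch; the helper name, messages, and buffer handling below are illustrative assumptions.

```cpp
#include "llama.h"

#include <string>
#include <vector>

// hypothetical helper, not part of the patch: render a short conversation
// into a prompt string using the model's built-in chat template
static std::string render_prompt(const llama_model * model) {
    llama_chat_message msgs[] = {
        {"system", "You are a helpful assistant."},
        {"user",   "Hello!"},
    };
    const size_t n_msgs = sizeof(msgs) / sizeof(msgs[0]);

    // the template now comes from the model accessor instead of being
    // resolved inside llama_chat_apply_template() via the model pointer
    const char * tmpl = llama_model_chat_template(model);

    std::vector<char> buf(1024);

    // first call returns the required length; resize and retry if the
    // buffer was too small, mirroring common_chat_apply_template() above
    int32_t res = llama_chat_apply_template(tmpl, msgs, n_msgs, /*add_ass=*/true, buf.data(), buf.size());
    if (res < 0) {
        return ""; // template missing or unsupported
    }
    if ((size_t) res > buf.size()) {
        buf.resize(res);
        res = llama_chat_apply_template(tmpl, msgs, n_msgs, /*add_ass=*/true, buf.data(), buf.size());
    }
    return std::string(buf.data(), res);
}
```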