 #endif
 #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083

+const char * LLAMA_CHATML_TEMPLATE = R"(
+    {%- for message in messages -%}
+        {{- "<|im_start|>" + message.role + "\n" + message.content + "<|im_end|>\n" -}}
+    {%- endfor -%}
+    {%- if add_generation_prompt -%}
+        {{- "<|im_start|>assistant\n" -}}
+    {%- endif -%}
+)";
+
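// Editorial example (not part of this patch): with the ChatML template above, a single
// user turn {"user", "Hi"} and add_generation_prompt = true renders roughly as:
//
//   <|im_start|>user
//   Hi<|im_end|>
//   <|im_start|>assistant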
 //
 // CURL utils
 //
@@ -1748,86 +1757,77 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
     return res >= 0;
 }

-std::string common_chat_apply_template(const struct llama_model * model,
-        const std::string & tmpl,
+std::string common_chat_apply_template(
+        const llama_chat_template & tmpl,
         const std::vector<common_chat_msg> & msgs,
-        bool add_ass) {
+        bool add_ass,
+        bool use_jinja) {
+    if (use_jinja) {
+        auto messages = json::array();
+        for (const auto & msg : msgs) {
+            messages.push_back({{"role", msg.role}, {"content", msg.content}});
+        }
+        return tmpl.apply(messages, /* tools= */ json(), add_ass);
+    }
+
     int alloc_size = 0;
-    bool fallback = false; // indicate if we must fallback to default chatml
     std::vector<llama_chat_message> chat;
     for (const auto & msg : msgs) {
         chat.push_back({msg.role.c_str(), msg.content.c_str()});
         alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
     }

-    const char * ptr_tmpl = tmpl.empty() ? llama_model_chat_template(model, /* name */ nullptr) : tmpl.c_str();
     std::vector<char> buf(alloc_size);

     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+    int32_t res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());

     // error: chat template is not supported
     if (res < 0) {
-        if (ptr_tmpl != nullptr) {
-            // if the custom "tmpl" is not supported, we throw an error
-            // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
-            throw std::runtime_error("this custom template is not supported");
-        }
-
-        // If the built-in template is not supported, we default to chatml
-        res = llama_chat_apply_template("chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size());
-        fallback = true;
+        // if the custom "tmpl" is not supported, we throw an error
+        // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
+        throw std::runtime_error("this custom template is not supported");
     }

     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(
-            fallback ? "chatml" : ptr_tmpl,
-            chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+        res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
     }

     std::string formatted_chat(buf.data(), res);
     return formatted_chat;
 }
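// Editorial sketch (not part of this patch): minimal usage of the new signature,
// assuming a llama_chat_template `tmpl` has already been obtained (e.g. via
// llama_chat_templates_from_model() further down). Variable names are illustrative only.
std::vector<common_chat_msg> msgs = {
    {"system", "You are a helpful assistant"},
    {"user",   "Hello"},
};
// legacy llama_chat_apply_template() formatter
std::string prompt       = common_chat_apply_template(tmpl, msgs, /* add_ass= */ true, /* use_jinja= */ false);
// minja-based Jinja renderer, selected by the use_jinja flag
std::string prompt_jinja = common_chat_apply_template(tmpl, msgs, /* add_ass= */ true, /* use_jinja= */ true);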

-std::string common_chat_format_single(const struct llama_model * model,
-        const std::string & tmpl,
+std::string common_chat_format_single(
+        const llama_chat_template & tmpl,
         const std::vector<common_chat_msg> & past_msg,
         const common_chat_msg & new_msg,
-        bool add_ass) {
+        bool add_ass,
+        bool use_jinja) {
     std::ostringstream ss;
-    auto fmt_past_msg = past_msg.empty() ? "" : common_chat_apply_template(model, tmpl, past_msg, false);
+    auto fmt_past_msg = past_msg.empty() ? "" : common_chat_apply_template(tmpl, past_msg, false, use_jinja);
     std::vector<common_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
         ss << "\n";
     };
     // format chat with new_msg
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = common_chat_apply_template(model, tmpl, chat_new, add_ass);
+    auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja);
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
 }
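// Editorial sketch (not part of this patch): common_chat_format_single() returns only the
// portion of the prompt contributed by `new_msg`, so callers can append it to an already
// formatted history instead of rebuilding the whole prompt. A hypothetical call, again
// assuming a loaded `tmpl`:
std::vector<common_chat_msg> history = { {"user", "Hello"}, {"assistant", "Hi there"} };
common_chat_msg next = {"user", "How are you?"};
std::string delta = common_chat_format_single(tmpl, history, next, /* add_ass= */ true, /* use_jinja= */ false);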

-std::string common_chat_format_example(const struct llama_model * model, const minja::chat_template & tmpl, bool use_jinja) {
+std::string common_chat_format_example(const llama_chat_template & tmpl, bool use_jinja) {
     std::vector<common_chat_msg> msgs = {
         {"system",    "You are a helpful assistant"},
         {"user",      "Hello"},
         {"assistant", "Hi there"},
         {"user",      "How are you?"},
     };
-    const auto add_generation_prompt = true;
-    if (use_jinja) {
-        auto messages = json::array();
-        for (const auto & msg : msgs) {
-            messages.push_back({{"role", msg.role}, {"content", msg.content}});
-        }
-        return tmpl.apply(messages, /* tools= */ json(), add_generation_prompt);
-    } else {
-        return common_chat_apply_template(model, tmpl.source(), msgs, add_generation_prompt);
-    }
+    return common_chat_apply_template(tmpl, msgs, true, use_jinja);
 }

 llama_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override)
@@ -1847,14 +1847,7 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
         if (!tool_use_template_src.empty()) {
             default_template_src = tool_use_template_src;
         } else {
-            default_template_src = R"(
-                {%- for message in messages -%}
-                    {{- "<|im_start|>" + message.role + "\n" + message.content + "<|im_end|>\n" -}}
-                {%- endfor -%}
-                {%- if add_generation_prompt -%}
-                    {{- "<|im_start|>assistant\n" -}}
-                {%- endif -%}
-            )";
+            default_template_src = LLAMA_CHATML_TEMPLATE;
         }
     }
     return {