@@ -1510,16 +1510,20 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
 // Chat template utils
 //

-bool llama_chat_verify_template(const std::string & tmpl) {
+bool llama_chat_verify_template(const std::string & tmpl, bool use_jinja) {
     llama_chat_message chat[] = {{"user", "test"}};
-    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0, use_jinja);
     return res >= 0;
 }

 std::string llama_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & msgs,
-        bool add_ass) {
+        bool add_ass,
+        bool use_jinja,
+        const std::string & tools,
+        const char * bos_token,
+        const char * eos_token) {
     int alloc_size = 0;
     bool fallback = false; // indicate if we must fallback to default chatml
     std::vector<llama_chat_message> chat;
@@ -1532,7 +1536,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
     std::vector<char> buf(alloc_size);

     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, tools.empty() ? nullptr : tools.data(), bos_token, eos_token);

     // error: chat template is not supported
     if (res < 0) {
@@ -1542,7 +1546,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
             throw std::runtime_error("this custom template is not supported");
         } else {
             // If the built-in template is not supported, we default to chatml
-            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, bos_token, eos_token);
             fallback = true;
         }
     }
@@ -1553,7 +1557,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
         res = llama_chat_apply_template(
             fallback ? nullptr : model,
             fallback ? "chatml" : ptr_tmpl,
-            chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, bos_token, eos_token);
     }

     std::string formatted_chat(buf.data(), res);
@@ -1564,17 +1568,21 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & past_msg,
         const llama_chat_msg & new_msg,
-        bool add_ass) {
+        bool add_ass,
+        bool use_jinja,
+        const std::string & tools,
+        const char * bos_token,
+        const char * eos_token) {
     std::ostringstream ss;
-    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false);
+    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja, bos_token, eos_token);
     std::vector<llama_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
         ss << "\n";
     };
     // format chat with new_msg
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
+    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja, bos_token, eos_token);
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
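
For context, a minimal caller-side sketch of how the extended common wrappers above might be used. The helper name `format_prompt`, the include, and the placeholder argument values are assumptions for illustration; only the signatures shown in the diff are taken from the PR.

```cpp
#include "common.h"  // assumed header declaring the wrappers shown in the diff

#include <string>
#include <vector>

// Hypothetical helper: verify the template, then render a chat with the
// new use_jinja / tools / bos_token / eos_token parameters.
static std::string format_prompt(const struct llama_model * model,
                                 const std::string & tmpl,
                                 const std::vector<llama_chat_msg> & msgs,
                                 bool use_jinja) {
    // reject custom templates that neither the legacy nor the jinja path supports
    if (!tmpl.empty() && !llama_chat_verify_template(tmpl, use_jinja)) {
        return "";
    }
    // tools JSON and the bos/eos tokens are passed through to the template;
    // empty placeholders are used here purely for illustration
    return llama_chat_apply_template(model, tmpl, msgs, /*add_ass=*/true,
                                     use_jinja, /*tools=*/"",
                                     /*bos_token=*/"", /*eos_token=*/"");
}
```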