@@ -1521,7 +1521,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
         const std::vector<llama_chat_msg> & msgs,
         bool add_ass,
         bool use_jinja,
-        const std::string & tools,
+        const char * tools,
         const char * bos_token,
         const char * eos_token) {
     int alloc_size = 0;
@@ -1536,7 +1536,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
     std::vector<char> buf(alloc_size);

     // run the first time to get the total output length
-    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, tools.empty() ? nullptr : tools.data(), bos_token, eos_token);
+    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, tools, bos_token, eos_token);

     // error: chat template is not supported
     if (res < 0) {
@@ -1546,7 +1546,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
             throw std::runtime_error("this custom template is not supported");
         } else {
             // If the built-in template is not supported, we default to chatml
-            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, bos_token, eos_token);
+            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, tools, bos_token, eos_token);
             fallback = true;
         }
     }
@@ -1557,7 +1557,7 @@ std::string llama_chat_apply_template(const struct llama_model * model,
         res = llama_chat_apply_template(
             fallback ? nullptr : model,
             fallback ? "chatml" : ptr_tmpl,
-            chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, bos_token, eos_token);
+            chat.data(), chat.size(), add_ass, buf.data(), buf.size(), use_jinja, tools, bos_token, eos_token);
     }

     std::string formatted_chat(buf.data(), res);
@@ -1570,19 +1570,19 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const llama_chat_msg & new_msg,
         bool add_ass,
         bool use_jinja,
-        const std::string & tools,
+        const char * tools,
         const char * bos_token,
         const char * eos_token) {
     std::ostringstream ss;
-    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja, bos_token, eos_token);
+    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja, tools, bos_token, eos_token);
     std::vector<llama_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
         ss << "\n";
     };
     // format chat with new_msg
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja, bos_token, eos_token);
+    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja, tools, bos_token, eos_token);
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
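For context, a caller-side sketch (not part of this commit) of how the updated wrapper might be invoked after the parameter change. It assumes the argument order visible in the diff (model, template, messages, add_ass, use_jinja, tools, bos_token, eos_token), that llama_chat_msg carries role/content string fields as declared in common.h, and that an empty template string selects the model's built-in template; the helper name and the tools JSON below are purely illustrative, and nullptr can be passed for tools when none are used.

#include <string>
#include <vector>
#include "common.h"  // assumed location of llama_chat_apply_template wrapper and llama_chat_msg

std::string example_format(const struct llama_model * model) {
    // assumed: llama_chat_msg exposes role/content strings
    std::vector<llama_chat_msg> msgs = {
        {"system", "You are a helpful assistant."},
        {"user",   "What is the weather in Paris?"},
    };
    // hypothetical tool definition; pass nullptr when no tools are supplied
    const char * tools = "[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{}}}]";
    return llama_chat_apply_template(model, /*tmpl=*/"", msgs,
        /*add_ass=*/true, /*use_jinja=*/true, tools,
        /*bos_token=*/"", /*eos_token=*/"");
}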