Skip to content

Commit 5e991fb

Browse files
sayanshaw24 (Sayan Shaw)
and authored
Add tools as chat template input (#951)
* add tools as chat template input * chore: trigger CI * chore: trigger CI * chore: trigger CI * add python test --------- Co-authored-by: Sayan Shaw <[email protected]>
1 parent 666d9aa commit 5e991fb

File tree

8 files changed

+56
-17
lines changed

8 files changed

+56
-17
lines changed

include/ortx_tokenizer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,15 @@ extError_t ORTX_API_CALL OrtxTokenId2DArrayGetItem(const OrtxTokenId2DArray* tok
213213
* @param tokenizer Pointer to an OrtxTokenizer used for template processing.
214214
* @param template_str Null-terminated string representing the chat template; can be null if tokenizer.json has one.
215215
* @param input Null-terminated string containing the input to be processed.
216+
* @param tools Null-terminated string containing the function tools.
216217
* @param output Pointer to an OrtxTensorResult that will be populated with the output strings,
217218
* if tokenize is true, the ids will be in the output as indexed 1.
218219
* @param add_generation_prompt Indicates whether to add a generation prompt to the output.
219220
* @param tokenize Indicates whether to tokenize the templated text to IDs
220221
* @return extError_t Returns an error code indicating success or the type of failure.
221222
*/
222223
extError_t ORTX_API_CALL OrtxApplyChatTemplate(const OrtxTokenizer* tokenizer, const char* template_str,
223-
const char* input, OrtxTensorResult** output,
224+
const char* input, const char* tools, OrtxTensorResult** output,
224225
bool add_generation_prompt, bool tokenize);
225226

226227
#ifdef __cplusplus

onnxruntime_extensions/pp_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def tokenize(self, text, add_special_tokens = True):
5858
def detokenize(self, tokens):
5959
return batch_detokenize(self.tokenizer, [tokens])
6060

61-
def apply_chat_template(self, chat, template="", add_generation_prompt=True, tokenize=False):
61+
def apply_chat_template(self, chat, template="", tools="", add_generation_prompt=True, tokenize=False):
6262
result = _apply_chat_template(
63-
self.tokenizer, template, chat, add_generation_prompt, tokenize)
63+
self.tokenizer, template, chat, tools, add_generation_prompt, tokenize)
6464
return tensor_result_get_at(result, 1 if tokenize else 0)
6565

6666
def __del__(self):

pyop/py_c_api.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,13 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
229229

230230
m.def(
231231
"apply_chat_template",
232-
[](std::uintptr_t h, const std::string& template_str, const std::string& input, bool add_generation_prompt,
233-
bool tokenize) -> std::uintptr_t {
232+
[](std::uintptr_t h, const std::string& template_str, const std::string& input, const std::string& tools,
233+
bool add_generation_prompt, bool tokenize) -> std::uintptr_t {
234234
OrtxTokenizer* tokenizer = reinterpret_cast<OrtxTokenizer*>(h);
235235
OrtxTensorResult* result{};
236236
auto err = OrtxApplyChatTemplate(tokenizer, template_str.empty() ? nullptr : template_str.c_str(),
237-
input.c_str(), &result, add_generation_prompt, tokenize);
237+
input.c_str(), tools.empty() ? nullptr : tools.c_str(),
238+
&result, add_generation_prompt, tokenize);
238239
if (err != kOrtxOK) {
239240
throw std::runtime_error(std::string("Failed to apply chat template: ") + OrtxGetLastErrorMessage());
240241
}

shared/api/c_api_tokenizer.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,8 @@ extError_t ORTX_API_CALL OrtxDetokenizeCached(const OrtxTokenizer* tokenizer, Or
355355
}
356356

357357
extError_t ORTX_API_CALL OrtxApplyChatTemplate(const OrtxTokenizer* tokenizer, const char* template_str,
358-
const char* input, OrtxTensorResult** output, bool add_generation_prompt,
358+
const char* input, const char* tools,
359+
OrtxTensorResult** output, bool add_generation_prompt,
359360
bool tokenize) {
360361
if (tokenizer == nullptr && template_str == nullptr) {
361362
ReturnableStatus::last_error_message_ = "both tokenizer and template_str are null, no template to apply";
@@ -375,7 +376,7 @@ extError_t ORTX_API_CALL OrtxApplyChatTemplate(const OrtxTokenizer* tokenizer, c
375376

376377
std::string text;
377378
std::vector<extTokenId_t> ids_vec;
378-
status = token_ptr->ApplyChatTemplate(template_str, input, text, ids_vec, add_generation_prompt, tokenize);
379+
status = token_ptr->ApplyChatTemplate(template_str, input, tools, text, ids_vec, add_generation_prompt, tokenize);
379380
if (status.IsOk()) {
380381
auto result = std::make_unique<ort_extensions::TensorResult>();
381382
std::vector<std::unique_ptr<ortc::TensorBase>> tensors;

shared/api/chat_template.cc

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,8 @@ void TokenizerImpl::InitializeChatParameters(const char* template_str,
718718
}
719719

720720
// ApplyChatTemplate method to choose the template logic based on chat_template
721-
OrtxStatus TokenizerImpl::ApplyChatTemplate(const TokenizerImpl::MessageList& message_list, std::string& output,
721+
OrtxStatus TokenizerImpl::ApplyChatTemplate(const TokenizerImpl::MessageList& message_list,
722+
const char* tools, std::string& output,
722723
bool add_generation_prompt) const {
723724
// Note: The official chat template from this model's config file may not be supported.
724725
// However, we do not throw an error until checking model_to_template_map as the user
@@ -734,6 +735,20 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(const TokenizerImpl::MessageList& me
734735

735736
messages = message_list;
736737

738+
if (tools && *tools) {
739+
tool_calls = std::string(tools);
740+
if (!messages.empty()) {
741+
if (messages[0].find("tools") != messages[0].end()) {
742+
messages[0]["tools"] = tool_calls;
743+
tools_in_user_message = true;
744+
}
745+
if (messages[0].find("tool_calls") != messages[0].end()) {
746+
messages[0]["tool_calls"] = tool_calls;
747+
tools_in_user_message = true;
748+
}
749+
}
750+
}
751+
737752
// Apply the corresponding chat template if it is supported
738753
if (chat_template == PHI4_CHAT_TEMPLATE) {
739754
return Phi4ChatTemplate(output, add_generation_prompt);
@@ -762,9 +777,9 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(const TokenizerImpl::MessageList& me
762777
return {};
763778
}
764779

765-
OrtxStatus TokenizerImpl::ApplyChatTemplate(const char* template_str, const char* message, std::string& output,
766-
std::vector<extTokenId_t>& ids_vec, bool add_generation_prompt,
767-
bool tokenize) const {
780+
OrtxStatus TokenizerImpl::ApplyChatTemplate(const char* template_str, const char* message, const char* tools,
781+
std::string& output, std::vector<extTokenId_t>& ids_vec,
782+
bool add_generation_prompt, bool tokenize) const {
768783
OrtxStatus status;
769784
std::string input_str = minja::normalize_newlines(message);
770785
auto activated_str = tok_config_->chat_template_.c_str();
@@ -783,7 +798,7 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(const char* template_str, const char
783798
return {kOrtxErrorInvalidArgument, "Invalid JSON format in chat message."};
784799
}
785800

786-
status = ApplyChatTemplate(message_list, output, add_generation_prompt);
801+
status = ApplyChatTemplate(message_list, tools, output, add_generation_prompt);
787802
} else {
788803
using json = nlohmann::ordered_json;
789804
std::string text;

shared/api/tokenizer_impl.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,12 @@ class TokenizerImpl : public OrtxObjectImpl {
5858
std::string chat_template;
5959
mutable MessageList messages;
6060

61+
mutable std::string tool_calls;
62+
6163
std::string bos_token;
6264
std::string eos_token;
6365
std::vector<std::string> custom_tools;
64-
bool tools_in_user_message;
66+
mutable bool tools_in_user_message;
6567
std::string strftime_now;
6668
std::string date_string;
6769
std::vector<std::string> builtin_tools;
@@ -81,7 +83,7 @@ class TokenizerImpl : public OrtxObjectImpl {
8183
OrtxStatus Id2Token(extTokenId_t id, std::string& token, TokenizerDecodingState** state) const;
8284
OrtxStatus GetDecoderPromptIds(size_t batch_size, const char* lang, const char* task, int no_timestamps,
8385
std::vector<std::vector<extTokenId_t>>& t_ids) const;
84-
OrtxStatus ApplyChatTemplate(const char* template_str, const char* message, std::string& output,
86+
OrtxStatus ApplyChatTemplate(const char* template_str, const char* message, const char* tools, std::string& output,
8587
std::vector<extTokenId_t>& ids_vec, bool add_generation_prompt, bool tokenize) const;
8688

8789
private:
@@ -95,7 +97,7 @@ class TokenizerImpl : public OrtxObjectImpl {
9597
const std::string& date_str = "26 Jul 2024",
9698
const std::vector<std::string>& builtin_tools_param = {});
9799

98-
OrtxStatus ApplyChatTemplate(const MessageList& messages, std::string& output,
100+
OrtxStatus ApplyChatTemplate(const MessageList& messages, const char* tools, std::string& output,
99101
bool add_generation_prompt) const;
100102

101103
using bpe_tokenizer_t = std::unique_ptr<JsonFastTokenizer>;

test/pp_api_test/test_tokenizer_chat.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ TEST(OrtxTokenizerTest, Phi4ChatTemplate) {
3737

3838
auto err = OrtxApplyChatTemplate(
3939
tokenizer.get(), nullptr,
40-
messages_json.c_str(), templated_text.ToBeAssigned(), true, false);
40+
messages_json.c_str(), nullptr, templated_text.ToBeAssigned(), true, false);
4141

4242
if (err != kOrtxOK) {
4343
std::cout << "Failed to apply chat template, stopping the test." << std::endl;

test/test_pp_api.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,25 @@ def test_phi4_chat_template(self):
331331
tokenizer = pp_api.Tokenizer(model_id)
332332
ortx_inputs = tokenizer.apply_chat_template(message_json)
333333
np.testing.assert_array_equal(ortx_inputs, inputs)
334+
335+
def test_chat_tools_input(self):
336+
model_id = util.get_test_data_file("data/models/phi-4")
337+
messages = [
338+
{"role": "system", "content": "You are a medieval knight and must provide explanations to modern people."},
339+
{"role": "user", "content": "How should I explain the Internet?"},
340+
]
341+
message_json = json.dumps(messages)
342+
tokenizer = pp_api.Tokenizer(model_id)
343+
344+
# Note: we simply test passing in a tools input to apply_chat_template here,
345+
# we do not compare with HF as they place the result of the function call in their output,
346+
# and we do not have the ability to call a function in-line within the C++ chat template backend.
347+
tool_calls = """[{"name": "fn1", "description": "fn details", "parameters": {"p1": {"description": "details", "type": "string"}}}, {"fn2": 2},{"fn3": 3}]"""
348+
349+
try:
350+
tokenizer.apply_chat_template(chat=message_json, tools=tool_calls)
351+
except Exception as e:
352+
assert False, f"Error while trying to pass in tools to chat template: {e}"
334353

335354
def test_qwen2_5_vl_chat_template(self):
336355
model_id = "Qwen/Qwen2.5-VL-72B-Instruct"

0 commit comments

Comments
 (0)