3
3
4
4
import onnxruntime_genai as og
5
5
import argparse
6
- import time
6
+ import os
7
7
import json
8
+ import time
8
9
9
10
def get_tools_list (input_tools ):
10
11
# input_tools format: '[{"name": "fn1", "description": "fn details", "parameters": {"p1": {"description": "details", "type": "string"}}},
@@ -134,8 +135,18 @@ def main(args):
134
135
messages = f"""[{{"role": "system", "content": "{ system_prompt } ", "tools": "{ prompt_tool_input } "}}]"""
135
136
else :
136
137
messages = f"""[{{"role": "system", "content": "{ system_prompt } "}}]"""
138
+
137
139
# Apply Chat Template
138
- tokenizer_input_system_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = False )
140
+ template_str = ""
141
+ tokenizer_input_system_prompt = None
142
+ jinja_path = os .path .join (args .model_path , "chat_template.jinja" )
143
+ if os .path .exists (jinja_path ):
144
+ with open (jinja_path , "r" , encoding = "utf-8" ) as f :
145
+ template_str = f .read ()
146
+ tokenizer_input_system_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = False , template_str = template_str )
147
+ else :
148
+ tokenizer_input_system_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = False )
149
+
139
150
input_tokens = tokenizer .encode (tokenizer_input_system_prompt )
140
151
# Ignoring the last end-of-text token, as it messes up the generation when grammar is enabled
141
152
if guidance_type :
@@ -156,8 +167,13 @@ def main(args):
156
167
if args .timings : started_timestamp = time .time ()
157
168
158
169
messages = f"""[{{"role": "user", "content": "{ text } "}}]"""
170
+
159
171
# Apply Chat Template
160
- user_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = True )
172
+ user_prompt = ""
173
+ if os .path .exists (jinja_path ):
174
+ user_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = True , template_str = template_str )
175
+ else :
176
+ user_prompt = tokenizer .apply_chat_template (messages = messages , add_generation_prompt = True )
161
177
input_tokens = tokenizer .encode (user_prompt )
162
178
generator .append_tokens (input_tokens )
163
179
0 commit comments