From 07720bc96430f5fd0b827f72d583b0cf4494a25e Mon Sep 17 00:00:00 2001
From: Shichao Song <60967965+Ki-Seki@users.noreply.github.com>
Date: Wed, 3 Dec 2025 15:45:42 +0800
Subject: [PATCH] fix: correct chat format usage

---
 docs/guide/migration.md                 | 2 ++
 tests/backends/test_llguidance.py       | 1 +
 tests/backends/test_outlines_core.py    | 1 +
 tests/backends/test_xgrammar.py         | 1 +
 tests/models/test_llamacpp.py           | 2 ++
 tests/models/test_llamacpp_tokenizer.py | 2 ++
 6 files changed, 9 insertions(+)

diff --git a/docs/guide/migration.md b/docs/guide/migration.md
index 729cceac1..0d705ba14 100644
--- a/docs/guide/migration.md
+++ b/docs/guide/migration.md
@@ -62,6 +62,7 @@ from outlines import models
 model = models.llamacpp(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 ```
 
@@ -74,6 +75,7 @@ from outlines import from_llamacpp
 llamacpp_model = Llama.from_pretrained(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 model = from_llamacpp(llamacpp_model)
 ```
diff --git a/tests/backends/test_llguidance.py b/tests/backends/test_llguidance.py
index fb5faea91..465f54871 100644
--- a/tests/backends/test_llguidance.py
+++ b/tests/backends/test_llguidance.py
@@ -31,6 +31,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/backends/test_outlines_core.py b/tests/backends/test_outlines_core.py
index ce3242457..9dc3db04a 100644
--- a/tests/backends/test_outlines_core.py
+++ b/tests/backends/test_outlines_core.py
@@ -30,6 +30,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/backends/test_xgrammar.py b/tests/backends/test_xgrammar.py
index e25f66508..855213990 100644
--- a/tests/backends/test_xgrammar.py
+++ b/tests/backends/test_xgrammar.py
@@ -27,6 +27,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/models/test_llamacpp.py b/tests/models/test_llamacpp.py
index 9b881b7de..776da800b 100644
--- a/tests/models/test_llamacpp.py
+++ b/tests/models/test_llamacpp.py
@@ -20,6 +20,7 @@ def test_load_model():
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen"
         )
     )
 
@@ -36,6 +37,7 @@ def model(tmp_path_factory):
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/models/test_llamacpp_tokenizer.py b/tests/models/test_llamacpp_tokenizer.py
index dbf2507f3..80b063464 100644
--- a/tests/models/test_llamacpp_tokenizer.py
+++ b/tests/models/test_llamacpp_tokenizer.py
@@ -13,6 +13,7 @@ def model():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     setattr(
         model.tokenizer_,
@@ -27,6 +28,7 @@ def model_no_hf_tokenizer():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     del model.tokenizer_
     return model
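
Note (not applied by `git am`): the `chat_format` argument selects the prompt template that llama-cpp-python applies when rendering chat messages; without it, the format is guessed from GGUF metadata. A minimal sketch of the resulting usage, assuming `llama-cpp-python` is installed and the Hugging Face download succeeds:

```python
# Sketch: load the GGUF model with an explicit chat format.
# "qwen" is one of the formats registered in llama_cpp.llama_chat_format.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
    filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
    chat_format="qwen",  # pin the template instead of relying on metadata
)

# chat_format controls how these messages are turned into a single prompt.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one word."}]
)
print(response["choices"][0]["message"]["content"])
```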