dottxt-ai · RobinPicard · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025
diff --git a/docs/guide/migration.md b/docs/guide/migration.md
@@ -62,6 +62,7 @@ from outlines import models
 model = models.llamacpp(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 ```
 
@@ -74,6 +75,7 @@ from outlines import from_llamacpp
 llamacpp_model = Llama.from_pretrained(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 model = from_llamacpp(llamacpp_model)
 ```

diff --git a/tests/backends/test_llguidance.py b/tests/backends/test_llguidance.py
@@ -31,6 +31,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 

diff --git a/tests/backends/test_outlines_core.py b/tests/backends/test_outlines_core.py
@@ -30,6 +30,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 

diff --git a/tests/backends/test_xgrammar.py b/tests/backends/test_xgrammar.py
@@ -27,6 +27,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 

diff --git a/tests/models/test_llamacpp.py b/tests/models/test_llamacpp.py
@@ -20,6 +20,7 @@ def test_load_model():
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
@@ -36,6 +37,7 @@ def model(tmp_path_factory):
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 

diff --git a/tests/models/test_llamacpp_tokenizer.py b/tests/models/test_llamacpp_tokenizer.py
@@ -13,6 +13,7 @@ def model():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     setattr(
         model.tokenizer_,
@@ -27,6 +28,7 @@ def model_no_hf_tokenizer():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     del model.tokenizer_
     return model