From 07720bc96430f5fd0b827f72d583b0cf4494a25e Mon Sep 17 00:00:00 2001
From: Shichao Song <60967965+Ki-Seki@users.noreply.github.com>
Date: Wed, 3 Dec 2025 15:45:42 +0800
Subject: [PATCH] fix: correct chat format usage

---
 docs/guide/migration.md                 | 2 ++
 tests/backends/test_llguidance.py       | 1 +
 tests/backends/test_outlines_core.py    | 1 +
 tests/backends/test_xgrammar.py         | 1 +
 tests/models/test_llamacpp.py           | 2 ++
 tests/models/test_llamacpp_tokenizer.py | 2 ++
 6 files changed, 9 insertions(+)

diff --git a/docs/guide/migration.md b/docs/guide/migration.md
index 729cceac1..0d705ba14 100644
--- a/docs/guide/migration.md
+++ b/docs/guide/migration.md
@@ -62,6 +62,7 @@ from outlines import models
 model = models.llamacpp(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 ```
 
@@ -74,6 +75,7 @@ from outlines import from_llamacpp
 llamacpp_model = Llama.from_pretrained(
     repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
     filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+    chat_format="qwen",
 )
 model = from_llamacpp(llamacpp_model)
 ```
diff --git a/tests/backends/test_llguidance.py b/tests/backends/test_llguidance.py
index fb5faea91..465f54871 100644
--- a/tests/backends/test_llguidance.py
+++ b/tests/backends/test_llguidance.py
@@ -31,6 +31,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/backends/test_outlines_core.py b/tests/backends/test_outlines_core.py
index ce3242457..9dc3db04a 100644
--- a/tests/backends/test_outlines_core.py
+++ b/tests/backends/test_outlines_core.py
@@ -30,6 +30,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/backends/test_xgrammar.py b/tests/backends/test_xgrammar.py
index e25f66508..855213990 100644
--- a/tests/backends/test_xgrammar.py
+++ b/tests/backends/test_xgrammar.py
@@ -27,6 +27,7 @@ def model_llamacpp():
         llama_cpp.Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/models/test_llamacpp.py b/tests/models/test_llamacpp.py
index 9b881b7de..776da800b 100644
--- a/tests/models/test_llamacpp.py
+++ b/tests/models/test_llamacpp.py
@@ -20,6 +20,7 @@ def test_load_model():
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen"
         )
     )
 
@@ -36,6 +37,7 @@ def model(tmp_path_factory):
         Llama.from_pretrained(
             repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
             filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+            chat_format="qwen",
         )
     )
 
diff --git a/tests/models/test_llamacpp_tokenizer.py b/tests/models/test_llamacpp_tokenizer.py
index dbf2507f3..80b063464 100644
--- a/tests/models/test_llamacpp_tokenizer.py
+++ b/tests/models/test_llamacpp_tokenizer.py
@@ -13,6 +13,7 @@ def model():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     setattr(
         model.tokenizer_,
@@ -27,6 +28,7 @@ def model_no_hf_tokenizer():
     model = llama_cpp.Llama.from_pretrained(
         repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
+        chat_format="qwen",
     )
     del model.tokenizer_
     return model
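
Note (not applied by `git am`): the `chat_format` argument selects the prompt template that llama-cpp-python applies when rendering chat messages; without it, the format is guessed from GGUF metadata. A minimal sketch of the resulting usage, assuming `llama-cpp-python` is installed and the Hugging Face download succeeds:

```python
# Sketch: load the GGUF model with an explicit chat format.
# "qwen" is one of the formats registered in llama_cpp.llama_chat_format.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
    filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
    chat_format="qwen",  # pin the template instead of relying on metadata
)

# chat_format controls how these messages are turned into a single prompt.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one word."}]
)
print(response["choices"][0]["message"]["content"])
```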