
Commit 02bf135

Adds the option to override the chat template (#914)

1 parent af21193 commit 02bf135
5 files changed: +45 −5 lines changed
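
For context, a minimal sketch of how the new option is meant to be used from Python, based on the config field this commit adds to each backend. This is an assumption-laden illustration, not code from the commit: the model id is a placeholder, and `model_name` is assumed to be the only required field.

```python
from lighteval.models.transformers.transformers_model import TransformersModelConfig

# Force chat-template formatting even if tokenizer detection would say otherwise.
config = TransformersModelConfig(
    model_name="org/model",  # placeholder model id
    override_chat_template=True,
)

# override_chat_template=False disables the chat template outright;
# leaving it as None keeps the old auto-detection behavior.
```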

src/lighteval/models/sglang/sglang_model.py

Lines changed: 7 additions & 1 deletion

````diff
@@ -95,6 +95,9 @@ class SGLangModelConfig(ModelConfig):
             Fraction of GPU memory to use for static allocation. Defaults to 0.8.
         chunked_prefill_size (PositiveInt):
             Size of chunks for prefill operations. Defaults to 4096.
+        override_chat_template (bool):
+            If True, we force the model to use a chat template. If False, we prevent the model from using
+            a chat template. If None, we use the default (True if present in the tokenizer, False otherwise).

     Example:
         ```python
@@ -127,6 +130,7 @@ class SGLangModelConfig(ModelConfig):
     attention_backend: str | None = None
     mem_fraction_static: PositiveFloat = 0.8
     chunked_prefill_size: PositiveInt = 4096
+    override_chat_template: bool = None


 class SGLangModel(LightevalModel):
@@ -136,7 +140,9 @@ def __init__(
     ):
         """Initializes an SGLang model."""
         self.config = config
-        self.use_chat_template = uses_chat_template(model_name=self.config.model_name)
+        self.use_chat_template = uses_chat_template(
+            model_name=self.config.model_name, override_chat_template=config.override_chat_template
+        )
         self.data_parallel_size = config.dp_size
         self.tensor_parallel_size = config.tp_size
         self._add_special_tokens = config.add_special_tokens
````

src/lighteval/models/transformers/transformers_model.py

Lines changed: 10 additions & 2 deletions

````diff
@@ -114,6 +114,9 @@ class TransformersModelConfig(ModelConfig):
             Whether to tokenize context and continuation separately or together. Defaults to False.
         continuous_batching (bool):
             Whether to use continuous batching for generation. Defaults to False.
+        override_chat_template (bool):
+            If True, we force the model to use a chat template. If False, we prevent the model from using
+            a chat template. If None, we use the default (True if present in the tokenizer, False otherwise).

     Example:
         ```python
@@ -151,6 +154,7 @@ class TransformersModelConfig(ModelConfig):
     multichoice_continuations_start_space: bool | None = None
     pairwise_tokenization: bool = False
     continuous_batching: bool = False
+    override_chat_template: bool = None

     def model_post_init(self, __context):
         if self.multichoice_continuations_start_space is True:
@@ -201,7 +205,9 @@ def __init__(
         self.model_sha = config.get_model_sha()
         self._max_length = self._init_max_length()
         self._tokenizer = self._create_auto_tokenizer()
-        self.use_chat_template = uses_chat_template(tokenizer=self._tokenizer)
+        self.use_chat_template = uses_chat_template(
+            tokenizer=self._tokenizer, override_chat_template=config.override_chat_template
+        )
         self.model = self._create_auto_model()

         # We are in DP (and launch the script with `accelerate launch`)
@@ -285,7 +291,9 @@ def from_model(
         else:
             self._device = self.config.device

-        self.use_chat_template = uses_chat_template(self._tokenizer)
+        self.use_chat_template = uses_chat_template(
+            tokenizer=self._tokenizer, override_chat_template=config.override_chat_template
+        )
         self._add_special_tokens = add_special_tokens if add_special_tokens is not None else False
         self.skip_special_tokens = skip_special_tokens if skip_special_tokens is not None else True
         self.pairwise_tokenization = pairwise_tokenization
````

src/lighteval/models/utils.py

Lines changed: 5 additions & 1 deletion

````diff
@@ -109,7 +109,9 @@ def batched(iterable, n):
         yield batch


-def uses_chat_template(model_name: str = None, tokenizer: AutoTokenizer = None) -> bool:
+def uses_chat_template(
+    model_name: str = None, tokenizer: AutoTokenizer = None, override_chat_template: bool = None
+) -> bool:
     """Returns a boolean depending on whether the Transformers AutoTokenizer contains
     a chat template or not

@@ -119,6 +121,8 @@ def uses_chat_template(model_name: str = None, tokenizer: AutoTokenizer = None)
     Returns:
         bool: True if Tokenizer config contains a chat template, False otherwise
     """
+    if override_chat_template is not None:
+        return override_chat_template
     if model_name is None and tokenizer is None:
         raise Exception("`uses_chat_template` requires either a tokenizer or model name as input")
     try:
````
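
Taken together, the new parameter simply short-circuits the detection logic. Below is a minimal sketch of the resulting precedence, assuming the function only inspects the tokenizer's `chat_template` attribute, as the existing tests suggest; the `SimpleNamespace` objects are stand-ins mirroring the `Mock` objects used in the test file.

```python
from types import SimpleNamespace

from lighteval.models.utils import uses_chat_template

tok_with_template = SimpleNamespace(chat_template="{% for message in messages %}...")
tok_without_template = SimpleNamespace(chat_template=None)

# Default behavior: the tokenizer decides.
assert uses_chat_template(tokenizer=tok_with_template)
assert not uses_chat_template(tokenizer=tok_without_template)

# The override wins, regardless of what the tokenizer says.
assert not uses_chat_template(tokenizer=tok_with_template, override_chat_template=False)
assert uses_chat_template(tokenizer=tok_without_template, override_chat_template=True)
```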

src/lighteval/models/vllm/vllm_model.py

Lines changed: 7 additions & 1 deletion

````diff
@@ -125,6 +125,9 @@ class VLLMModelConfig(ModelConfig):
             Subfolder within the model repository. Defaults to None.
         is_async (bool):
             Whether to use the async version of VLLM. Defaults to False.
+        override_chat_template (bool):
+            If True, we force the model to use a chat template. If False, we prevent the model from using
+            a chat template. If None, we use the default (True if present in the tokenizer, False otherwise).

     Example:
         ```python
@@ -165,6 +168,7 @@ class VLLMModelConfig(ModelConfig):
     max_num_batched_tokens: PositiveInt = 2048  # maximum number of tokens per batch
     subfolder: str | None = None
     is_async: bool = False  # Whether to use the async version or sync version of the model
+    override_chat_template: bool = None


 class VLLMModel(LightevalModel):
@@ -174,7 +178,9 @@ def __init__(
     ):
         """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation."""
         self.config = config
-        self.use_chat_template = uses_chat_template(model_name=config.model_name)
+        self.use_chat_template = uses_chat_template(
+            model_name=config.model_name, override_chat_template=config.override_chat_template
+        )
         self.data_parallel_size = config.data_parallel_size
         self.tensor_parallel_size = config.tensor_parallel_size
         self._add_special_tokens = config.add_special_tokens if config.add_special_tokens is not None else False
````
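
The vLLM backend follows the same pattern; here is a hedged sketch of the opposite direction, disabling the chat template. The model id is a placeholder and the other config fields are assumed to be optional.

```python
from lighteval.models.vllm.vllm_model import VLLMModelConfig

# Evaluate with plain completion-style prompts even if the tokenizer ships a chat template.
config = VLLMModelConfig(
    model_name="org/model",  # placeholder model id
    override_chat_template=False,
)
```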

tests/models/test_model_utils.py

Lines changed: 16 additions & 0 deletions

````diff
@@ -42,3 +42,19 @@ def test_uses_chat_template_with_no_chat_template(self):

         result = uses_chat_template(tokenizer=mock_tokenizer)
         self.assertFalse(result)
+
+    def test_uses_chat_template_with_chat_template_present_override(self):
+        """Test that the override forces False even when the tokenizer has a chat template."""
+        mock_tokenizer = Mock()
+        mock_tokenizer.chat_template = "{% for message in messages %}..."
+
+        result = uses_chat_template(tokenizer=mock_tokenizer, override_chat_template=False)
+        self.assertFalse(result)
+
+    def test_uses_chat_template_with_no_chat_template_override(self):
+        """Test that the override forces True even when the tokenizer has no chat template."""
+        mock_tokenizer = Mock()
+        mock_tokenizer.chat_template = None
+
+        result = uses_chat_template(tokenizer=mock_tokenizer, override_chat_template=True)
+        self.assertTrue(result)
````
