diff --git a/src/lighteval/logging/info_loggers.py b/src/lighteval/logging/info_loggers.py index 446006aec..9da7c15e7 100644 --- a/src/lighteval/logging/info_loggers.py +++ b/src/lighteval/logging/info_loggers.py @@ -87,6 +87,7 @@ class GeneralConfigLogger: model_size: str = None generation_parameters: dict | None = None + chat_template_parameters: dict | None = None # Nanotron config config: "Config" = None @@ -129,7 +130,9 @@ def log_args_info( self.job_id = job_id self.config = config - def log_model_info(self, generation_parameters: dict, model_info: ModelInfo) -> None: + def log_model_info( + self, generation_parameters: dict, model_info: ModelInfo, chat_template_parameters: dict + ) -> None: """ Logs the model information. @@ -139,6 +142,7 @@ def log_model_info(self, generation_parameters: dict, model_info: ModelInfo) -> """ self.generation_parameters = generation_parameters + self.chat_template_parameters = chat_template_parameters self.model_name = model_info.model_name self.model_sha = model_info.model_sha self.model_dtype = model_info.model_dtype diff --git a/src/lighteval/main_baseline.py b/src/lighteval/main_baseline.py index 59a26c660..f49131bcb 100644 --- a/src/lighteval/main_baseline.py +++ b/src/lighteval/main_baseline.py @@ -89,6 +89,7 @@ def baseline( model_dtype=None, model_size=None, ), + {}, ) evaluation_tracker.task_config_logger.log(tasks_dict) diff --git a/src/lighteval/models/custom/custom_model.py b/src/lighteval/models/custom/custom_model.py index 480952255..6707c1593 100644 --- a/src/lighteval/models/custom/custom_model.py +++ b/src/lighteval/models/custom/custom_model.py @@ -70,5 +70,4 @@ def loglikelihood(self, docs: list[Doc]) -> list[ModelResponse]: An example of a custom model can be found in `examples/custom_models/google_translate_model.py`. """ - model_name: str model_definition_file_path: str diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 49d45e961..19ea689de 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -95,7 +95,6 @@ class ServerlessEndpointModelConfig(ModelConfig): ``` """ - model_name: str add_special_tokens: bool = True batch_size: int = 1 diff --git a/src/lighteval/models/litellm_model.py b/src/lighteval/models/litellm_model.py index e22998663..b99d5f393 100644 --- a/src/lighteval/models/litellm_model.py +++ b/src/lighteval/models/litellm_model.py @@ -94,7 +94,6 @@ class LiteLLMModelConfig(ModelConfig): ``` """ - model_name: str provider: str | None = None base_url: str | None = None api_key: str | None = None diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 2d8a53fcb..e1ffa7d75 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -232,3 +232,18 @@ def to_sglang_dict(self) -> dict: "min_new_tokens": self.min_new_tokens, } return {k: v for k, v in args.items() if v is not None} + + +class ChatTemplateParameters(BaseModel): + reasoning_effort: str = None + + def to_transformers_dict(self) -> dict: + """Selects relevant chat template parameters for transformers models. 
+ + Returns: + dict: Valid parameters for the chat template + """ + args = { + "reasoning_effort": self.reasoning_effort, + } + return {k: v for k, v in args.items() if v is not None} diff --git a/src/lighteval/models/sglang/sglang_model.py b/src/lighteval/models/sglang/sglang_model.py index 8519733e5..c9ecca80c 100644 --- a/src/lighteval/models/sglang/sglang_model.py +++ b/src/lighteval/models/sglang/sglang_model.py @@ -109,7 +109,6 @@ class SGLangModelConfig(ModelConfig): ``` """ - model_name: str load_format: str = "auto" dtype: str = "auto" tp_size: PositiveInt = 1 # how many GPUs to use for tensor parallelism diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index c7b225913..ebac11c50 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -133,7 +133,6 @@ class TransformersModelConfig(ModelConfig): (bitsandbytes for 4-bit/8-bit quantization). """ - model_name: str tokenizer: str | None = None subfolder: str | None = None revision: str = "main" @@ -230,7 +229,10 @@ def __init__( ) self.prompt_manager = PromptManager( - use_chat_template=self.use_chat_template, tokenizer=self.tokenizer, system_prompt=config.system_prompt + use_chat_template=self.use_chat_template, + tokenizer=self.tokenizer, + system_prompt=config.system_prompt, + chat_template_parameters=config.chat_template_parameters, ) def cleanup(self): diff --git a/src/lighteval/models/transformers/vlm_transformers_model.py b/src/lighteval/models/transformers/vlm_transformers_model.py index 46ca9be64..d11002456 100644 --- a/src/lighteval/models/transformers/vlm_transformers_model.py +++ b/src/lighteval/models/transformers/vlm_transformers_model.py @@ -104,7 +104,6 @@ class VLMTransformersModelConfig(ModelConfig): loading. 
""" - model_name: str processor: str | None = None use_fast_image_processor: bool | None = None subfolder: str | None = None diff --git a/src/lighteval/models/utils.py b/src/lighteval/models/utils.py index bb8bec163..9d6f18c43 100644 --- a/src/lighteval/models/utils.py +++ b/src/lighteval/models/utils.py @@ -34,7 +34,7 @@ from transformers import AutoTokenizer from transformers.models.auto.configuration_auto import AutoConfig -from lighteval.models.model_input import GenerationParameters +from lighteval.models.model_input import ChatTemplateParameters, GenerationParameters logger = logging.getLogger(__name__) @@ -70,7 +70,7 @@ class ModelConfig(BaseModel, extra="forbid"): config = ModelConfig.from_path("model_config.yaml") # Load from command line arguments - config = ModelConfig.from_args("model_name=meta-llama/Llama-3.1-8B-Instruct,system_prompt='You are a helpful assistant.',generation_parameters={temperature=0.7}") + config = ModelConfig.from_args("model_name=meta-llama/Llama-3.1-8B-Instruct,system_prompt='You are a helpful assistant.',generation_parameters={temperature:0.7}") # Direct instantiation config = ModelConfig( @@ -81,7 +81,9 @@ class ModelConfig(BaseModel, extra="forbid"): ``` """ + model_name: str generation_parameters: GenerationParameters = GenerationParameters() + chat_template_parameters: ChatTemplateParameters = ChatTemplateParameters() system_prompt: str | None = None @classmethod @@ -131,20 +133,31 @@ def _parse_args(args: str) -> dict: """ # Looking for generation_parameters in the model_args generation_parameters_dict = None - pattern = re.compile(r"(\w+)=(\{.*\}|[^,]+)") + chat_template_parameters_dict = None + pattern = re.compile(r"(\w+)\s*=\s*(\{[^{}]*\}|[^,]+?)(?=,|$)") matches = pattern.findall(args) for key, value in matches: key = key.strip() if key == "generation_parameters": gen_params = re.sub(r"(\w+):", r'"\1":', value) generation_parameters_dict = json.loads(gen_params) + if key == "chat_template_parameters": + # Chat template parameters have strings as values that also need to be quoted + chat_template_params = re.sub(r"(\w+)\s*:\s*([A-Za-z_][\w.-]*)\s*(?=[,}])", r'"\1":"\2"', value) + chat_template_parameters_dict = json.loads(chat_template_params) args = re.sub(r"generation_parameters=\{.*?\},?", "", args).strip(",") - model_config = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.split(",")} + args = re.sub(r"chat_template_parameters=\{.*?\},?", "", args).strip(",") + model_config = ( + {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.split(",")} if args else {} + ) if generation_parameters_dict is not None: model_config["generation_parameters"] = generation_parameters_dict + if chat_template_parameters_dict is not None: + model_config["chat_template_parameters"] = chat_template_parameters_dict + return model_config diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 5f7a7e9bc..70c720364 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -140,7 +140,6 @@ class VLLMModelConfig(ModelConfig): ``` """ - model_name: str revision: str = "main" # revision of the model dtype: str = "bfloat16" tensor_parallel_size: PositiveInt = 1 # how many GPUs to use for tensor parallelism diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 508a2a1c7..ad11329da 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -177,8 +177,11 @@ def __init__( self.model = self._init_model(model_config, 
model)

         generation_parameters = model_config.generation_parameters.model_dump() if model_config else {}
+        chat_template_parameters = model_config.chat_template_parameters.model_dump() if model_config else {}

-        self.evaluation_tracker.general_config_logger.log_model_info(generation_parameters, self.model.model_info)
+        self.evaluation_tracker.general_config_logger.log_model_info(
+            generation_parameters, self.model.model_info, chat_template_parameters
+        )

         self._init_random_seeds()
         self._init_tasks_and_requests(tasks=tasks)
diff --git a/src/lighteval/tasks/prompt_manager.py b/src/lighteval/tasks/prompt_manager.py
index 6b7068bd8..ad2968280 100644
--- a/src/lighteval/tasks/prompt_manager.py
+++ b/src/lighteval/tasks/prompt_manager.py
@@ -28,6 +28,7 @@
 from itertools import cycle
 from typing import TYPE_CHECKING

+from lighteval.models.model_input import ChatTemplateParameters
 from lighteval.tasks.requests import Doc
 from lighteval.utils.utils import as_list

@@ -40,10 +41,17 @@ class PromptManager:
-    def __init__(self, use_chat_template: bool = False, tokenizer=None, system_prompt: str | None = None):
+    def __init__(
+        self,
+        use_chat_template: bool = False,
+        tokenizer=None,
+        system_prompt: str | None = None,
+        chat_template_parameters: ChatTemplateParameters | None = None,
+    ):
         self.use_chat_template = use_chat_template
         self.tokenizer = tokenizer
         self.system_prompt = system_prompt  # System prompt to be used in chat templates
+        self.chat_template_parameters = chat_template_parameters if chat_template_parameters else ChatTemplateParameters()

     def prepare_prompt(self, doc: Doc) -> str:
         """Prepare a prompt from a document, either using chat template or plain text format."""
@@ -123,6 +131,7 @@ def _prepare_chat_template(self, doc: Doc, tokenize: bool = True) -> str:
                 messages,
                 tokenize=False,
                 add_generation_prompt=True,
+                **self.chat_template_parameters.to_transformers_dict(),
             )
         else:
             # for apis
diff --git a/tests/test_prompt_manager_class.py b/tests/test_prompt_manager_class.py
index f552a9c31..9fa21139a 100644
--- a/tests/test_prompt_manager_class.py
+++ b/tests/test_prompt_manager_class.py
@@ -24,6 +24,7 @@

 import pytest

+from lighteval.models.model_input import ChatTemplateParameters
 from lighteval.tasks.prompt_manager import PromptManager
 from lighteval.tasks.requests import Doc

@@ -47,6 +48,22 @@ def test_init_with_chat_template(self):
         assert pm.tokenizer == tokenizer
         assert pm.system_prompt == system_prompt

+    def test_init_with_chat_template_and_chat_template_parameters(self):
+        """Test PromptManager initialization with chat template enabled and chat template parameters."""
+        tokenizer = Mock()
+        system_prompt = "You are a helpful assistant."
+ pm = PromptManager( + use_chat_template=True, + tokenizer=tokenizer, + system_prompt=system_prompt, + chat_template_parameters=ChatTemplateParameters(reasoning_effort="medium"), + ) + assert pm.use_chat_template is True + assert pm.tokenizer == tokenizer + assert pm.system_prompt == system_prompt + assert pm.chat_template_parameters is not None + assert pm.chat_template_parameters.reasoning_effort == "medium" + def test_prepare_prompt_plain_text_basic(self): """Test prepare_prompt with plain text format and basic document.""" pm = PromptManager() diff --git a/tests/utils/test_model_config.py b/tests/utils/test_model_config.py new file mode 100644 index 000000000..f89b4d7d2 --- /dev/null +++ b/tests/utils/test_model_config.py @@ -0,0 +1,84 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from lighteval.models.model_input import ChatTemplateParameters, GenerationParameters +from lighteval.models.utils import ModelConfig + + +class TestModelConfig(unittest.TestCase): + def test_model_config_init(self): + config = ModelConfig( + model_name="meta-llama/Llama-3.1-8B-Instruct", + generation_parameters=GenerationParameters(temperature=0.7), + system_prompt="You are a helpful assistant.", + chat_template_parameters=ChatTemplateParameters(reasoning_effort="low"), + ) + + self.assertEqual(config.model_name, "meta-llama/Llama-3.1-8B-Instruct") + self.assertEqual(config.generation_parameters.temperature, 0.7) + self.assertEqual(config.system_prompt, "You are a helpful assistant.") + self.assertEqual(config.chat_template_parameters.reasoning_effort, "low") + + def test_model_config_init_command_line(self): + config = ModelConfig.from_args( + 'model_name=meta-llama/Llama-3.1-8B-Instruct,system_prompt="You are a helpful assistant.",generation_parameters={temperature:0.7},chat_template_parameters={reasoning_effort:low}' + ) + + self.assertEqual(config.model_name, "meta-llama/Llama-3.1-8B-Instruct") + self.assertEqual(config.generation_parameters.temperature, 0.7) + self.assertEqual(config.system_prompt, '"You are a helpful assistant."') # is this what we want? 
+ self.assertEqual(config.chat_template_parameters.reasoning_effort, "low") + + def test_model_config_generation_parameters_parse_single_int(self): + config = ModelConfig.from_args( + "model_name=meta-llama/Llama-3.1-8B-Instruct,generation_parameters={temperature:0.7}" + ) + self.assertEqual(config.generation_parameters.temperature, 0.7) + + def test_model_config_generation_parameters_parse_multiple_int(self): + config = ModelConfig.from_args( + "model_name=meta-llama/Llama-3.1-8B-Instruct,generation_parameters={temperature:0.7,top_k:42}" + ) + self.assertEqual(config.generation_parameters.temperature, 0.7) + self.assertEqual(config.generation_parameters.top_k, 42) + + @unittest.skip("This is not working at this time") + def test_model_config_generation_parameters_parse_string(self): + config = ModelConfig.from_args( + 'model_name=meta-llama/Llama-3.1-8B-Instruct,generation_parameters={response_format:{"type":"json_object"}}' + ) + self.assertEqual(config.generation_parameters.temperature, 0.7) + + @unittest.skip("This is not working at this time") + def test_model_config_chat_template_parameters_parse_single_int(self): + config = ModelConfig.from_args( + "model_name=meta-llama/Llama-3.1-8B-Instruct,chat_template_parameters={temperature:0.7}" + ) + self.assertEqual(config.chat_template_parameters.temperature, 0.7) + + def test_model_config_chat_template_parameters_parse_string(self): + config = ModelConfig.from_args( + "model_name=meta-llama/Llama-3.1-8B-Instruct,chat_template_parameters={reasoning_effort:low}" + ) + self.assertEqual(config.chat_template_parameters.reasoning_effort, "low")