Skip to content

Commit 7693a0f

Browse files
authored
Fix log system prompt (#907)
* Restructuring: removed ModelInfo, which was redundant, and now log the full ModelConfig object instead. Also moved some classes that were not in the right place.
* Better handling of the pydantic-to-dict conversion for a cleaner save.
* Fixed tests.
* Fixed docs.
1 parent d0cd4c9 commit 7693a0f

25 files changed

+584
-397
lines changed

docs/source/package_reference/models.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set in the `model-args` or in the model yaml file (see example
55
[here](https://github.com/huggingface/lighteval/blob/main/examples/model_configs/vllm_model_config.yaml)).
66

77
### Base model config
8-
[[autodoc]] models.utils.ModelConfig
8+
[[autodoc]] models.abstract_model.ModelConfig
99

1010
## Local Models
1111

examples/custom_models/google_translate_model.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from transformers import AutoTokenizer
3333

3434
from lighteval.data import GenerativeTaskDataset
35-
from lighteval.models.abstract_model import LightevalModel, ModelInfo
35+
from lighteval.models.abstract_model import LightevalModel
3636
from lighteval.models.model_output import (
3737
GenerativeResponse,
3838
LoglikelihoodResponse,
@@ -53,13 +53,7 @@ class GoogleTranslateClient(LightevalModel):
5353
def __init__(self, config) -> None:
5454
self.model = config.model_name
5555
self.model_definition_file_path = config.model_definition_file_path
56-
57-
self.model_info = ModelInfo(
58-
model_name=config.model_name,
59-
model_sha="",
60-
model_dtype=None,
61-
model_size=-1,
62-
)
56+
self.config = config
6357

6458
self._tokenizer = AutoTokenizer.from_pretrained("gpt2") # Use a dummy tokenizer for compatibility
6559

examples/custom_models/local_mt_model.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
)
3535

3636
from lighteval.data import GenerativeTaskDataset
37-
from lighteval.models.abstract_model import LightevalModel, ModelInfo, TokenSequence
37+
from lighteval.models.abstract_model import LightevalModel, TokenSequence
3838
from lighteval.models.model_output import (
3939
GenerativeResponse,
4040
LoglikelihoodResponse,
@@ -88,13 +88,7 @@ def __init__(self, config, env_config) -> None:
8888
self.model_definition_file_path = config.model_definition_file_path
8989
self.batch_size = 32
9090
self.device = "cuda" if torch.cuda.is_available() else "cpu"
91-
92-
self.model_info = ModelInfo(
93-
model_name=config.model,
94-
model_sha="",
95-
model_dtype=None,
96-
model_size=-1,
97-
)
91+
self.config = config
9892

9993
# Update model initialization to handle both models
10094
if "seamless-m4t" in config.model:

src/lighteval/logging/evaluation_tracker.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,7 @@ def __init__(
177177
@property
178178
def results(self):
179179
config_general = asdict(self.general_config_logger)
180-
# We remove the config from logging, which contains context/accelerator objects
181-
config_general.pop("config")
180+
config_general["model_config"] = config_general["model_config"].model_dump()
182181
results = {
183182
"config_general": config_general,
184183
"results": self.metrics_logger.metric_aggregated,
@@ -216,19 +215,7 @@ def save(self) -> None:
216215
logger.info("Saving experiment tracker")
217216
date_id = datetime.now().isoformat().replace(":", "-")
218217

219-
# We first prepare data to save
220-
config_general = asdict(self.general_config_logger)
221-
# We remove the config from logging, which contains context/accelerator objects
222-
config_general.pop("config")
223-
224-
results_dict = {
225-
"config_general": config_general,
226-
"results": self.metrics_logger.metric_aggregated,
227-
"versions": self.versions_logger.versions,
228-
"config_tasks": self.task_config_logger.tasks_configs,
229-
"summary_tasks": self.details_logger.compiled_details,
230-
"summary_general": asdict(self.details_logger.compiled_details_over_all_tasks),
231-
}
218+
results_dict = self.results
232219

233220
# Create the details datasets for later upload
234221
details_datasets: dict[str, Dataset] = {}

src/lighteval/logging/info_loggers.py

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import xxhash
3232

3333
from lighteval.metrics.stderr import get_stderr_function
34-
from lighteval.models.abstract_model import ModelInfo
34+
from lighteval.models.abstract_model import ModelConfig
3535
from lighteval.models.model_output import ModelResponse
3636
from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
3737
from lighteval.tasks.requests import Doc
@@ -42,7 +42,7 @@
4242

4343

4444
if is_nanotron_available():
45-
from nanotron.config import Config
45+
pass
4646

4747

4848
@dataclass(init=False)
@@ -64,11 +64,7 @@ class GeneralConfigLogger:
6464
start_time (float): Start time of the experiment. Logged at class init.
6565
end_time (float): End time of the experiment. Logged when calling [`GeneralConfigLogger.log_end_time`]
6666
total_evaluation_time_secondes (str): Inferred total evaluation time in seconds (from the start and end times).
67-
model_name (str): Name of the currently evaluated model.
68-
model_sha (str): Commit hash of the currently evaluated model on the hub if available.
69-
model_dtype (str): Dtype of the model weights, as obtained when loading the model config.
70-
model_size (str): Model size as obtained when loading the model config.
71-
67+
model_config (ModelConfig): Model configuration
7268
"""
7369

7470
# general
@@ -80,16 +76,8 @@ class GeneralConfigLogger:
8076
end_time: float = None
8177
total_evaluation_time_secondes: str = None
8278

83-
# model info
79+
model_config: ModelConfig = None
8480
model_name: str = None
85-
model_sha: str = None
86-
model_dtype: str = None
87-
model_size: str = None
88-
89-
generation_parameters: dict | None = None
90-
91-
# Nanotron config
92-
config: "Config" = None
9381

9482
def __init__(self) -> None:
9583
"""Stores the current lighteval commit for reproducibility, and starts the evaluation timer."""
@@ -106,7 +94,6 @@ def log_args_info(
10694
num_fewshot_seeds: int,
10795
max_samples: Union[None, int],
10896
job_id: str,
109-
config: "Config" = None,
11097
) -> None:
11198
"""
11299
Logs the information about the arguments passed to the method.
@@ -118,31 +105,21 @@ def log_args_info(
118105
Else, the batch size is automatically inferred depending on what fits in memory.
119106
max_samples (Union[None, int]): maximum number of samples, if None, use all the samples available.
120107
job_id (str): job ID, used to retrieve logs.
121-
config (optional): Nanotron Config
122-
123-
Returns:
124-
None
125-
126108
"""
127109
self.num_fewshot_seeds = num_fewshot_seeds
128110
self.max_samples = max_samples
129111
self.job_id = job_id
130-
self.config = config
131112

132-
def log_model_info(self, generation_parameters: dict, model_info: ModelInfo) -> None:
113+
def log_model_info(self, model_config: ModelConfig) -> None:
133114
"""
134115
Logs the model information.
135116
136117
Args:
137118
model_config: the model config used to initialize the model.
138-
model_info (ModelInfo): Model information to be logged.
139119
140120
"""
141-
self.generation_parameters = generation_parameters
142-
self.model_name = model_info.model_name
143-
self.model_sha = model_info.model_sha
144-
self.model_dtype = model_info.model_dtype
145-
self.model_size = model_info.model_size
121+
self.model_config = model_config
122+
self.model_name = model_config.model_name
146123

147124
def log_end_time(self) -> None:
148125
self.end_time = time.perf_counter()

src/lighteval/main_accelerate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,11 @@ def accelerate( # noqa C901
117117
import yaml
118118

119119
from lighteval.logging.evaluation_tracker import EvaluationTracker
120+
from lighteval.models.abstract_model import ModelConfig
120121
from lighteval.models.transformers.adapter_model import AdapterModelConfig
121122
from lighteval.models.transformers.delta_model import DeltaModelConfig
122123
from lighteval.models.transformers.transformers_model import TransformersModelConfig
123124
from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig
124-
from lighteval.models.utils import ModelConfig
125125
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
126126

127127
evaluation_tracker = EvaluationTracker(

src/lighteval/main_baseline.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ def baseline(
6262
This baseline computation may not be suitable for all task types and should be used with caution.
6363
"""
6464
from lighteval.logging.evaluation_tracker import EvaluationTracker
65-
from lighteval.models.abstract_model import ModelInfo
6665
from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
6766
from lighteval.tasks.registry import Registry
6867
from lighteval.tasks.requests import SamplingMethod
@@ -82,13 +81,7 @@ def baseline(
8281
hub_results_org=None,
8382
)
8483
evaluation_tracker.general_config_logger.log_model_info(
85-
{},
86-
ModelInfo(
87-
model_name="lighteval/baseline",
88-
model_sha=None,
89-
model_dtype=None,
90-
model_size=None,
91-
),
84+
model_config=None,
9285
)
9386
evaluation_tracker.task_config_logger.log(tasks_dict)
9487

src/lighteval/models/abstract_model.py

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,133 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222

23+
import json
24+
import re
2325
from abc import ABC, abstractmethod
24-
from dataclasses import dataclass
2526
from typing import Optional, Union
2627

2728
import torch
29+
import yaml
30+
from pydantic import BaseModel
2831
from transformers.tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase
2932

33+
from lighteval.models.model_input import GenerationParameters
3034
from lighteval.models.model_output import ModelResponse
3135
from lighteval.tasks.requests import Doc
3236

3337

3438
TokenSequence = Union[list[int], torch.LongTensor, torch.Tensor, BatchEncoding]
3539

3640

37-
@dataclass
38-
class ModelInfo:
39-
model_name: str
40-
model_sha: str | None = None
41-
model_dtype: str | None = None
42-
model_size: int | None = None
41+
class ModelConfig(BaseModel, extra="forbid"):
42+
"""
43+
Base configuration class for all model types in Lighteval.
44+
45+
This is the foundation class that all specific model configurations inherit from.
46+
It provides common functionality for parsing configuration from files and command-line arguments,
47+
as well as shared attributes that are used by all models like generation parameters and system prompts.
48+
49+
Attributes:
50+
generation_parameters (GenerationParameters):
51+
Configuration parameters that control text generation behavior, including
52+
temperature, top_p, max_new_tokens, etc. Defaults to empty GenerationParameters.
53+
system_prompt (str | None):
54+
Optional system prompt to be used with chat models. This prompt sets the
55+
behavior and context for the model during evaluation.
56+
57+
Methods:
58+
from_path(path: str):
59+
Load configuration from a YAML file.
60+
from_args(args: str):
61+
Parse configuration from a command-line argument string.
62+
_parse_args(args: str):
63+
Static method to parse argument strings into configuration dictionaries.
64+
65+
Example:
66+
```python
67+
# Load from YAML file
68+
config = ModelConfig.from_path("model_config.yaml")
69+
70+
# Load from command line arguments
71+
config = ModelConfig.from_args("model_name=meta-llama/Llama-3.1-8B-Instruct,system_prompt='You are a helpful assistant.',generation_parameters={temperature:0.7}")
72+
73+
# Direct instantiation
74+
config = ModelConfig(
75+
model_name="meta-llama/Llama-3.1-8B-Instruct",
76+
generation_parameters=GenerationParameters(temperature=0.7),
77+
system_prompt="You are a helpful assistant."
78+
)
79+
```
80+
"""
81+
82+
generation_parameters: GenerationParameters = GenerationParameters()
83+
system_prompt: str | None = None
84+
85+
@classmethod
86+
def from_path(cls, path: str):
87+
with open(path, "r") as f:
88+
config = yaml.safe_load(f)
89+
90+
return cls(**config["model_parameters"])
91+
92+
@classmethod
93+
def from_args(cls, args: str):
94+
config = cls._parse_args(args)
95+
return cls(**config)
96+
97+
@staticmethod
98+
def _parse_args(args: str) -> dict:
99+
"""Parse a string of arguments into a configuration dictionary.
100+
101+
This function parses a string containing model arguments and generation parameters
102+
into a flat configuration dictionary keyed by argument name.
103+
It specifically handles generation parameters enclosed in curly braces.
104+
105+
Args:
106+
args (str): A string containing comma-separated key-value pairs, where generation
107+
parameters can be specified in a nested JSON-like format.
108+
109+
Returns:
110+
dict: A flat dictionary mapping each argument name to its value:
111+
- plain `key=value` pairs are stored as strings, and bare keys (no `=`) are stored as True
112+
- 'generation_parameters', when present, holds the parsed nested dictionary
113+
114+
Examples:
115+
>>> ModelConfig._parse_args("model_name=gpt2,max_length=100")
116+
{
117+
'model_name': 'gpt2', 'max_length': '100',
118+
}
119+
120+
>>> ModelConfig._parse_args("model_name=gpt2,generation_parameters={temperature:0.7,top_p:0.9}")
121+
{
122+
'model_name': 'gpt2', 'generation_parameters': {'temperature': 0.7, 'top_p': 0.9},
123+
}
124+
125+
>>> ModelConfig._parse_args("model_name=gpt2,use_cache,generation_parameters={temperature:0.7}")
126+
{
127+
'model_name': 'gpt2', 'use_cache': True, 'generation_parameters': {'temperature': 0.7},
128+
}
129+
"""
130+
# Looking for generation_parameters in the model_args
131+
generation_parameters_dict = None
132+
pattern = re.compile(r"(\w+)=(\{.*\}|[^,]+)")
133+
matches = pattern.findall(args)
134+
for key, value in matches:
135+
key = key.strip()
136+
if key == "generation_parameters":
137+
# Keys must be quoted (since they are strings)
138+
gen_params = re.sub(r"(\w+):", r'"\1":', value)
139+
# for k, v where v are strings, we quote them too
140+
gen_params = re.sub(r":\s*([A-Za-z_][\w.-]*)\s*(?=[,}])", r':"\1"', gen_params)
141+
generation_parameters_dict = json.loads(gen_params)
142+
143+
args = re.sub(r"generation_parameters=\{.*?\},?", "", args).strip(",")
144+
model_config = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.split(",")}
145+
146+
if generation_parameters_dict is not None:
147+
model_config["generation_parameters"] = generation_parameters_dict
148+
149+
return model_config
43150

44151

45152
class LightevalModel(ABC):

src/lighteval/models/custom/custom_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222

23-
from lighteval.models.utils import ModelConfig
23+
from lighteval.models.abstract_model import ModelConfig
2424

2525

2626
class CustomModelConfig(ModelConfig):

0 commit comments

Comments
 (0)