Skip to content

Commit 034c23b

Browse files
hynky1999anton-lNouamaneTaziHynek Kydlicek
authored
Nanotron, Multilingual tasks update + misc (#756)
## Nanotron - Logprobs are working - Generation works with https://github.com/huggingface/nanotron/tree/nouamane/lighteval-fix; however, there is sometimes an issue with ordering. - Only the relevant parts of the config are fetched, to avoid some dataclass checks that are not needed in nanotron - Small changes to configs ## Metrics - Probability metrics now work with char normalization - Token normalization is fixed when used with transformers ## Multilingual tasks - New squads + a few new mcf benchmarks + small fixes to existing tasks ## Misc - QA template now only uses unique golds, which speeds up probability calculation. --------- Co-authored-by: anton <[email protected]> Co-authored-by: nouamanetazi <[email protected]> Co-authored-by: Hynek Kydlicek <[email protected]>
1 parent 2651750 commit 034c23b

File tree

12 files changed

+421
-123
lines changed

12 files changed

+421
-123
lines changed

src/lighteval/config/lighteval_config.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@
2323
from dataclasses import dataclass
2424
from typing import Dict, Optional, Union
2525

26+
from pydantic import BaseModel
27+
2628
from lighteval.utils.imports import is_nanotron_available
2729

2830

2931
if is_nanotron_available():
30-
from nanotron.config import Config
32+
from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs
3133
from nanotron.config.parallelism_config import ParallelismArgs
3234
from nanotron.generation.sampler import SamplerType
3335
from nanotron.logging import get_logger
@@ -37,9 +39,8 @@
3739
DEFAULT_GENERATION_SEED = 42
3840

3941

40-
@dataclass
41-
class GenerationArgs:
42-
sampler: Optional[Union[str, "SamplerType"]] = None
42+
class GenerationArgs(BaseModel):
43+
sampler: Optional["SamplerType"] = None
4344
temperature: Optional[float] = None
4445
top_k: Optional[int] = None
4546
top_p: Optional[float] = None
@@ -49,8 +50,6 @@ class GenerationArgs:
4950
use_cache: Optional[bool] = False
5051

5152
def __post_init__(self):
52-
if isinstance(self.sampler, str):
53-
self.sampler = SamplerType[self.sampler.upper()]
5453
if self.seed is None:
5554
self.seed = DEFAULT_GENERATION_SEED
5655

@@ -101,4 +100,14 @@ class LightEvalConfig:
101100
@dataclass
102101
class FullNanotronConfig:
103102
lighteval_config: LightEvalConfig
104-
nanotron_config: "Config"
103+
nanotron_model: "ModelArgs"
104+
nanotron_tokenizer: "TokenizerArgs"
105+
nanotron_general: "GeneralArgs"
106+
107+
@property
108+
def generation_parameters(self):
109+
# Return the generation parameters from the lighteval config
110+
# or create default generation parameters if none are set
111+
if self.lighteval_config.generation:
112+
return self.lighteval_config.generation
113+
return GenerationArgs()

src/lighteval/main_nanotron.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@
2323
# flake8: noqa: C901
2424
import os
2525

26+
import yaml
2627
from typer import Option
2728
from typing_extensions import Annotated
29+
from yaml import SafeLoader
2830

2931

30-
CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
31-
3232
HELP_PANEL_NAME_1 = "Common Parameters"
3333
HELP_PANEL_NAME_2 = "Logging Parameters"
3434
HELP_PANEL_NAME_3 = "Debug Parameters"
@@ -43,41 +43,43 @@ def nanotron(
4343
str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.")
4444
],
4545
lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")],
46-
cache_dir: Annotated[str, Option(help="Cache directory for datasets and models.")] = CACHE_DIR,
4746
):
4847
"""
4948
Evaluate models using nanotron as backend.
5049
"""
51-
from nanotron.config import Config, get_config_from_file
50+
from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs, get_config_from_dict, get_config_from_file
5251

53-
from lighteval.config.lighteval_config import FullNanotronConfig, LightEvalConfig
52+
from lighteval.config.lighteval_config import (
53+
FullNanotronConfig,
54+
LightEvalConfig,
55+
)
5456
from lighteval.logging.evaluation_tracker import EvaluationTracker
55-
from lighteval.logging.hierarchical_logger import htrack_block
5657
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
5758
from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available
58-
from lighteval.utils.utils import EnvConfig
59-
60-
env_config = EnvConfig(token=os.getenv("HF_TOKEN"), cache_dir=cache_dir)
6159

6260
if not is_nanotron_available():
6361
raise ImportError(NO_NANOTRON_ERROR_MSG)
6462

65-
with htrack_block("Load nanotron config"):
66-
# Create nanotron config
67-
if not checkpoint_config_path.endswith(".yaml"):
68-
raise ValueError("The checkpoint path should point to a YAML file")
63+
# Create nanotron config
64+
if not checkpoint_config_path.endswith(".yaml"):
65+
raise ValueError("The checkpoint path should point to a YAML file")
66+
67+
with open(checkpoint_config_path) as f:
68+
nanotron_yaml = yaml.load(f, Loader=SafeLoader)
6969

70-
model_config = get_config_from_file(
71-
checkpoint_config_path,
72-
config_class=Config,
73-
model_config_class=None,
70+
model_config, tokenizer_config, general_config = [
71+
get_config_from_dict(
72+
nanotron_yaml[key],
73+
config_class=config_class,
7474
skip_unused_config_keys=True,
7575
skip_null_keys=True,
7676
)
77+
for key, config_class in [("model", ModelArgs), ("tokenizer", TokenizerArgs), ("general", GeneralArgs)]
78+
]
7779

78-
# We are getting an type error, because the get_config_from_file is not correctly typed,
79-
lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore
80-
nanotron_config = FullNanotronConfig(lighteval_config, model_config)
80+
# Load lighteval config
81+
lighteval_config: LightEvalConfig = get_config_from_file(lighteval_config_path, config_class=LightEvalConfig) # type: ignore
82+
nanotron_config = FullNanotronConfig(lighteval_config, model_config, tokenizer_config, general_config)
8183

8284
evaluation_tracker = EvaluationTracker(
8385
output_dir=lighteval_config.logging.output_dir,
@@ -88,17 +90,15 @@ def nanotron(
8890
push_to_tensorboard=lighteval_config.logging.push_to_tensorboard,
8991
save_details=lighteval_config.logging.save_details,
9092
tensorboard_metric_prefix=lighteval_config.logging.tensorboard_metric_prefix,
91-
nanotron_run_info=nanotron_config.nanotron_config.general,
93+
nanotron_run_info=nanotron_config.nanotron_general,
9294
)
9395

9496
pipeline_parameters = PipelineParameters(
9597
launcher_type=ParallelismManager.NANOTRON,
96-
env_config=env_config,
9798
job_id=os.environ.get("SLURM_JOB_ID", 0),
9899
nanotron_checkpoint_path=checkpoint_config_path,
99100
dataset_loading_processes=lighteval_config.tasks.dataset_loading_processes,
100101
custom_tasks_directory=lighteval_config.tasks.custom_tasks,
101-
override_batch_size=lighteval_config.batch_size,
102102
num_fewshot_seeds=1,
103103
max_samples=lighteval_config.tasks.max_samples,
104104
use_chat_template=False,

src/lighteval/metrics/metrics_sample.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,8 @@ def __init__(
342342
def compute(
343343
self,
344344
logprobs: list[float],
345-
target_tokens: list[list[int]],
345+
target_tokens: list[list[int]] | None = None,
346+
reference_texts: list[str] | None = None,
346347
**kwargs,
347348
) -> float:
348349
"""Computes the log likelihood probability: chance of choosing the best choice.
@@ -352,8 +353,7 @@ def compute(
352353
choices_logprob (list[float]): Summed log-probabilities of all the possible choices for the model, ordered as the choices.
353354
unconditioned_logprob (list[float] | None): Unconditioned log-probabilities for PMI normalization, ordered as the choices.
354355
choices_tokens (list[list[int]] | None): Tokenized choices for token normalization, ordered as the choices.
355-
formatted_doc (Doc): Original document for the sample.
356-
Used to get the original choices' length for possible normalization
356+
reference_texts (list[str] | None): Reference texts for token normalization, ordered as the choices.
357357
358358
Returns:
359359
float: The probability of the best log-prob choice being a gold choice.
@@ -364,7 +364,7 @@ def compute(
364364
normalization=self.log_prob_normalization,
365365
choices_tokens=target_tokens,
366366
choices_logprob=logprobs,
367-
choices_text=None,
367+
choices_text=reference_texts,
368368
unconditioned_logprob=None,
369369
)
370370
if self.log_prob_normalization

0 commit comments

Comments
 (0)