Skip to content

Commit d0cd4c9

Browse files
authored
Moved some files (#905)
* Fixed the nanotron config file that was placed in a very random place
* Also put the litellm model config file with the other endpoint models
* Fixed docs
1 parent ea1dd18 commit d0cd4c9

File tree

7 files changed

+92
-125
lines changed

7 files changed

+92
-125
lines changed

docs/source/package_reference/models.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ set in the `model-args` or in the model yaml file (see example
3737
[[autodoc]] models.endpoints.tgi_model.TGIModelConfig
3838

3939
### Litellm Model
40-
[[autodoc]] models.litellm_model.LiteLLMModelConfig
40+
[[autodoc]] models.endpoints.litellm_model.LiteLLMModelConfig
4141

4242
## Custom Model
4343
[[autodoc]] models.custom.custom_model.CustomModelConfig

src/lighteval/config/lighteval_config.py

Lines changed: 0 additions & 113 deletions
This file was deleted.

src/lighteval/main_endpoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def litellm(
377377
import yaml
378378

379379
from lighteval.logging.evaluation_tracker import EvaluationTracker
380-
from lighteval.models.litellm_model import LiteLLMModelConfig
380+
from lighteval.models.endpoints.litellm_model import LiteLLMModelConfig
381381
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
382382

383383
evaluation_tracker = EvaluationTracker(

src/lighteval/main_nanotron.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,19 @@ def nanotron(
5757
"""
5858
Evaluate models using nanotron as backend.
5959
"""
60+
from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available
61+
62+
if not is_nanotron_available():
63+
raise ImportError(NO_NANOTRON_ERROR_MSG)
64+
6065
from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs, get_config_from_dict, get_config_from_file
6166

62-
from lighteval.config.lighteval_config import (
67+
from lighteval.logging.evaluation_tracker import EvaluationTracker
68+
from lighteval.models.nanotron import (
6369
FullNanotronConfig,
6470
LightEvalConfig,
6571
)
66-
from lighteval.logging.evaluation_tracker import EvaluationTracker
6772
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
68-
from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available
69-
70-
if not is_nanotron_available():
71-
raise ImportError(NO_NANOTRON_ERROR_MSG)
7273

7374
# Create nanotron config
7475
if not checkpoint_config_path.endswith(".yaml"):

src/lighteval/models/model_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
InferenceProvidersClient,
3636
InferenceProvidersModelConfig,
3737
)
38+
from lighteval.models.endpoints.litellm_model import LiteLLMClient, LiteLLMModelConfig
3839
from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig
39-
from lighteval.models.litellm_model import LiteLLMClient, LiteLLMModelConfig
4040
from lighteval.models.sglang.sglang_model import SGLangModel, SGLangModelConfig
4141
from lighteval.models.transformers.adapter_model import AdapterModel, AdapterModelConfig
4242
from lighteval.models.transformers.delta_model import DeltaModel, DeltaModelConfig

src/lighteval/models/nanotron/nanotron_model.py

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,23 @@
1919
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
22-
2322
# ruff: noqa: C901
2423
import logging
2524
import os
2625
import time
27-
from typing import List, Optional, Tuple, Type, Union
26+
from dataclasses import dataclass
27+
from typing import Dict, List, Optional, Tuple, Type, Union
2828

2929
import torch
3030
import torch.nn.functional as F
3131
import transformers
3232
from datasets.download.streaming_download_manager import xPath
33+
from pydantic import BaseModel
3334
from torch.utils.data import DataLoader
3435
from torch.utils.data.distributed import DistributedSampler
3536
from tqdm import tqdm
3637
from transformers import AutoTokenizer, BatchEncoding
3738

38-
from lighteval.config.lighteval_config import FullNanotronConfig
3939
from lighteval.data import (
4040
GenDistributedSampler,
4141
GenerativeTaskDatasetNanotron,
@@ -69,7 +69,10 @@
6969
if is_nanotron_available():
7070
from nanotron import distributed as dist
7171
from nanotron import logging
72+
from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs
73+
from nanotron.config.parallelism_config import ParallelismArgs
7274
from nanotron.generation.decode import decode_tokenized
75+
from nanotron.generation.sampler import SamplerType
7376
from nanotron.logging import human_format, log_rank
7477
from nanotron.models import build_model
7578
from nanotron.parallel.context import ParallelContext
@@ -83,6 +86,82 @@
8386

8487
logger = logging.get_logger(__name__)
8588

89+
DEFAULT_GENERATION_SEED = 42
90+
91+
92+
class GenerationArgs(BaseModel):
93+
sampler: Optional["SamplerType"] = None
94+
temperature: Optional[float] = None
95+
top_k: Optional[int] = None
96+
top_p: Optional[float] = None
97+
n_samples: Optional[int] = None
98+
eos: Optional[str] = None
99+
seed: Optional[int] = None
100+
use_cache: Optional[bool] = False
101+
102+
def __post_init__(self):
103+
if self.seed is None:
104+
self.seed = DEFAULT_GENERATION_SEED
105+
106+
107+
@dataclass
108+
class LightEvalLoggingArgs:
109+
"""Arguments related to logging for LightEval"""
110+
111+
output_dir: str
112+
results_path_template: str | None = None
113+
save_details: bool = True
114+
push_to_hub: bool = False
115+
push_to_tensorboard: bool = False
116+
public_run: bool = False
117+
results_org: str | None = None
118+
tensorboard_metric_prefix: str = "eval"
119+
120+
121+
@dataclass
122+
class LightEvalTasksArgs:
123+
"""Arguments related to tasks for LightEval"""
124+
125+
tasks: str
126+
custom_tasks: Optional[str] = None
127+
max_samples: Optional[int] = None
128+
num_fewshot_seeds: Optional[int] = None
129+
130+
dataset_loading_processes: int = 8
131+
multichoice_continuations_start_space: Optional[bool] = None
132+
pairwise_tokenization: bool = False
133+
134+
135+
@dataclass
136+
class LightEvalConfig:
137+
"""Arguments related to running LightEval on checkpoints.
138+
139+
All is optional because you can also use this class to later supply arguments to override
140+
the saved config when running LightEval after training.
141+
"""
142+
143+
logging: LightEvalLoggingArgs
144+
tasks: LightEvalTasksArgs
145+
parallelism: "ParallelismArgs"
146+
batch_size: int = 0
147+
generation: Optional[Union[GenerationArgs, Dict[str, GenerationArgs]]] = None
148+
149+
150+
@dataclass
151+
class FullNanotronConfig:
152+
lighteval_config: LightEvalConfig
153+
nanotron_model: "ModelArgs"
154+
nanotron_tokenizer: "TokenizerArgs"
155+
nanotron_general: "GeneralArgs"
156+
157+
@property
158+
def generation_parameters(self):
159+
# Return the generation parameters from the lighteval config
160+
# or create default generation parameters if none are set
161+
if self.lighteval_config.generation:
162+
return self.lighteval_config.generation
163+
return GenerationArgs()
164+
86165

87166
class NanotronLightevalModel(LightevalModel):
88167
# Default max sequence length setting for when no `max_length` is provided

0 commit comments

Comments (0)