huggingface
diff --git a/‎docs/source/package_reference/models.mdx
Lines changed: 1 addition & 1 deletion b/‎docs/source/package_reference/models.mdx
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/custom_models/google_translate_model.py
Lines changed: 2 additions & 8 deletions b/‎examples/custom_models/google_translate_model.py
Lines changed: 2 additions & 8 deletions
diff --git a/‎examples/custom_models/local_mt_model.py
Lines changed: 2 additions & 8 deletions b/‎examples/custom_models/local_mt_model.py
Lines changed: 2 additions & 8 deletions
diff --git a/‎src/lighteval/logging/evaluation_tracker.py
Lines changed: 2 additions & 15 deletions b/‎src/lighteval/logging/evaluation_tracker.py
Lines changed: 2 additions & 15 deletions
diff --git a/‎src/lighteval/logging/info_loggers.py
Lines changed: 7 additions & 30 deletions b/‎src/lighteval/logging/info_loggers.py
Lines changed: 7 additions & 30 deletions
diff --git a/‎src/lighteval/main_accelerate.py
Lines changed: 1 addition & 1 deletion b/‎src/lighteval/main_accelerate.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/lighteval/main_baseline.py
Lines changed: 1 addition & 8 deletions b/‎src/lighteval/main_baseline.py
Lines changed: 1 addition & 8 deletions
diff --git a/‎src/lighteval/models/abstract_model.py
Lines changed: 114 additions & 7 deletions b/‎src/lighteval/models/abstract_model.py
Lines changed: 114 additions & 7 deletions
diff --git a/‎src/lighteval/models/custom/custom_model.py
Lines changed: 1 addition & 1 deletion b/‎src/lighteval/models/custom/custom_model.py
Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ set in the `model-args` or in the model yaml file (see example
 [here](https://github.com/huggingface/lighteval/blob/main/examples/model_configs/vllm_model_config.yaml)).
 
 ### Base model config
-[[autodoc]] models.utils.ModelConfig
+[[autodoc]] models.abstract_model.ModelConfig
 
 ## Local Models
 
 
@@ -32,7 +32,7 @@
 from transformers import AutoTokenizer
 
 from lighteval.data import GenerativeTaskDataset
-from lighteval.models.abstract_model import LightevalModel, ModelInfo
+from lighteval.models.abstract_model import LightevalModel
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -53,13 +53,7 @@ class GoogleTranslateClient(LightevalModel):
     def __init__(self, config) -> None:
         self.model = config.model_name
         self.model_definition_file_path = config.model_definition_file_path
-
-        self.model_info = ModelInfo(
-            model_name=config.model_name,
-            model_sha="",
-            model_dtype=None,
-            model_size=-1,
-        )
+        self.config = config
 
         self._tokenizer = AutoTokenizer.from_pretrained("gpt2")  # Use a dummy tokenizer for compatibility
 
 
@@ -34,7 +34,7 @@
 )
 
 from lighteval.data import GenerativeTaskDataset
-from lighteval.models.abstract_model import LightevalModel, ModelInfo, TokenSequence
+from lighteval.models.abstract_model import LightevalModel, TokenSequence
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -88,13 +88,7 @@ def __init__(self, config, env_config) -> None:
         self.model_definition_file_path = config.model_definition_file_path
         self.batch_size = 32
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        self.model_info = ModelInfo(
-            model_name=config.model,
-            model_sha="",
-            model_dtype=None,
-            model_size=-1,
-        )
+        self.config = config
 
         # Update model initialization to handle both models
         if "seamless-m4t" in config.model:
 
@@ -177,8 +177,7 @@ def __init__(
     @property
     def results(self):
         config_general = asdict(self.general_config_logger)
-        # We remove the config from logging, which contains context/accelerator objects
-        config_general.pop("config")
+        config_general["model_config"] = config_general["model_config"].model_dump()
         results = {
             "config_general": config_general,
             "results": self.metrics_logger.metric_aggregated,
@@ -216,19 +215,7 @@ def save(self) -> None:
         logger.info("Saving experiment tracker")
         date_id = datetime.now().isoformat().replace(":", "-")
 
-        # We first prepare data to save
-        config_general = asdict(self.general_config_logger)
-        # We remove the config from logging, which contains context/accelerator objects
-        config_general.pop("config")
-
-        results_dict = {
-            "config_general": config_general,
-            "results": self.metrics_logger.metric_aggregated,
-            "versions": self.versions_logger.versions,
-            "config_tasks": self.task_config_logger.tasks_configs,
-            "summary_tasks": self.details_logger.compiled_details,
-            "summary_general": asdict(self.details_logger.compiled_details_over_all_tasks),
-        }
+        results_dict = self.results
 
         # Create the details datasets for later upload
         details_datasets: dict[str, Dataset] = {}
 
@@ -31,7 +31,7 @@
 import xxhash
 
 from lighteval.metrics.stderr import get_stderr_function
-from lighteval.models.abstract_model import ModelInfo
+from lighteval.models.abstract_model import ModelConfig
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
 from lighteval.tasks.requests import Doc
@@ -42,7 +42,7 @@
 
 
 if is_nanotron_available():
-    from nanotron.config import Config
+    pass
 
 
 @dataclass(init=False)
@@ -64,11 +64,7 @@ class GeneralConfigLogger:
         start_time (float): Start time of the experiment. Logged at class init.
         end_time (float): End time of the experiment. Logged when calling [`GeneralConfigLogger.log_end_time`]
         total_evaluation_time_secondes (str): Inferred total evaluation time in seconds (from the start and end times).
-        model_name (str): Name of the currently evaluated model.
-        model_sha (str): Commit hash of the currently evaluated model on the hub if available.
-        model_dtype (str): Dtype of the model weights, as obtained when loading the model config.
-        model_size (str): Model size as obtained when loading the model config.
-
+        model_config (ModelConfig): Model configuration
     """
 
     # general
@@ -80,16 +76,8 @@ class GeneralConfigLogger:
     end_time: float = None
     total_evaluation_time_secondes: str = None
 
-    # model info
+    model_config: ModelConfig = None
     model_name: str = None
-    model_sha: str = None
-    model_dtype: str = None
-    model_size: str = None
-
-    generation_parameters: dict | None = None
-
-    # Nanotron config
-    config: "Config" = None
 
     def __init__(self) -> None:
         """Stores the current lighteval commit for reproducibility, and starts the evaluation timer."""
@@ -106,7 +94,6 @@ def log_args_info(
         num_fewshot_seeds: int,
         max_samples: Union[None, int],
         job_id: str,
-        config: "Config" = None,
     ) -> None:
         """
         Logs the information about the arguments passed to the method.
@@ -118,31 +105,21 @@ def log_args_info(
                 Else, the batch size is automatically inferred depending on what fits in memory.
             max_samples (Union[None, int]): maximum number of samples, if None, use all the samples available.
             job_id (str): job ID, used to retrieve logs.
-            config (optional): Nanotron Config
-
-        Returns:
-            None
-
         """
         self.num_fewshot_seeds = num_fewshot_seeds
         self.max_samples = max_samples
         self.job_id = job_id
-        self.config = config
 
-    def log_model_info(self, generation_parameters: dict, model_info: ModelInfo) -> None:
+    def log_model_info(self, model_config: ModelConfig) -> None:
         """
         Logs the model information.
 
         Args:
             model_config: the model config used to initialize the model.
-            model_info (ModelInfo): Model information to be logged.
 
         """
-        self.generation_parameters = generation_parameters
-        self.model_name = model_info.model_name
-        self.model_sha = model_info.model_sha
-        self.model_dtype = model_info.model_dtype
-        self.model_size = model_info.model_size
+        self.model_config = model_config
+        self.model_name = model_config.model_name
 
     def log_end_time(self) -> None:
         self.end_time = time.perf_counter()
 
@@ -117,11 +117,11 @@ def accelerate(  # noqa C901
     import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
+    from lighteval.models.abstract_model import ModelConfig
     from lighteval.models.transformers.adapter_model import AdapterModelConfig
     from lighteval.models.transformers.delta_model import DeltaModelConfig
     from lighteval.models.transformers.transformers_model import TransformersModelConfig
     from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig
-    from lighteval.models.utils import ModelConfig
     from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
 
     evaluation_tracker = EvaluationTracker(
 
@@ -62,7 +62,6 @@ def baseline(
         This baseline computation may not be suitable for all task types and should be used with caution.
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.abstract_model import ModelInfo
     from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
     from lighteval.tasks.registry import Registry
     from lighteval.tasks.requests import SamplingMethod
@@ -82,13 +81,7 @@ def baseline(
         hub_results_org=None,
     )
     evaluation_tracker.general_config_logger.log_model_info(
-        {},
-        ModelInfo(
-            model_name="lighteval/baseline",
-            model_sha=None,
-            model_dtype=None,
-            model_size=None,
-        ),
+        model_config=None,
     )
     evaluation_tracker.task_config_logger.log(tasks_dict)
 
 
@@ -20,26 +20,133 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import json
+import re
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Optional, Union
 
 import torch
+import yaml
+from pydantic import BaseModel
 from transformers.tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase
 
+from lighteval.models.model_input import GenerationParameters
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.requests import Doc
 
 
 TokenSequence = Union[list[int], torch.LongTensor, torch.Tensor, BatchEncoding]
 
 
-@dataclass
-class ModelInfo:
-    model_name: str
-    model_sha: str | None = None
-    model_dtype: str | None = None
-    model_size: int | None = None
+class ModelConfig(BaseModel, extra="forbid"):
+    """
+    Base configuration class for all model types in Lighteval.
+
+    This is the foundation class that all specific model configurations inherit from.
+    It provides common functionality for parsing configuration from files and command-line arguments,
+    as well as shared attributes that are used by all models like generation parameters and system prompts.
+
+    Attributes:
+        generation_parameters (GenerationParameters):
+            Configuration parameters that control text generation behavior, including
+            temperature, top_p, max_new_tokens, etc. Defaults to empty GenerationParameters.
+        system_prompt (str | None):
+            Optional system prompt to be used with chat models. This prompt sets the
+            behavior and context for the model during evaluation.
+
+    Methods:
+        from_path(path: str):
+            Load configuration from a YAML file.
+        from_args(args: str):
+            Parse configuration from a command-line argument string.
+        _parse_args(args: str):
+            Static method to parse argument strings into configuration dictionaries.
+
+    Example:
+        ```python
+        # Load from YAML file
+        config = ModelConfig.from_path("model_config.yaml")
+
+        # Load from command line arguments
+        config = ModelConfig.from_args("model_name=meta-llama/Llama-3.1-8B-Instruct,system_prompt='You are a helpful assistant.',generation_parameters={temperature:0.7}")
+
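+        # Direct instantiation (NOTE: the base class shown here declares no `model_name` field and forbids extra fields, so presumably a subclass that defines it is needed - TODO confirm)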
+        config = ModelConfig(
+            model_name="meta-llama/Llama-3.1-8B-Instruct",
+            generation_parameters=GenerationParameters(temperature=0.7),
+            system_prompt="You are a helpful assistant."
+        )
+        ```
+    """
+
+    generation_parameters: GenerationParameters = GenerationParameters()
+    system_prompt: str | None = None
+
+    @classmethod
+    def from_path(cls, path: str):
+        with open(path, "r") as f:
+            config = yaml.safe_load(f)
+
+        return cls(**config["model_parameters"])
+
+    @classmethod
+    def from_args(cls, args: str):
+        config = cls._parse_args(args)
+        return cls(**config)
+
+    @staticmethod
+    def _parse_args(args: str) -> dict:
+        """Parse a string of arguments into a configuration dictionary.
+
+        This function parses a string containing model arguments and generation parameters
+        into a flat dictionary keyed by argument name; parsed generation parameters are stored under 'generation_parameters'.
+        It specifically handles generation parameters enclosed in curly braces.
+
+        Args:
+            args (str): A string containing comma-separated key-value pairs, where generation
+                parameters can be specified in a nested JSON-like format.
+
+        Returns:
+            dict: A flat dictionary keyed by argument name:
+                - each `key=value` pair maps to its string value; a bare `key` (no `=`) maps to True
+                - 'generation_parameters' (only if given): the parsed generation-specific parameters
+
+        Examples:
+            >>> ModelConfig._parse_args("model_name=gpt2,max_length=100")
+            {
+                'model_name': 'gpt2', 'max_length': '100',
+            }
+
+            >>> ModelConfig._parse_args("model_name=gpt2,generation_parameters={temperature:0.7,top_p:0.9}")
+            {
+                'model_name': 'gpt2', 'generation_parameters': {'temperature': 0.7, 'top_p': 0.9},
+            }
+
+            >>> ModelConfig._parse_args("model_name=gpt2,use_cache,generation_parameters={temperature:0.7}")
+            {
+                'model_name': 'gpt2', 'use_cache': True, 'generation_parameters': {'temperature': 0.7},
+            }
+        """
+        # Looking for generation_parameters in the model_args
+        generation_parameters_dict = None
+        pattern = re.compile(r"(\w+)=(\{.*\}|[^,]+)")
+        matches = pattern.findall(args)
+        for key, value in matches:
+            key = key.strip()
+            if key == "generation_parameters":
+                # Keys must be quoted (since they are strings)
+                gen_params = re.sub(r"(\w+):", r'"\1":', value)
+                # for k, v where v are strings, we quote them too
+                gen_params = re.sub(r":\s*([A-Za-z_][\w.-]*)\s*(?=[,}])", r':"\1"', gen_params)
+                generation_parameters_dict = json.loads(gen_params)
+
+        args = re.sub(r"generation_parameters=\{.*?\},?", "", args).strip(",")
+        model_config = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.split(",")}
+
+        if generation_parameters_dict is not None:
+            model_config["generation_parameters"] = generation_parameters_dict
+
+        return model_config
 
 
 class LightevalModel(ABC):
 
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from lighteval.models.utils import ModelConfig
+from lighteval.models.abstract_model import ModelConfig
 
 
 class CustomModelConfig(ModelConfig):