
Commit cfcec4a

some missing
1 parent d66ea24 commit cfcec4a

21 files changed: +54 additions, -75 deletions

CONTRIBUTING.md

Lines changed: 0 additions & 18 deletions
````diff
@@ -315,24 +315,6 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
 * **Definite Articles:** Removed definite articles where possible to streamline language. (Eg: Changed "The string to replicate" to "String to replicate")
 * **Type Annotations:**
   * Always include type definitions, indicating if a parameter is optional and specifying the default value.
-  * Note that `Optional` means that the value can be `None`, and `*optional*` means that it is not required for the user to pass a value.
-    E.g., for arguments that can't be `None` and aren't required:
-
-    ```python
-    foo (`int`, *optional*, defaults to `4`):
-    ```
-
-    For arguments that can be `None` and are required:
-
-    ```python
-    foo (`Optional[int]`):
-    ```
-
-    for arguments that can be `None` and aren't required:
-
-    ```python
-    foo (`Optional[int]`, *optional*):
-    ```

 * **String Defaults:**
   * Ensured that default string values are wrapped in double quotes:
````
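
The removed note explained that `Optional` marks a value that may be `None`, while `*optional*` marks an argument the caller may omit. For orientation, the same three cases map roughly onto the `X | None` union syntax adopted across the rest of this commit; these are illustrative docstring fragments using the placeholder `foo` from the original text, not lines from the file:

```python
foo (`int`, *optional*, defaults to `4`):   # can't be None, not required
foo (`int | None`):                         # can be None, required
foo (`int | None`, *optional*):             # can be None, not required
```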

docs/source/lora_without_regret.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -143,7 +143,7 @@ For reinforcement learning, the blog uses a math reasoning task that we can repr
 ```python
 def strip_reasoning_accuracy_reward(
     completions: list[list[dict[str, str]]], solution: list[str], **kwargs
-) -> list[Optional[float]]:
+) -> list[float | None]:
     """Reward function that strips reasoning tags and checks mathematical accuracy.

     This function:
````
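
The doc's full implementation is not part of this hunk; below is only a minimal sketch of a reward function with the updated `list[float | None]` return annotation (Python 3.10+ union syntax). The `<think>...</think>` tag format and exact-match grading are assumptions for illustration; the documented function checks mathematical accuracy properly.

```python
import re


def strip_reasoning_accuracy_reward(
    completions: list[list[dict[str, str]]], solution: list[str], **kwargs
) -> list[float | None]:
    """Sketch: strip <think>...</think> blocks, then exact-match the remainder against the solution."""
    rewards: list[float | None] = []
    for completion, answer in zip(completions, solution):
        text = completion[-1]["content"]  # content of the last message in the completion
        stripped = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
        if not stripped:
            rewards.append(None)  # nothing left to grade: signal "no reward" rather than 0.0
        else:
            rewards.append(1.0 if stripped == answer.strip() else 0.0)
    return rewards
```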

tests/test_callbacks.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -116,7 +116,6 @@ def test_basic(self):
         trainer.add_callback(win_rate_callback)
         trainer.train()
         winrate_history = [h for h in trainer.state.log_history if "eval_win_rate" in h]
-
         for history_row, expected_row in zip(winrate_history, self.expected_winrates, strict=True):
             assert all(key in history_row and history_row[key] == expected_row[key] for key in expected_row)

```

trl/data_utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -246,7 +246,7 @@ def maybe_apply_chat_template(
             messages, where each message is a dictionary with keys `"role"` and `"content"`.
         tokenizer (`PreTrainedTokenizerBase`):
             Tokenizer to apply the chat template with.
-        tools (`list[Union[dict, Callable]]`, *optional*):
+        tools (`list[dict | Callable]`, *optional*):
             A list of tools (callable functions) that will be accessible to the model. If the template does not support
             function calling, this argument will have no effect.
         **template_kwargs (`Any`, *optional*):
```
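
A rough usage sketch of `maybe_apply_chat_template` with a tool passed through to the chat template. The model name and the `get_weather` function are placeholders, and per the docstring the tool only has an effect if the tokenizer's template supports function calling.

```python
from transformers import AutoTokenizer

from trl.data_utils import maybe_apply_chat_template

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # any chat model works here


def get_weather(city: str) -> str:
    """Get the current weather for a city."""
    ...


example = {"prompt": [{"role": "user", "content": "What's the weather in Paris?"}]}
formatted = maybe_apply_chat_template(example, tokenizer, tools=[get_weather])
print(formatted["prompt"])  # prompt rendered as a string with the chat template applied
```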

trl/models/modeling_base.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -391,7 +391,7 @@ def _get_current_device(cls):
         object to handle corner cases when running scripts in distributed environments.

         Returns:
-            current_device (`Union[int, str]`):
+            current_device (`int | str`):
                 The current device.
         """
         state = PartialState()
```
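
The rest of the method body is not shown in this hunk. As a standalone illustration of why the return type is `int | str` (a local GPU index in accelerated runs, the string `"cpu"` otherwise), here is a sketch using `accelerate.PartialState` as the hunk does; this is an assumption for illustration, not the code from `modeling_base.py`:

```python
import torch
from accelerate import PartialState


def get_current_device() -> int | str:
    """Return the local GPU index when CUDA is available, otherwise the string "cpu"."""
    state = PartialState()
    return state.local_process_index if torch.cuda.is_available() else "cpu"
```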

trl/models/utils.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -18,7 +18,7 @@
 from contextlib import contextmanager
 from copy import deepcopy
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Literal

 import torch
 import torch.nn as nn
@@ -104,7 +104,7 @@ def setup_chat_format(
     Args:
         model (`~transformers.PreTrainedModel`): The model to be modified.
         tokenizer (`~transformers.PreTrainedTokenizer`): The tokenizer to be modified.
-        format (`Optional[Literal["chatml"]]`): The format to be set. Defaults to "chatml".
+        format (`Literal["chatml"] | None`): The format to be set. Defaults to "chatml".
         resize_to_multiple_of (`int` or `None`): Number to resize the embedding layer to. Defaults to None.

     Returns:
@@ -306,15 +306,15 @@ def add_hooks(model: "DeepSpeedEngine") -> None:

 @contextmanager
 def unwrap_model_for_generation(
-    model: Union["DistributedDataParallel", "DeepSpeedEngine"],
+    model: "DistributedDataParallel | DeepSpeedEngine",
     accelerator: "Accelerator",
     gather_deepspeed3_params: bool = True,
 ):
     """
     Context manager to unwrap distributed or accelerated models for generation tasks.

     Args:
-        model (`Union[DistributedDataParallel, DeepSpeedEngine]`):
+        model (`DistributedDataParallel | DeepSpeedEngine`):
             Model to be unwrapped.
         accelerator (`~accelerate.Accelerator`):
             Accelerator instance managing the model.
@@ -511,7 +511,7 @@ def peft_module_casting_to_bf16(model):


 def prepare_peft_model(
-    model: PreTrainedModel, peft_config: Optional["PeftConfig"], args: TrainingArguments
+    model: PreTrainedModel, peft_config: "PeftConfig | None", args: TrainingArguments
 ) -> PreTrainedModel:
     """Prepares a model for PEFT training."""
     if not is_peft_available():
```
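
A rough usage sketch for `unwrap_model_for_generation`, showing the pattern the context manager is meant for: stripping the DDP/DeepSpeed wrapper (and gathering ZeRO-3 parameters by default) just for the `generate` call. The model name is only an example.

```python
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer

from trl.models.utils import unwrap_model_for_generation

accelerator = Accelerator()
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = accelerator.prepare(AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct"))

inputs = tokenizer("The capital of France is", return_tensors="pt").to(accelerator.device)

# Unwrap the distributed wrapper only for the duration of the generate call.
with unwrap_model_for_generation(model, accelerator) as unwrapped_model:
    output_ids = unwrapped_model.generate(**inputs, max_new_tokens=20)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```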

trl/scripts/utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -249,7 +249,7 @@ class TrlParser(HfArgumentParser):
     configurations, while also supporting configuration file loading and environment variable management.

     Args:
-        dataclass_types (`Union[DataClassType, Iterable[DataClassType]]`, *optional*):
+        dataclass_types (`DataClassType | Iterable[DataClassType]`, *optional*):
             Dataclass types to use for argument parsing.
         **kwargs:
             Additional keyword arguments passed to the [`transformers.HfArgumentParser`] constructor.
```
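
A rough sketch of how `TrlParser` is typically used in a training script; the dataclass and its defaults are placeholders, and `parse_args_and_config` also picks up a `--config` YAML file when one is passed on the command line.

```python
from dataclasses import dataclass, field

from trl import TrlParser


@dataclass
class ScriptArguments:
    dataset_name: str = field(default="trl-lib/tldr", metadata={"help": "Dataset to train on."})
    num_epochs: int = field(default=1, metadata={"help": "Number of training epochs."})


parser = TrlParser(ScriptArguments)
(script_args,) = parser.parse_args_and_config()
print(script_args.dataset_name)
```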

trl/trainer/bco_trainer.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -21,7 +21,7 @@
 from contextlib import contextmanager, nullcontext
 from operator import itemgetter
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal, Optional
+from typing import TYPE_CHECKING, Any, Literal

 import numpy as np
 import pandas as pd
@@ -90,7 +90,7 @@
 def _tokenize(
     batch: dict[str, list[Any]],
     tokenizer: "PreTrainedTokenizer",
-    embedding_tokenizer: Optional["PreTrainedTokenizer"] = None,
+    embedding_tokenizer: "PreTrainedTokenizer | None" = None,
 ) -> dict[str, list[Any]]:
     """Tokenize a batch from a BCO specific dataset."""
     prompt_tokenized = tokenizer(batch["prompt"], add_special_tokens=False)
```

trl/trainer/callbacks.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -14,7 +14,6 @@

 import logging
 import os
-from typing import Optional

 import pandas as pd
 import torch
@@ -567,7 +566,7 @@ def accuracy_scorer(prompt: str, completion: str) -> float:
         scorers (`dict[str, Callable]`, *optional*):
             Dictionary mapping scorer names to scorer functions. If `None`, operates in tracing mode (predictions
             only). If provided, operates in evaluation mode (predictions + scores + summary). Scorer functions should
-            have signature: `scorer(prompt: str, completion: str) -> Union[float, int]`
+            have signature: `scorer(prompt: str, completion: str) -> float | int`
         generation_config (`GenerationConfig`, *optional*):
             Generation config to use for generating completions.
         num_prompts (`int` or `None`, *optional*):
@@ -771,7 +770,7 @@ class MergeModelCallback(TrainerCallback):

     def __init__(
         self,
-        merge_config: Optional["MergeConfig"] = None,
+        merge_config: "MergeConfig | None" = None,
         merge_at_every_checkpoint: bool = False,
         push_to_hub: bool = False,
     ):
```
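
The scorer contract described above is just a plain callable with the documented `(prompt, completion) -> float | int` signature. A minimal illustrative `scorers` dict follows; these functions are examples, not part of TRL.

```python
def length_scorer(prompt: str, completion: str) -> int:
    """Score a completion by its length in characters."""
    return len(completion)


def keyword_scorer(prompt: str, completion: str) -> float:
    """Give 1.0 if the completion offers an explanation, 0.0 otherwise."""
    return 1.0 if "because" in completion.lower() else 0.0


scorers = {"length": length_scorer, "keyword": keyword_scorer}
```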

trl/trainer/dpo_config.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -123,7 +123,7 @@ class DPOConfig(TrainingArguments):
             Batch size to use when precomputing reference model log probabilities. This can be set higher than the
             training batch size to speed up preprocessing. If `None`, defaults to `per_device_train_batch_size` for
             training and `per_device_eval_batch_size` for evaluation.
-        tools (`Optional[list[Union[dict, Callable]]]`, *optional*):
+        tools (`list[dict] | None`, *optional*):
             List of tools (callable functions) that will be accessible to the model. If the template does not support
             function calling, this argument will have no effect.

```
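
With the narrowed `list[dict] | None` annotation, tools would be passed in dict form. A hedged sketch is below: the OpenAI-style function schema is the dict format commonly accepted by tool-aware chat templates, and the schema contents and `output_dir` value are placeholders, not values from TRL.

```python
from trl import DPOConfig

# Function schema in dict form; only used if the chat template supports function calling.
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

config = DPOConfig(output_dir="dpo-model", tools=[get_weather_tool])
```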
