Commit 7e3e65d
fix docstring formatting
1 parent 56415cf

File tree: 12 files changed (+72 additions, -103 deletions)


README.md
Lines changed: 1 addition & 2 deletions

@@ -57,8 +57,7 @@ For documentation, we adopt the [Google Style Guide](https://sphinxcontrib-napol
 We use the following tools for maintaining code quality:

 - [pre-commit](https://pre-commit.com/): Runs a list of formatters and linters over the codebase.
-- [black](https://black.readthedocs.io/en/stable/): The uncompromising code formatter.
-- [flake8](https://flake8.pycqa.org/en/latest/): A wrapper around PyFlakes, pycodestyle, and McCabe complexity checker.
+- [ruff](https://github.com/astral-sh/ruff): An extremely fast Python linter and code formatter, written in Rust.

 Please check [here](https://pre-commit.com/#install) for instructions to set these up. To run over the entire repository, please execute the following command in the terminal:
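(The command itself is clipped from this view; per pre-commit's documentation, the standard invocation for running all hooks over the whole repository is `pre-commit run --all-files`.)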

rsl_rl/env/vec_env.py
Lines changed: 7 additions & 14 deletions

@@ -13,9 +13,8 @@
 class VecEnv(ABC):
     """Abstract class for a vectorized environment.

-    The vectorized environment is a collection of environments that are synchronized. This means that
-    the same type of action is applied to all environments and the same type of observation is returned from all
-    environments.
+    The vectorized environment is a collection of environments that are synchronized. This means that the same type of
+    action is applied to all environments and the same type of observation is returned from all environments.
     """

     num_envs: int
@@ -41,16 +40,12 @@ class VecEnv(ABC):
     cfg: dict | object
     """Configuration object."""

-    """
-    Operations.
-    """
-
     @abstractmethod
     def get_observations(self) -> TensorDict:
         """Return the current observations.

         Returns:
-            observations: Observations from the environment.
+            The observations from the environment.
         """
         raise NotImplementedError

@@ -62,13 +57,12 @@ def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.T
             actions: Input actions to apply. Shape: (num_envs, num_actions)

         Returns:
-            observations: Observations from the environment.
-            rewards: Rewards from the environment. Shape: (num_envs,)
-            dones: Done flags from the environment. Shape: (num_envs,)
-            extras: Extra information from the environment.
+            observations: Observations from the environment.
+            rewards: Rewards from the environment. Shape: (num_envs,)
+            dones: Done flags from the environment. Shape: (num_envs,)
+            extras: Extra information from the environment.

         Observations:
-
             The observations TensorDict usually contains multiple observation groups. The `obs_groups`
             dictionary of the runner configuration specifies which observation groups are used for which
             purpose, i.e., it maps the available observation groups to observation sets. The observation sets
@@ -83,7 +77,6 @@ def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.T
             `rsl_rl/utils/utils.py`.

         Extras:
-
             The extras dictionary includes metrics such as the episode reward, episode length, etc. The following
             dictionary keys are used by rsl_rl:
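For orientation, here is a minimal sketch of how a consumer drives this interface. It assumes a concrete `VecEnv` implementation `env` and a `policy` callable; the `"policy"` observation group and the rollout length are illustrative assumptions, not part of the class.

```python
# Sketch of a rollout loop against the VecEnv interface above.
# Assumptions: `env` is a concrete VecEnv subclass and `policy` maps a
# (num_envs, obs_dim) tensor to (num_envs, num_actions) actions; the
# "policy" observation group is hypothetical and depends on `obs_groups`.
import torch

obs = env.get_observations()                # TensorDict of observation groups
for _ in range(24):                         # arbitrary rollout length
    with torch.no_grad():
        actions = policy(obs["policy"])     # shape: (num_envs, num_actions)
    obs, rewards, dones, extras = env.step(actions)
    # rewards and dones have shape (num_envs,); extras carries episode
    # metrics such as episode reward and length.
```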

rsl_rl/modules/actor_critic.py
Lines changed: 4 additions & 4 deletions

@@ -192,12 +192,12 @@ def load_state_dict(self, state_dict: dict, strict: bool = True) -> bool:

         Args:
             state_dict: State dictionary of the model.
-            strict: Whether to strictly enforce that the keys in state_dict match the keys returned by this
-                module's state_dict() function.
+            strict: Whether to strictly enforce that the keys in `state_dict` match the keys returned by this module's
+                :meth:`state_dict` function.

         Returns:
-            bool: Whether this training resumes a previous training. This flag is used by the `load()` function of
-                `OnPolicyRunner` to determine how to load further parameters (relevant for, e.g., distillation).
+            Whether this training resumes a previous training. This flag is used by the :func:`load` function of
+                :class:`OnPolicyRunner` to determine how to load further parameters (relevant for, e.g., distillation).
         """
         super().load_state_dict(state_dict, strict=strict)
         return True
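As a hedged illustration of how the returned flag might be consumed (the checkpoint file name and the `"model_state_dict"` key below are assumptions, not the format `OnPolicyRunner` actually writes):

```python
# Sketch: using the resume flag returned by load_state_dict.
# `policy` is an ActorCritic instance; "checkpoint.pt" and the
# "model_state_dict" key are hypothetical checkpoint details.
import torch

checkpoint = torch.load("checkpoint.pt", map_location="cpu")
resumed = policy.load_state_dict(checkpoint["model_state_dict"], strict=True)
if resumed:
    # A runner could use this flag to decide whether to also restore
    # further training state, e.g., for distillation.
    pass
```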

rsl_rl/modules/actor_critic_recurrent.py
Lines changed: 4 additions & 4 deletions

@@ -223,12 +223,12 @@ def load_state_dict(self, state_dict: dict, strict: bool = True) -> bool:

         Args:
             state_dict: State dictionary of the model.
-            strict: Whether to strictly enforce that the keys in state_dict match the keys returned by this
-                module's state_dict() function.
+            strict: Whether to strictly enforce that the keys in `state_dict` match the keys returned by this module's
+                :meth:`state_dict` function.

         Returns:
-            bool: Whether this training resumes a previous training. This flag is used by the `load()` function of
-                `OnPolicyRunner` to determine how to load further parameters (relevant for, e.g., distillation).
+            Whether this training resumes a previous training. This flag is used by the :func:`load` function of
+                :class:`OnPolicyRunner` to determine how to load further parameters (relevant for, e.g., distillation).
         """
         super().load_state_dict(state_dict, strict=strict)
         return True

rsl_rl/modules/rnd.py
Lines changed: 18 additions & 25 deletions

@@ -41,40 +41,37 @@ def __init__(
           layer.
         - If :attr:`reward_normalization` is True, then the intrinsic reward is normalized using an Empirical Discounted
           Variation Normalization layer.
-
-        .. note::
-            If the hidden dimensions are -1 in the predictor and target networks configuration, then the number of
-            states is used as the hidden dimension.
+        - If the hidden dimensions are -1 in the predictor and target networks configuration, then the number of states
+          is used as the hidden dimension.

         Args:
             num_states: Number of states/inputs to the predictor and target networks.
             obs_groups: Dictionary of observation groups.
             num_outputs: Number of outputs (embedding size) of the predictor and target networks.
             predictor_hidden_dims: List of hidden dimensions of the predictor network.
             target_hidden_dims: List of hidden dimensions of the target network.
-            activation: Activation function. Defaults to "elu".
-            weight: Scaling factor of the intrinsic reward. Defaults to 0.0.
-            state_normalization: Whether to normalize the input state. Defaults to False.
-            reward_normalization: Whether to normalize the intrinsic reward. Defaults to False.
-            device: Device to use. Defaults to "cpu".
-            weight_schedule: The type of schedule to use for the RND weight parameter.
-                Defaults to None, in which case the weight parameter is constant.
+            activation: Activation function.
+            weight: Scaling factor of the intrinsic reward.
+            state_normalization: Whether to normalize the input state.
+            reward_normalization: Whether to normalize the intrinsic reward.
+            device: Device to use.
+            weight_schedule: Type of schedule to use for the RND weight parameter.
                 It is a dictionary with the following keys:

-                - "mode": The type of schedule to use for the RND weight parameter.
+                - "mode": Type of schedule to use for the RND weight parameter.
                     - "constant": Constant weight schedule.
                     - "step": Step weight schedule.
                     - "linear": Linear weight schedule.

                 For the "step" weight schedule, the following parameters are required:

-                - "final_step": The step at which the weight parameter is set to the final value.
-                - "final_value": The final value of the weight parameter.
+                - "final_step": Step at which the weight parameter is set to the final value.
+                - "final_value": Final value of the weight parameter.

                 For the "linear" weight schedule, the following parameters are required:
-                - "initial_step": The step at which the weight parameter is set to the initial value.
-                - "final_step": The step at which the weight parameter is set to the final value.
-                - "final_value": The final value of the weight parameter.
+                - "initial_step": Step at which the weight parameter is set to the initial value.
+                - "final_step": Step at which the weight parameter is set to the final value.
+                - "final_value": Final value of the weight parameter.
         """
         # Initialize parent class
         super().__init__()
@@ -165,10 +162,6 @@ def update_normalization(self, obs: TensorDict) -> None:
         rnd_state = self.get_rnd_state(obs)
         self.state_normalizer.update(rnd_state)

-    """
-    Different weight schedules.
-    """
-
     def _constant_weight_schedule(self, step: int, **kwargs: dict[str, Any]) -> float:
         return self.initial_weight

@@ -192,10 +185,10 @@ def resolve_rnd_config(alg_cfg: dict, obs: TensorDict, obs_groups: dict[str, lis
     """Resolve the RND configuration.

     Args:
-        alg_cfg: The algorithm configuration dictionary.
-        obs: The observation dictionary.
-        obs_groups: The observation groups dictionary.
-        env: The environment.
+        alg_cfg: Algorithm configuration dictionary.
+        obs: Observation dictionary.
+        obs_groups: Observation groups dictionary.
+        env: Environment object.

     Returns:
         The resolved algorithm configuration dictionary.
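The schedule keys documented in the `weight_schedule` docstring above can be assembled into configurations like the following sketch; only the key names come from the documentation, while the numeric values are illustrative assumptions.

```python
# Illustrative weight-schedule configurations per the docstring above.
# Key names follow the documentation; the values are made up.
step_schedule = {
    "mode": "step",
    "final_step": 5_000,    # step at which the weight switches to final_value
    "final_value": 0.0,
}

linear_schedule = {
    "mode": "linear",
    "initial_step": 1_000,  # weight starts interpolating at this step
    "final_step": 10_000,   # weight reaches final_value at this step
    "final_value": 0.0,     # e.g., anneal the intrinsic reward away
}
```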

rsl_rl/modules/student_teacher.py
Lines changed: 4 additions & 4 deletions

@@ -172,12 +172,12 @@ def load_state_dict(self, state_dict: dict, strict: bool = True) -> bool:

         Args:
             state_dict: State dictionary of the model.
-            strict: Whether to strictly enforce that the keys in state_dict match the keys returned by this
-                module's state_dict() function.
+            strict: Whether to strictly enforce that the keys in `state_dict` match the keys returned by this module's
+                :meth:`state_dict` function.

         Returns:
-            bool: Whether this training resumes a previous training. This flag is used by the `load()` function of
-                `OnPolicyRunner` to determine how to load further parameters.
+            Whether this training resumes a previous training. This flag is used by the :func:`load` function of
+                :class:`OnPolicyRunner` to determine how to load further parameters.
         """
         # Check if state_dict contains teacher and student or just teacher parameters
         if any("actor" in key for key in state_dict):  # Load parameters from rl training

rsl_rl/modules/student_teacher_recurrent.py
Lines changed: 4 additions & 4 deletions

@@ -203,12 +203,12 @@ def load_state_dict(self, state_dict: dict, strict: bool = True) -> bool:

         Args:
             state_dict: State dictionary of the model.
-            strict: Whether to strictly enforce that the keys in state_dict match the keys returned by this
-                module's state_dict() function.
+            strict: Whether to strictly enforce that the keys in `state_dict` match the keys returned by this module's
+                :meth:`state_dict` function.

         Returns:
-            bool: Whether this training resumes a previous training. This flag is used by the `load()` function of
-                `OnPolicyRunner` to determine how to load further parameters.
+            Whether this training resumes a previous training. This flag is used by the :func:`load` function of
+                :class:`OnPolicyRunner` to determine how to load further parameters.
         """
         # Check if state_dict contains teacher and student or just teacher parameters
         if any("actor" in key for key in state_dict):  # Load parameters from rl training

rsl_rl/modules/symmetry.py
Lines changed: 2 additions & 2 deletions

@@ -12,8 +12,8 @@ def resolve_symmetry_config(alg_cfg: dict, env: VecEnv) -> dict:
     """Resolve the symmetry configuration.

     Args:
-        alg_cfg: The algorithm configuration dictionary.
-        env: The environment.
+        alg_cfg: Algorithm configuration dictionary.
+        env: Environment object.

     Returns:
         The resolved algorithm configuration dictionary.

rsl_rl/networks/memory.py
Lines changed: 1 addition & 2 deletions

@@ -14,8 +14,7 @@
 class Memory(nn.Module):
     """Memory module for recurrent networks.

-    This module is used to store the hidden states of the policy.
-    Currently only supports GRU and LSTM.
+    This module is used to store the hidden states of the policy. It currently only supports GRU and LSTM.
     """

     def __init__(self, input_size: int, hidden_dim: int = 256, num_layers: int = 1, type: str = "lstm") -> None:
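A small, hypothetical instantiation following the signature above; per the docstring, only GRU and LSTM are supported, and the import path is an assumption.

```python
# Hypothetical instantiation of the Memory module per the signature above.
from rsl_rl.networks import Memory  # import path assumed

gru_memory = Memory(input_size=64, hidden_dim=256, num_layers=1, type="gru")
lstm_memory = Memory(input_size=64)  # defaults: hidden_dim=256, num_layers=1, type="lstm"
```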

rsl_rl/networks/mlp.py
Lines changed: 8 additions & 14 deletions

@@ -15,17 +15,12 @@
 class MLP(nn.Sequential):
     """Multi-layer perceptron.

-    The MLP network is a sequence of linear layers and activation functions. The
-    last layer is a linear layer that outputs the desired dimension unless the
-    last activation function is specified.
+    The MLP network is a sequence of linear layers and activation functions. The last layer is a linear layer that
+    outputs the desired dimension unless the last activation function is specified.

     It provides additional conveniences:
-
-    - If the hidden dimensions have a value of ``-1``, the dimension is inferred
-      from the input dimension.
-    - If the output dimension is a tuple, the output is reshaped to the desired
-      shape.
-
+    - If the hidden dimensions have a value of ``-1``, the dimension is inferred from the input dimension.
+    - If the output dimension is a tuple, the output is reshaped to the desired shape.
     """

     def __init__(
@@ -41,11 +36,10 @@ def __init__(
         Args:
             input_dim: Dimension of the input.
             output_dim: Dimension of the output.
-            hidden_dims: Dimensions of the hidden layers. A value of ``-1`` indicates
-                that the dimension should be inferred from the input dimension.
-            activation: Activation function. Defaults to "elu".
-            last_activation: Activation function of the last layer. Defaults to None,
-                in which case the last layer is linear.
+            hidden_dims: Dimensions of the hidden layers. A value of ``-1`` indicates that the dimension should be
+                inferred from the input dimension.
+            activation: Activation function.
+            last_activation: Activation function of the last layer. None results in a linear last layer.
         """
         super().__init__()
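A short sketch of the two conveniences described in the docstring above; the import path and the reshape behavior on forward are taken on the docstring's word, not verified against the implementation.

```python
# Sketch of the MLP conveniences documented above; import path assumed.
import torch
from rsl_rl.networks import MLP

# A -1 hidden dimension is inferred from the input dimension (here 32).
net = MLP(input_dim=32, output_dim=(4, 3), hidden_dims=[-1, 128], activation="elu")

x = torch.randn(16, 32)
y = net(x)  # per the docstring, a tuple output_dim reshapes the output, e.g. to (16, 4, 3)
```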
