@@ -16,53 +16,53 @@ class VecEnv(ABC):
1616 the same action is applied to all environments and the same observation is returned from all environments.
1717
1818 All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the
19- configuration, the extra observations are used for different purposes. The following keys are reserved
20- in the "observations" dictionary (if they are present) :
19+ configuration, the extra observations are used for different purposes. The following keys are used by the
20+ environment:
2121
22- - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
23- - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
24- """
22+ - "observations" (dict[str, dict[str, torch.Tensor]]):
23+ Additional observations that are not used by the actor networks. The keys are the names of the observations
24+ and the values are the observations themselves. The following are reserved keys for the observations:
2525
26- num_envs: int
27- """Number of environments."""
26+ - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
27+ - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
2828
29- num_obs: int
30- """Number of observations."""
29+ - "time_outs" (torch.Tensor): Timeouts for the environments. These correspond to terminations that happen due to time limits and
30+ not due to the environment reaching a terminal state. This is useful for environments that have a fixed
31+ episode length.
3132
32- num_privileged_obs: int
33- """Number of privileged observations."""
33+ - "log" (dict[str, float | torch.Tensor]): Additional information for logging and debugging purposes.
34+ The key should be a string and start with "/" for namespacing. The value can be a scalar or a tensor.
35+ If it is a tensor, the mean of the tensor is used for logging.
3436
35- num_actions: int
36- """Number of actions."""
37+ .. deprecated:: 2.0.0
3738
38- max_episode_length: int
39- """Maximum episode length."""
39+ Use "log" in the extra information dictionary instead of the "episode" key.
4040
41- privileged_obs_buf: torch.Tensor
42- """Buffer for privileged observations."""
41+ """
4342
44- obs_buf: torch.Tensor
45- """Buffer for observations."""
43+ num_envs: int
44+ """Number of environments."""
4645
47- rew_buf: torch.Tensor
48- """Buffer for rewards."""
46+ num_actions: int
47+ """Number of actions."""
48+
49+ max_episode_length: int | torch.Tensor
50+ """Maximum episode length.
4951
50- reset_buf: torch.Tensor
51- """Buffer for resets."""
52+ The maximum episode length can be a scalar or a tensor. If it is a scalar, it is the same for all environments.
53+ If it is a tensor, it is the maximum episode length for each environment. This is useful for dynamic episode
54+ lengths.
55+ """
5256
5357 episode_length_buf: torch.Tensor
5458 """Buffer for current episode lengths."""
5559
56- extras: dict
57- """Extra information (metrics).
58-
59- Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length,
60- etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
61- """
62-
6360 device: torch.device
6461 """Device to use."""
6562
63+ cfg: dict | object
64+ """Configuration object."""
65+
6666 """
6767 Operations.
6868 """
@@ -72,7 +72,7 @@ def get_observations(self) -> tuple[torch.Tensor, dict]:
7272 """Return the current observations.
7373
7474 Returns:
75- Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
75+ Tuple containing the observations and extras.
7676 """
7777 raise NotImplementedError
7878
@@ -81,19 +81,21 @@ def reset(self) -> tuple[torch.Tensor, dict]:
8181 """Reset all environment instances.
8282
8383 Returns:
84- Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
84+ Tuple containing the observations and extras.
8585 """
8686 raise NotImplementedError
8787
8888 @abstractmethod
8989 def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
9090 """Apply input action on the environment.
9191
92+ The extra information is a dictionary. It includes metrics such as the episode reward, episode length,
93+ etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
94+
9295 Args:
93- actions (torch.Tensor) : Input actions to apply. Shape: (num_envs, num_actions)
96+ actions: Input actions to apply. Shape: (num_envs, num_actions)
9497
9598 Returns:
96- Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
97- A tuple containing the observations, rewards, dones and extra information (metrics).
99+ A tuple containing the observations, rewards, dones and extra information (metrics).
98100 """
99101 raise NotImplementedError
0 commit comments