@@ -16,53 +16,53 @@ class VecEnv(ABC):
1616 the same action is applied to all environments and the same observation is returned from all environments.
1717
1818 All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the
19- configuration, the extra observations are used for different purposes. The following keys are reserved
20- in the "observations" dictionary (if they are present) :
19+ configuration, the extra observations are used for different purposes. The following keys are used by the
20+ environment:
2121
22- - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
23- - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
24- """
22+ - "observations" (dict[str, dict[str, torch.Tensor]]):
23+ Additional observations that are not used by the actor networks. The keys are the names of the observations
24+ and the values are the observations themselves. The following are reserved keys for the observations:
2525
26- num_envs: int
27- """Number of environments."""
26+ - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
27+ - "rnd_state": The observation is used as input to the RND network. Useful for random network distillation.
2828
29- num_obs: int
30- """Number of observations."""
29+ - "time_outs" (torch.Tensor): Timeouts for the environments. These correspond to terminations that happen due to time limits and
30+ not due to the environment reaching a terminal state. This is useful for environments that have a fixed
31+ episode length.
3132
32- num_privileged_obs: int
33- """Number of privileged observations."""
33+ - "log" (dict[str, float | torch.Tensor]): Additional information for logging and debugging purposes.
34+ The key should be a string and start with "/" for namespacing. The value can be a scalar or a tensor.
35+ If it is a tensor, the mean of the tensor is used for logging.
3436
35- num_actions: int
36- """Number of actions."""
37+ .. deprecated:: 2.0.0
3738
38- max_episode_length: int
39- """Maximum episode length."""
39+ Use "log" in the extra information dictionary instead of the "episode" key.
4040
41- privileged_obs_buf: torch.Tensor
42- """Buffer for privileged observations."""
41+ """
4342
44- obs_buf: torch.Tensor
45- """Buffer for observations."""
43+ num_envs: int
44+ """Number of environments."""
4645
47- rew_buf: torch.Tensor
48- """Buffer for rewards."""
46+ num_actions: int
47+ """Number of actions."""
48+
49+ max_episode_length: int | torch.Tensor
50+ """Maximum episode length.
4951
50- reset_buf: torch.Tensor
51- """Buffer for resets."""
52+ The maximum episode length can be a scalar or a tensor. If it is a scalar, it is the same for all environments.
53+ If it is a tensor, it is the maximum episode length for each environment. This is useful for dynamic episode
54+ lengths.
55+ """
5256
5357 episode_length_buf: torch.Tensor
5458 """Buffer for current episode lengths."""
5559
56- extras: dict
57- """Extra information (metrics).
58-
59- Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length,
60- etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
61- """
62-
6360 device: torch.device
6461 """Device to use."""
6562
63+ cfg: dict | object
64+ """Configuration object."""
65+
6666 """
6767 Operations.
6868 """
@@ -72,7 +72,7 @@ def get_observations(self) -> tuple[torch.Tensor, dict]:
7272 """Return the current observations.
7373
7474 Returns:
75- Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
75+ Tuple containing the observations and extras.
7676 """
7777 raise NotImplementedError
7878
@@ -81,19 +81,21 @@ def reset(self) -> tuple[torch.Tensor, dict]:
8181 """Reset all environment instances.
8282
8383 Returns:
84- Tuple[torch.Tensor, dict]: Tuple containing the observations and extras.
84+ Tuple containing the observations and extras.
8585 """
8686 raise NotImplementedError
8787
8888 @abstractmethod
8989 def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
9090 """Apply input action on the environment.
9191
92+ The extra information is a dictionary. It includes metrics such as the episode reward, episode length,
93+ etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
94+
9295 Args:
93- actions (torch.Tensor) : Input actions to apply. Shape: (num_envs, num_actions)
96+ actions: Input actions to apply. Shape: (num_envs, num_actions)
9497
9598 Returns:
96- Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
97- A tuple containing the observations, rewards, dones and extra information (metrics).
99+ A tuple containing the observations, rewards, dones and extra information (metrics).
98100 """
99101 raise NotImplementedError
0 commit comments