From 1ad6a0ae0901286b73237277435f93d97dde7610 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:15:28 -0500
Subject: [PATCH 1/6] Introduce _EXCLUDED_CFG_KEYS class constant for use in _prepare_terms

---
 .../isaaclab/managers/observation_manager.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab/isaaclab/managers/observation_manager.py b/source/isaaclab/isaaclab/managers/observation_manager.py
index 014ae2a00b8..8afd57cbcf0 100644
--- a/source/isaaclab/isaaclab/managers/observation_manager.py
+++ b/source/isaaclab/isaaclab/managers/observation_manager.py
@@ -62,6 +62,15 @@ class ObservationManager(ManagerBase):
     The observations are clipped and scaled as per the configuration settings.
     """
 
+    # Group-level configuration fields to skip when parsing observation terms.
+    _EXCLUDED_CFG_KEYS: tuple[str, ...] = (
+        "enable_corruption",
+        "concatenate_terms",
+        "history_length",
+        "flatten_history_dim",
+        "concatenate_dim",
+    )
+
     def __init__(self, cfg: object, env: ManagerBasedEnv):
         """Initialize observation manager.
 
@@ -515,13 +524,7 @@ def _prepare_terms(self):
             # iterate over all the terms in each group
             for term_name, term_cfg in group_cfg_items:
                 # skip non-obs settings
-                if term_name in [
-                    "enable_corruption",
-                    "concatenate_terms",
-                    "history_length",
-                    "flatten_history_dim",
-                    "concatenate_dim",
-                ]:
+                if term_name in self._EXCLUDED_CFG_KEYS:
                     continue
                 # check for non config
                 if term_cfg is None:

From 4703ff893875f9d0db7a6af79d45610339a14186 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:18:24 -0500
Subject: [PATCH 2/6] Standardize access to _get_observations in RslRlVecEnvWrapper

---
 source/isaaclab/isaaclab/envs/manager_based_env.py      | 2 +-
 source/isaaclab/isaaclab/envs/manager_based_rl_env.py   | 4 ++--
 source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py | 5 +----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 9ddc538aa41..0fc438e23b3 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -471,7 +471,7 @@ def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]:
             self.event_manager.apply(mode="interval", dt=self.step_dt)
 
         # -- compute observations
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf = self._get_observations(update_history=True)
         self.recorder_manager.record_post_step()
 
         # return observations and extras
diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
index 634bec4cae9..88f27b3ba9d 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
@@ -210,7 +210,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         if len(self.recorder_manager.active_terms) > 0:
             # update observations for recording if needed
-            self.obs_buf = self.observation_manager.compute()
+            self.obs_buf = self._get_observations()
             self.recorder_manager.record_post_step()
 
         # -- reset envs that terminated/timed-out and log the episode information
@@ -235,7 +235,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
             self.event_manager.apply(mode="interval", dt=self.step_dt)
         # -- compute observations
         # note: done after reset to get the correct observations for reset envs
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf = self._get_observations(update_history=True)
 
         # return observations, rewards, resets and extras
         return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index 73ceae04693..1a93796f05f 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -143,10 +143,7 @@ def reset(self) -> tuple[TensorDict, dict]:  # noqa: D102
 
     def get_observations(self) -> TensorDict:
         """Returns the current observations of the environment."""
-        if hasattr(self.unwrapped, "observation_manager"):
-            obs_dict = self.unwrapped.observation_manager.compute()
-        else:
-            obs_dict = self.unwrapped._get_observations()
+        obs_dict = self.unwrapped._get_observations()
        return TensorDict(obs_dict, batch_size=[self.num_envs])
 
     def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:

From 366b7a76a1df285273a7a59be094ac5ff0b9965c Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:26:47 -0500
Subject: [PATCH 3/6] Refactor reset logic

---
 .../isaaclab/envs/manager_based_env.py | 133 ++++++++++--------
 1 file changed, 78 insertions(+), 55 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 0fc438e23b3..a4fa304c2e1 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -336,31 +336,7 @@ def reset(
         Returns:
             A tuple containing the observations and extras.
         """
-        if env_ids is None:
-            env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device)
-
-        # trigger recorder terms for pre-reset calls
-        self.recorder_manager.record_pre_reset(env_ids)
-
-        # set the seed
-        if seed is not None:
-            self.seed(seed)
-
-        # reset state of scene
-        self._reset_idx(env_ids)
-
-        # update articulation kinematics
-        self.scene.write_data_to_sim()
-        self.sim.forward()
-        # if sensors are added to the scene, make sure we render to reflect changes in reset
-        if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset:
-            self.sim.render()
-
-        # trigger recorder terms for post-reset calls
-        self.recorder_manager.record_post_reset(env_ids)
-
-        # compute observations
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf, self.extras = self._reset(env_ids, None, seed)
 
         if self.cfg.wait_for_textures and self.sim.has_rtx_sensors():
             while SimulationManager.assets_loading():
@@ -393,37 +369,10 @@ def reset_to(
             is_relative: If set to True, the state is considered relative to the environment origins.
                 Defaults to False.
""" - # reset all envs in the scene if env_ids is None - if env_ids is None: - env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) + if state is None: + raise ValueError("state cannot be None!") - # trigger recorder terms for pre-reset calls - self.recorder_manager.record_pre_reset(env_ids) - - # set the seed - if seed is not None: - self.seed(seed) - - self._reset_idx(env_ids) - - # set the state - self.scene.reset_to(state, env_ids, is_relative=is_relative) - - # update articulation kinematics - self.sim.forward() - - # if sensors are added to the scene, make sure we render to reflect changes in reset - if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: - self.sim.render() - - # trigger recorder terms for post-reset calls - self.recorder_manager.record_post_reset(env_ids) - - # compute observations - self.obs_buf = self.observation_manager.compute(update_history=True) - - # return observations - return self.obs_buf, self.extras + return self._reset(env_ids, state, seed, is_relative) def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]: """Execute one time-step of the environment's dynamics. @@ -529,6 +478,80 @@ def close(self): Helper functions. """ + def _get_observations(self, update_history: bool = False) -> VecEnvObs: + """ + Computes and returns the current observation dictionary for the environment. + + Args: + update_history: The boolean indicator without return obs should be appended to observation history. + Default to False, in which case calling compute_group does not modify history. This input is no-ops + if the group's history_length == 0. + + Returns: + A dictionary containing the full set of observations. + """ + return self.observation_manager.compute(update_history) + + def _reset( + self, + env_ids: Sequence[int] | None, + state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None, + seed: int | None = None, + is_relative: bool = False, + ): + """Reset the specified environments to a given or randomized state. + + If a ``state`` is provided, the environments are restored accordingly. + Otherwise, they are reset using the environment randomization logic. + + This function calls the :meth:`_reset_idx` function to reset the specified environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + + Args: + env_ids: The environment ids to reset. Defaults to None, in which case all environments are reset. + state: The state is a dictionary containing the state of the scene entities. Defaults to None. + Please refer to :meth:`InteractiveScene.get_state` for the format. + seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. + is_relative: If set to True, the state is considered relative to the environment origins. + Defaults to False. 
+ """ + # reset all envs in the scene if env_ids is None + if env_ids is None: + env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) + + # trigger recorder terms for pre-reset calls + self.recorder_manager.record_pre_reset(env_ids) + + # set the seed + if seed is not None: + self.seed(seed) + + # reset state of scene + self._reset_idx(env_ids) + + # set the state + if state is None: + self.scene.write_data_to_sim() + else: + self.scene.reset_to(state, env_ids, is_relative=is_relative) + + # update articulation kinematics + self.sim.forward() + + # if sensors are added to the scene, make sure we render to reflect changes in reset + if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + self.sim.render() + + # trigger recorder terms for post-reset calls + self.recorder_manager.record_post_reset(env_ids) + + # compute observations + self.obs_buf = self._get_observations(update_history=True) + + # return observations + return self.obs_buf, self.extras + def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. From c3b1df39ccf384b567a44bc942b0ed7eb69aa86a Mon Sep 17 00:00:00 2001 From: Farbod Farshidian Date: Sun, 2 Nov 2025 10:29:15 -0500 Subject: [PATCH 4/6] Add normalize_processed_actions in ActionTerm and its relevant overrides --- .../isaaclab/envs/mdp/actions/joint_actions.py | 14 ++++++++++++++ .../isaaclab/isaaclab/managers/action_manager.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py b/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py index 8d5e7ebd4b3..61a552f8b42 100644 --- a/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py +++ b/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py @@ -177,6 +177,10 @@ def process_actions(self, actions: torch.Tensor): def reset(self, env_ids: Sequence[int] | None = None) -> None: self._raw_actions[env_ids] = 0.0 + def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor: + offset_free_actions = processed_actions - self._offset + return torch.where(self._scale > 1e-8, offset_free_actions / self._scale, offset_free_actions) + class JointPositionAction(JointAction): """Joint action term that applies the processed actions to the articulation's joints as position commands.""" @@ -228,6 +232,16 @@ def apply_actions(self): # set position targets self._asset.set_joint_position_target(current_actions, joint_ids=self._joint_ids) + def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor: + """Normalization of processed actions is not supported. + + This method cannot be applied since the transformation is performed during the action application + stage (:meth:`apply_actions`) rather than during processing (:meth:`process_actions`). + """ + raise NotImplementedError( + f"Normalizing of the processed actions is not supported for {self.__class__.__name__}." 
+        )
+
 
 class JointVelocityAction(JointAction):
     """Joint action term that applies the processed actions to the articulation's joints as velocity commands."""
diff --git a/source/isaaclab/isaaclab/managers/action_manager.py b/source/isaaclab/isaaclab/managers/action_manager.py
index 9b561ceb6a7..584a59087c2 100644
--- a/source/isaaclab/isaaclab/managers/action_manager.py
+++ b/source/isaaclab/isaaclab/managers/action_manager.py
@@ -164,6 +164,21 @@ def apply_actions(self):
         """
         raise NotImplementedError
 
+    def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor:
+        """Maps the processed actions back to the normalized action space.
+
+        This function takes processed (e.g., scaled or shifted) actions and applies the inverse
+        transformation to map them back to the normalized action range output by the policy.
+
+        Args:
+            processed_actions: The processed actions, typically a scaled or shifted version of the policy output.
+        Returns:
+            A tensor of actions mapped back to the normalized action space.
+        """
+        raise NotImplementedError(
+            f"Normalization of the processed actions is not implemented for {self.__class__.__name__}."
+        )
+
     def _set_debug_vis_impl(self, debug_vis: bool):
         """Set debug visualization into visualization objects.
 
         This function is responsible for creating the visualization objects if they don't exist

From 8376a64d91abe3d8945a03f36e0764fe220dcebd Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 14:26:53 -0500
Subject: [PATCH 5/6] Add return type to _reset

---
 source/isaaclab/isaaclab/envs/manager_based_env.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index a4fa304c2e1..3a7a668770d 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -498,7 +498,7 @@ def _reset(
         state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None,
         seed: int | None = None,
         is_relative: bool = False,
-    ):
+    ) -> tuple[VecEnvObs, dict]:
         """Reset the specified environments to a given or randomized state.
 
         If a ``state`` is provided, the environments are restored accordingly.
@@ -515,6 +515,9 @@ def _reset(
             seed: The seed to use for randomization. Defaults to None, in which case the seed is not set.
             is_relative: If set to True, the state is considered relative to the environment origins.
                 Defaults to False.
+
+        Returns:
+            A tuple containing the observations and extras.
         """

From 0b36bbd216cb121a17eb858d07fa565ceffc9613 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Mon, 3 Nov 2025 11:37:29 -0500
Subject: [PATCH 6/6] Add return type to reset_to and simplify _reset return value

---
 .../isaaclab/envs/manager_based_env.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 3a7a668770d..ce2e7dcf794 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -336,7 +336,7 @@ def reset(
         Returns:
             A tuple containing the observations and extras.
""" - self.obs_buf, self.extras = self._reset(env_ids, None, seed) + self.obs_buf = self._reset(env_ids, None, seed) if self.cfg.wait_for_textures and self.sim.has_rtx_sensors(): while SimulationManager.assets_loading(): @@ -351,7 +351,7 @@ def reset_to( env_ids: Sequence[int] | None, seed: int | None = None, is_relative: bool = False, - ): + ) -> tuple[VecEnvObs, dict]: """Resets specified environments to provided states. This function resets the environments to the provided states. The state is a dictionary @@ -366,13 +366,17 @@ def reset_to( :meth:`InteractiveScene.get_state` for the format. env_ids: The environment ids to reset. Defaults to None, in which case all environments are reset. seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. - is_relative: If set to True, the state is considered relative to the environment origins. - Defaults to False. + is_relative: If True, the state is considered relative to the environment origins. Defaults to False. + + Returns: + A tuple containing the observations and extras. """ if state is None: raise ValueError("state cannot be None!") - return self._reset(env_ids, state, seed, is_relative) + self.obs_buf = self._reset(env_ids, state, seed, is_relative) + + return self.obs_buf, self.extras def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]: """Execute one time-step of the environment's dynamics. @@ -498,7 +502,7 @@ def _reset( state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None, seed: int | None = None, is_relative: bool = False, - ) -> tuple[VecEnvObs, dict]: + ) -> VecEnvObs: """Reset the specified environments to a given or randomized state. If a ``state`` is provided, the environments are restored accordingly. @@ -513,11 +517,10 @@ def _reset( state: The state is a dictionary containing the state of the scene entities. Defaults to None. Please refer to :meth:`InteractiveScene.get_state` for the format. seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. - is_relative: If set to True, the state is considered relative to the environment origins. - Defaults to False. + is_relative: If True, the state is considered relative to the environment origins. Defaults to False. Returns: - A tuple containing the observations and extras. + A dictionary containing the full set of observations. """ # reset all envs in the scene if env_ids is None if env_ids is None: @@ -553,7 +556,7 @@ def _reset( self.obs_buf = self._get_observations(update_history=True) # return observations - return self.obs_buf, self.extras + return self.obs_buf def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices.