From 1ad6a0ae0901286b73237277435f93d97dde7610 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:15:28 -0500
Subject: [PATCH 1/6] Introduce _EXCLUDED_CFG_KEYS class constant for use in _prepare_terms

---
 .../isaaclab/managers/observation_manager.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab/isaaclab/managers/observation_manager.py b/source/isaaclab/isaaclab/managers/observation_manager.py
index 014ae2a00b8..8afd57cbcf0 100644
--- a/source/isaaclab/isaaclab/managers/observation_manager.py
+++ b/source/isaaclab/isaaclab/managers/observation_manager.py
@@ -62,6 +62,15 @@ class ObservationManager(ManagerBase):
     The observations are clipped and scaled as per the configuration settings.
     """
 
+    # Group-level configuration fields to skip when parsing observation terms.
+    _EXCLUDED_CFG_KEYS: tuple[str, ...] = (
+        "enable_corruption",
+        "concatenate_terms",
+        "history_length",
+        "flatten_history_dim",
+        "concatenate_dim",
+    )
+
     def __init__(self, cfg: object, env: ManagerBasedEnv):
         """Initialize observation manager.
 
@@ -515,13 +524,7 @@ def _prepare_terms(self):
             # iterate over all the terms in each group
             for term_name, term_cfg in group_cfg_items:
                 # skip non-obs settings
-                if term_name in [
-                    "enable_corruption",
-                    "concatenate_terms",
-                    "history_length",
-                    "flatten_history_dim",
-                    "concatenate_dim",
-                ]:
+                if term_name in self._EXCLUDED_CFG_KEYS:
                     continue
                 # check for non config
                 if term_cfg is None:

From 4703ff893875f9d0db7a6af79d45610339a14186 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:18:24 -0500
Subject: [PATCH 2/6] Standardize access to _get_observations in RslRlVecEnvWrapper

---
 source/isaaclab/isaaclab/envs/manager_based_env.py      | 2 +-
 source/isaaclab/isaaclab/envs/manager_based_rl_env.py   | 4 ++--
 source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py | 5 +----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 9ddc538aa41..0fc438e23b3 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -471,7 +471,7 @@ def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]:
             self.event_manager.apply(mode="interval", dt=self.step_dt)
 
         # -- compute observations
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf = self._get_observations(update_history=True)
         self.recorder_manager.record_post_step()
 
         # return observations and extras
diff --git a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
index 634bec4cae9..88f27b3ba9d 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_rl_env.py
@@ -210,7 +210,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
 
         if len(self.recorder_manager.active_terms) > 0:
             # update observations for recording if needed
-            self.obs_buf = self.observation_manager.compute()
+            self.obs_buf = self._get_observations()
             self.recorder_manager.record_post_step()
 
         # -- reset envs that terminated/timed-out and log the episode information
@@ -235,7 +235,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
             self.event_manager.apply(mode="interval", dt=self.step_dt)
         # -- compute observations
         # note: done after reset to get the correct observations for reset envs
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf = self._get_observations(update_history=True)
 
         # return observations, rewards, resets and extras
         return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index 73ceae04693..1a93796f05f 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -143,10 +143,7 @@ def reset(self) -> tuple[TensorDict, dict]:  # noqa: D102
 
     def get_observations(self) -> TensorDict:
         """Returns the current observations of the environment."""
-        if hasattr(self.unwrapped, "observation_manager"):
-            obs_dict = self.unwrapped.observation_manager.compute()
-        else:
-            obs_dict = self.unwrapped._get_observations()
+        obs_dict = self.unwrapped._get_observations()
        return TensorDict(obs_dict, batch_size=[self.num_envs])
 
     def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]:

From 366b7a76a1df285273a7a59be094ac5ff0b9965c Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 10:26:47 -0500
Subject: [PATCH 3/6] Refactor reset logic

---
 .../isaaclab/envs/manager_based_env.py | 133 ++++++++++--------
 1 file changed, 78 insertions(+), 55 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 0fc438e23b3..a4fa304c2e1 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -336,31 +336,7 @@ def reset(
         Returns:
             A tuple containing the observations and extras.
         """
-        if env_ids is None:
-            env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device)
-
-        # trigger recorder terms for pre-reset calls
-        self.recorder_manager.record_pre_reset(env_ids)
-
-        # set the seed
-        if seed is not None:
-            self.seed(seed)
-
-        # reset state of scene
-        self._reset_idx(env_ids)
-
-        # update articulation kinematics
-        self.scene.write_data_to_sim()
-        self.sim.forward()
-        # if sensors are added to the scene, make sure we render to reflect changes in reset
-        if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset:
-            self.sim.render()
-
-        # trigger recorder terms for post-reset calls
-        self.recorder_manager.record_post_reset(env_ids)
-
-        # compute observations
-        self.obs_buf = self.observation_manager.compute(update_history=True)
+        self.obs_buf, self.extras = self._reset(env_ids, None, seed)
 
         if self.cfg.wait_for_textures and self.sim.has_rtx_sensors():
             while SimulationManager.assets_loading():
@@ -393,37 +369,10 @@ def reset_to(
             is_relative: If set to True, the state is considered relative to the environment origins.
                 Defaults to False.
""" - # reset all envs in the scene if env_ids is None - if env_ids is None: - env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) + if state is None: + raise ValueError("state cannot be None!") - # trigger recorder terms for pre-reset calls - self.recorder_manager.record_pre_reset(env_ids) - - # set the seed - if seed is not None: - self.seed(seed) - - self._reset_idx(env_ids) - - # set the state - self.scene.reset_to(state, env_ids, is_relative=is_relative) - - # update articulation kinematics - self.sim.forward() - - # if sensors are added to the scene, make sure we render to reflect changes in reset - if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: - self.sim.render() - - # trigger recorder terms for post-reset calls - self.recorder_manager.record_post_reset(env_ids) - - # compute observations - self.obs_buf = self.observation_manager.compute(update_history=True) - - # return observations - return self.obs_buf, self.extras + return self._reset(env_ids, state, seed, is_relative) def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]: """Execute one time-step of the environment's dynamics. @@ -529,6 +478,80 @@ def close(self): Helper functions. """ + def _get_observations(self, update_history: bool = False) -> VecEnvObs: + """ + Computes and returns the current observation dictionary for the environment. + + Args: + update_history: The boolean indicator without return obs should be appended to observation history. + Default to False, in which case calling compute_group does not modify history. This input is no-ops + if the group's history_length == 0. + + Returns: + A dictionary containing the full set of observations. + """ + return self.observation_manager.compute(update_history) + + def _reset( + self, + env_ids: Sequence[int] | None, + state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None, + seed: int | None = None, + is_relative: bool = False, + ): + """Reset the specified environments to a given or randomized state. + + If a ``state`` is provided, the environments are restored accordingly. + Otherwise, they are reset using the environment randomization logic. + + This function calls the :meth:`_reset_idx` function to reset the specified environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + + Args: + env_ids: The environment ids to reset. Defaults to None, in which case all environments are reset. + state: The state is a dictionary containing the state of the scene entities. Defaults to None. + Please refer to :meth:`InteractiveScene.get_state` for the format. + seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. + is_relative: If set to True, the state is considered relative to the environment origins. + Defaults to False. 
+ """ + # reset all envs in the scene if env_ids is None + if env_ids is None: + env_ids = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) + + # trigger recorder terms for pre-reset calls + self.recorder_manager.record_pre_reset(env_ids) + + # set the seed + if seed is not None: + self.seed(seed) + + # reset state of scene + self._reset_idx(env_ids) + + # set the state + if state is None: + self.scene.write_data_to_sim() + else: + self.scene.reset_to(state, env_ids, is_relative=is_relative) + + # update articulation kinematics + self.sim.forward() + + # if sensors are added to the scene, make sure we render to reflect changes in reset + if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + self.sim.render() + + # trigger recorder terms for post-reset calls + self.recorder_manager.record_post_reset(env_ids) + + # compute observations + self.obs_buf = self._get_observations(update_history=True) + + # return observations + return self.obs_buf, self.extras + def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. From c3b1df39ccf384b567a44bc942b0ed7eb69aa86a Mon Sep 17 00:00:00 2001 From: Farbod Farshidian Date: Sun, 2 Nov 2025 10:29:15 -0500 Subject: [PATCH 4/6] Add normalize_processed_actions in ActionTerm and its relevant overrides --- .../isaaclab/envs/mdp/actions/joint_actions.py | 14 ++++++++++++++ .../isaaclab/isaaclab/managers/action_manager.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py b/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py index 8d5e7ebd4b3..61a552f8b42 100644 --- a/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py +++ b/source/isaaclab/isaaclab/envs/mdp/actions/joint_actions.py @@ -177,6 +177,10 @@ def process_actions(self, actions: torch.Tensor): def reset(self, env_ids: Sequence[int] | None = None) -> None: self._raw_actions[env_ids] = 0.0 + def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor: + offset_free_actions = processed_actions - self._offset + return torch.where(self._scale > 1e-8, offset_free_actions / self._scale, offset_free_actions) + class JointPositionAction(JointAction): """Joint action term that applies the processed actions to the articulation's joints as position commands.""" @@ -228,6 +232,16 @@ def apply_actions(self): # set position targets self._asset.set_joint_position_target(current_actions, joint_ids=self._joint_ids) + def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor: + """Normalization of processed actions is not supported. + + This method cannot be applied since the transformation is performed during the action application + stage (:meth:`apply_actions`) rather than during processing (:meth:`process_actions`). + """ + raise NotImplementedError( + f"Normalizing of the processed actions is not supported for {self.__class__.__name__}." 
+        )
+
 
 class JointVelocityAction(JointAction):
     """Joint action term that applies the processed actions to the articulation's joints as velocity commands."""
diff --git a/source/isaaclab/isaaclab/managers/action_manager.py b/source/isaaclab/isaaclab/managers/action_manager.py
index 9b561ceb6a7..584a59087c2 100644
--- a/source/isaaclab/isaaclab/managers/action_manager.py
+++ b/source/isaaclab/isaaclab/managers/action_manager.py
@@ -164,6 +164,21 @@ def apply_actions(self):
         """
         raise NotImplementedError
 
+    def normalize_processed_actions(self, processed_actions: torch.Tensor) -> torch.Tensor:
+        """Maps the processed actions back to the normalized action space.
+
+        This function takes processed (e.g., scaled or shifted) actions and applies the inverse
+        transformation to map them back to the normalized action range output by the policy.
+
+        Args:
+            processed_actions: The processed actions, typically a scaled or shifted version of the policy output.
+        Returns:
+            A tensor of actions mapped back to the normalized action space.
+        """
+        raise NotImplementedError(
+            f"Normalization of the processed actions is not implemented for {self.__class__.__name__}."
+        )
+
     def _set_debug_vis_impl(self, debug_vis: bool):
         """Set debug visualization into visualization objects.
 
         This function is responsible for creating the visualization objects if they don't exist

From 8376a64d91abe3d8945a03f36e0764fe220dcebd Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Sun, 2 Nov 2025 14:26:53 -0500
Subject: [PATCH 5/6] Add return type to _reset

---
 source/isaaclab/isaaclab/envs/manager_based_env.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index a4fa304c2e1..3a7a668770d 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -498,7 +498,7 @@ def _reset(
         state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None,
         seed: int | None = None,
         is_relative: bool = False,
-    ):
+    ) -> tuple[VecEnvObs, dict]:
         """Reset the specified environments to a given or randomized state.
 
         If a ``state`` is provided, the environments are restored accordingly.
@@ -515,6 +515,9 @@ def _reset(
             seed: The seed to use for randomization. Defaults to None, in which case the seed is not set.
             is_relative: If set to True, the state is considered relative to the environment origins.
                 Defaults to False.
+
+        Returns:
+            A tuple containing the observations and extras.
         """

From 0b36bbd216cb121a17eb858d07fa565ceffc9613 Mon Sep 17 00:00:00 2001
From: Farbod Farshidian
Date: Mon, 3 Nov 2025 11:37:29 -0500
Subject: [PATCH 6/6] Add return type to reset_to and simplify _reset return value

---
 .../isaaclab/envs/manager_based_env.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/source/isaaclab/isaaclab/envs/manager_based_env.py b/source/isaaclab/isaaclab/envs/manager_based_env.py
index 3a7a668770d..ce2e7dcf794 100644
--- a/source/isaaclab/isaaclab/envs/manager_based_env.py
+++ b/source/isaaclab/isaaclab/envs/manager_based_env.py
@@ -336,7 +336,7 @@ def reset(
         Returns:
             A tuple containing the observations and extras.
""" - self.obs_buf, self.extras = self._reset(env_ids, None, seed) + self.obs_buf = self._reset(env_ids, None, seed) if self.cfg.wait_for_textures and self.sim.has_rtx_sensors(): while SimulationManager.assets_loading(): @@ -351,7 +351,7 @@ def reset_to( env_ids: Sequence[int] | None, seed: int | None = None, is_relative: bool = False, - ): + ) -> tuple[VecEnvObs, dict]: """Resets specified environments to provided states. This function resets the environments to the provided states. The state is a dictionary @@ -366,13 +366,17 @@ def reset_to( :meth:`InteractiveScene.get_state` for the format. env_ids: The environment ids to reset. Defaults to None, in which case all environments are reset. seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. - is_relative: If set to True, the state is considered relative to the environment origins. - Defaults to False. + is_relative: If True, the state is considered relative to the environment origins. Defaults to False. + + Returns: + A tuple containing the observations and extras. """ if state is None: raise ValueError("state cannot be None!") - return self._reset(env_ids, state, seed, is_relative) + self.obs_buf = self._reset(env_ids, state, seed, is_relative) + + return self.obs_buf, self.extras def step(self, action: torch.Tensor) -> tuple[VecEnvObs, dict]: """Execute one time-step of the environment's dynamics. @@ -498,7 +502,7 @@ def _reset( state: dict[str, dict[str, dict[str, torch.Tensor]]] | None = None, seed: int | None = None, is_relative: bool = False, - ) -> tuple[VecEnvObs, dict]: + ) -> VecEnvObs: """Reset the specified environments to a given or randomized state. If a ``state`` is provided, the environments are restored accordingly. @@ -513,11 +517,10 @@ def _reset( state: The state is a dictionary containing the state of the scene entities. Defaults to None. Please refer to :meth:`InteractiveScene.get_state` for the format. seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. - is_relative: If set to True, the state is considered relative to the environment origins. - Defaults to False. + is_relative: If True, the state is considered relative to the environment origins. Defaults to False. Returns: - A tuple containing the observations and extras. + A dictionary containing the full set of observations. """ # reset all envs in the scene if env_ids is None if env_ids is None: @@ -553,7 +556,7 @@ def _reset( self.obs_buf = self._get_observations(update_history=True) # return observations - return self.obs_buf, self.extras + return self.obs_buf def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices.