
Commit f774425

Adds action clipping to rsl-rl wrapper (#2019)
# Description

Currently, the actions from the policy are applied directly to the environment and are often also fed back to the policy by including the last action in the observations. This can destabilize training, because a large action can set off a self-reinforcing feedback loop: a very large action produces large `last_action` observations, which often causes a large error in the critic, which in turn can lead to even larger actions being sampled in the future. This PR addresses this for the RSL-RL library by clipping the actions to (large) hard limits before applying them to the environment. This prevents the actions from growing without bound and greatly improves training stability.

Fixes #984, #1732, #1999

## Type of change

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
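As an illustration of the fix (not part of the diff itself), here is a minimal sketch of the hard clipping the wrapper now applies before stepping the environment; the tensor values and the clip limit of `100.0` are made up for the example:

```python
import torch

# Illustrative only: a batch of raw policy actions for a single environment,
# where one action has blown up to a very large value.
raw_actions = torch.tensor([[0.3, -1.7, 250.0]])
clip_value = 100.0  # hypothetical hard limit, corresponding to the new `clip_actions` setting

# The wrapper clamps every action into [-clip_value, clip_value] before env.step(),
# so an oversized action can no longer feed back into ever-larger observations.
clipped_actions = torch.clamp(raw_actions, -clip_value, clip_value)
print(clipped_actions)  # tensor([[  0.3000,  -1.7000, 100.0000]])
```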
1 parent e6f63e2 commit f774425

File tree: 6 files changed (+49, -4 lines)


scripts/reinforcement_learning/rsl_rl/play.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def main():
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
     # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
 
     print(f"[INFO]: Loading model checkpoint from: {resume_path}")
     # load previously trained model

scripts/reinforcement_learning/rsl_rl/train.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
     # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
 
     # create runner from rsl-rl
     runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
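For context, a hedged sketch of how the new argument is wired through in a training or play script like the ones above; `env` and `agent_cfg` are assumed to already exist as in those scripts, and the value `100.0` is only an example:

```python
from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper

# Assumed to be set on the agent configuration beforehand (e.g. in the task's
# RslRlOnPolicyRunnerCfg); leaving it at the default None keeps the old pass-through behaviour.
agent_cfg.clip_actions = 100.0

# wrap around environment for rsl-rl, forwarding the clip setting
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
```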

source/isaaclab_rl/config/extension.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.0"
+version = "0.1.1"
 
 # Description
 title = "Isaac Lab RL"

source/isaaclab_rl/docs/CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
@@ -1,6 +1,18 @@
 Changelog
 ---------
 
+0.1.1 (2025-03-10)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added a parameter to clip the actions in the action space inside the RSL-RL wrapper.
+  This parameter is set to None by default, which is the same as not clipping the actions.
+* Added attribute :attr:`isaaclab_rl.rsl_rl.RslRlOnPolicyRunnerCfg.clip_actions` to set
+  the clipping range for the actions in the RSL-RL on-policy runner.
+
+
 0.1.0 (2024-12-27)
 ~~~~~~~~~~~~~~~~~~

source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py

Lines changed: 3 additions & 0 deletions
@@ -98,6 +98,9 @@ class RslRlOnPolicyRunnerCfg:
     algorithm: RslRlPpoAlgorithmCfg = MISSING
     """The algorithm configuration."""
 
+    clip_actions: float | None = None
+    """The clipping value for actions. If ``None``, then no clipping is done."""
+
     ##
     # Checkpointing parameters
     ##
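As a usage note, a hypothetical runner configuration that opts into the new field might look like the sketch below; `MyTaskPPORunnerCfg` and the value `100.0` are made up, and the remaining required fields are omitted for brevity:

```python
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg


@configclass
class MyTaskPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    # clip policy actions to [-100, 100] before they are applied to the environment
    clip_actions = 100.0
    # the other fields (num_steps_per_env, policy, algorithm, ...) are omitted in this sketch
```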

source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py

Lines changed: 31 additions & 1 deletion
@@ -30,14 +30,15 @@ class RslRlVecEnvWrapper(VecEnv):
     https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
     """
 
-    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
+    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None):
         """Initializes the wrapper.
 
         Note:
             The wrapper calls :meth:`reset` at the start since the RSL-RL runner does not call reset.
 
         Args:
             env: The environment to wrap around.
+            clip_actions: The clipping value for actions. If ``None``, then no clipping is done.
 
         Raises:
             ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
@@ -50,10 +51,17 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
             )
         # initialize the wrapper
         self.env = env
+        self.clip_actions = clip_actions
+
         # store information required by wrapper
         self.num_envs = self.unwrapped.num_envs
         self.device = self.unwrapped.device
         self.max_episode_length = self.unwrapped.max_episode_length
+
+        # modify the action space to the clip range
+        self._modify_action_space()
+
+        # obtain dimensions of the environment
         if hasattr(self.unwrapped, "action_manager"):
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
@@ -72,6 +80,7 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
             self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
         else:
             self.num_privileged_obs = 0
+
         # reset at the start since the RSL-RL runner does not call reset
         self.env.reset()
 
@@ -160,6 +169,9 @@ def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102
         return obs_dict["policy"], {"observations": obs_dict}
 
     def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+        # clip actions
+        if self.clip_actions is not None:
+            actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
         # record step information
         obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
         # compute dones for compatibility with RSL-RL
@@ -177,3 +189,21 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch
 
     def close(self): # noqa: D102
         return self.env.close()
+
+    """
+    Helper functions
+    """
+
+    def _modify_action_space(self):
+        """Modifies the action space to the clip range."""
+        if self.clip_actions is None:
+            return
+
+        # modify the action space to the clip range
+        # note: this is only possible for the box action space. we need to change it in the future for other action spaces.
+        self.env.unwrapped.single_action_space = gym.spaces.Box(
+            low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,)
+        )
+        self.env.unwrapped.action_space = gym.vector.utils.batch_space(
+            self.env.unwrapped.single_action_space, self.num_envs
+        )
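To see what `_modify_action_space` produces, here is a small self-contained sketch with made-up dimensions (12 actions, 4096 environments, clip limit 100.0); it only mirrors the two space constructions above and is not taken from the commit:

```python
import gymnasium as gym

# illustrative values; in the wrapper these come from the environment and the config
clip_actions, num_actions, num_envs = 100.0, 12, 4096

# per-environment action space, bounded by the clip range
single_action_space = gym.spaces.Box(low=-clip_actions, high=clip_actions, shape=(num_actions,))
# batched space over all parallel environments
action_space = gym.vector.utils.batch_space(single_action_space, num_envs)

print(single_action_space.shape)  # (12,)
print(action_space.shape)         # (4096, 12)
```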

0 commit comments
