
Commit f774425

Adds action clipping to rsl-rl wrapper (#2019)
# Description

Currently, the actions from the policy are applied directly to the environment and are often also fed back to the policy by including the last action in the observations. This can destabilize training, because a large action can set off a self-reinforcing feedback loop: a very large action produces large `last_action` observations, which often causes a large error in the critic, which in turn can lead to even larger actions being sampled in the future. This PR addresses this for the RSL-RL library by clipping the actions to (large) hard limits before applying them to the environment. This prevents the actions from growing without bound and greatly improves training stability.

Fixes #984, #1732, #1999

## Type of change

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
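As an illustration of the fix (not part of the diff itself), here is a minimal sketch of the hard clipping the wrapper now applies before stepping the environment; the tensor values and the clip limit of `100.0` are made up for the example:

```python
import torch

# Illustrative only: a batch of raw policy actions for a single environment,
# where one action has blown up to a very large value.
raw_actions = torch.tensor([[0.3, -1.7, 250.0]])
clip_value = 100.0  # hypothetical hard limit, corresponding to the new `clip_actions` setting

# The wrapper clamps every action into [-clip_value, clip_value] before env.step(),
# so an oversized action can no longer feed back into ever-larger observations.
clipped_actions = torch.clamp(raw_actions, -clip_value, clip_value)
print(clipped_actions)  # tensor([[  0.3000,  -1.7000, 100.0000]])
```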
1 parent e6f63e2 commit f774425

File tree: 6 files changed (+49, -4 lines)


scripts/reinforcement_learning/rsl_rl/play.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def main():
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
     # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
 
     print(f"[INFO]: Loading model checkpoint from: {resume_path}")
     # load previously trained model

scripts/reinforcement_learning/rsl_rl/train.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
     # wrap around environment for rsl-rl
-    env = RslRlVecEnvWrapper(env)
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
 
     # create runner from rsl-rl
     runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
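For context, a hedged sketch of how the new argument is wired through in a training or play script like the ones above; `env` and `agent_cfg` are assumed to already exist as in those scripts, and the value `100.0` is only an example:

```python
from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper

# Assumed to be set on the agent configuration beforehand (e.g. in the task's
# RslRlOnPolicyRunnerCfg); leaving it at the default None keeps the old pass-through behaviour.
agent_cfg.clip_actions = 100.0

# wrap around environment for rsl-rl, forwarding the clip setting
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
```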

source/isaaclab_rl/config/extension.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.0"
+version = "0.1.1"
 
 # Description
 title = "Isaac Lab RL"

source/isaaclab_rl/docs/CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
@@ -1,6 +1,18 @@
 Changelog
 ---------
 
+0.1.1 (2025-03-10)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added a parameter to clip the actions in the action space inside the RSL-RL wrapper.
+  This parameter is set to None by default, which is the same as not clipping the actions.
+* Added attribute :attr:`isaaclab_rl.rsl_rl.RslRlOnPolicyRunnerCfg.clip_actions` to set
+  the clipping range for the actions in the RSL-RL on-policy runner.
+
+
 0.1.0 (2024-12-27)
 ~~~~~~~~~~~~~~~~~~

source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py

Lines changed: 3 additions & 0 deletions
@@ -98,6 +98,9 @@ class RslRlOnPolicyRunnerCfg:
     algorithm: RslRlPpoAlgorithmCfg = MISSING
     """The algorithm configuration."""
 
+    clip_actions: float | None = None
+    """The clipping value for actions. If ``None``, then no clipping is done."""
+
     ##
     # Checkpointing parameters
     ##
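As a usage note, a hypothetical runner configuration that opts into the new field might look like the sketch below; `MyTaskPPORunnerCfg` and the value `100.0` are made up, and the remaining required fields are omitted for brevity:

```python
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg


@configclass
class MyTaskPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    # clip policy actions to [-100, 100] before they are applied to the environment
    clip_actions = 100.0
    # the other fields (num_steps_per_env, policy, algorithm, ...) are omitted in this sketch
```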

source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py

Lines changed: 31 additions & 1 deletion
@@ -30,14 +30,15 @@ class RslRlVecEnvWrapper(VecEnv):
     https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
     """
 
-    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
+    def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None):
         """Initializes the wrapper.
 
         Note:
             The wrapper calls :meth:`reset` at the start since the RSL-RL runner does not call reset.
 
         Args:
             env: The environment to wrap around.
+            clip_actions: The clipping value for actions. If ``None``, then no clipping is done.
 
         Raises:
             ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
@@ -50,10 +51,17 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
             )
         # initialize the wrapper
         self.env = env
+        self.clip_actions = clip_actions
+
         # store information required by wrapper
         self.num_envs = self.unwrapped.num_envs
         self.device = self.unwrapped.device
         self.max_episode_length = self.unwrapped.max_episode_length
+
+        # modify the action space to the clip range
+        self._modify_action_space()
+
+        # obtain dimensions of the environment
         if hasattr(self.unwrapped, "action_manager"):
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
@@ -72,6 +80,7 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
             self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
         else:
             self.num_privileged_obs = 0
+
         # reset at the start since the RSL-RL runner does not call reset
         self.env.reset()
 
@@ -160,6 +169,9 @@ def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102
         return obs_dict["policy"], {"observations": obs_dict}
 
     def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+        # clip actions
+        if self.clip_actions is not None:
+            actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
         # record step information
         obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
         # compute dones for compatibility with RSL-RL
@@ -177,3 +189,21 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch
 
     def close(self): # noqa: D102
         return self.env.close()
+
+    """
+    Helper functions
+    """
+
+    def _modify_action_space(self):
+        """Modifies the action space to the clip range."""
+        if self.clip_actions is None:
+            return
+
+        # modify the action space to the clip range
+        # note: this is only possible for the box action space. we need to change it in the future for other action spaces.
+        self.env.unwrapped.single_action_space = gym.spaces.Box(
+            low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,)
+        )
+        self.env.unwrapped.action_space = gym.vector.utils.batch_space(
+            self.env.unwrapped.single_action_space, self.num_envs
+        )
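To see what `_modify_action_space` produces, here is a small self-contained sketch with made-up dimensions (12 actions, 4096 environments, clip limit 100.0); it only mirrors the two space constructions above and is not taken from the commit:

```python
import gymnasium as gym

# illustrative values; in the wrapper these come from the environment and the config
clip_actions, num_actions, num_envs = 100.0, 12, 4096

# per-environment action space, bounded by the clip range
single_action_space = gym.spaces.Box(low=-clip_actions, high=clip_actions, shape=(num_actions,))
# batched space over all parallel environments
action_space = gym.vector.utils.batch_space(single_action_space, num_envs)

print(single_action_space.shape)  # (12,)
print(action_space.shape)         # (4096, 12)
```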

0 commit comments
