
Commit 7ea72c4

Fixes MARL workflows for recording videos during training/inferencing (#1596)
# Description

Fixes a bug so that using the training workflow on a MARL workflow populates videos/train. See #1595.

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Screenshots

![before_and_after](https://github.com/user-attachments/assets/5b662a88-dedd-4220-a0c4-8e7d09ceb51f)

The first run was without the changes, where videos/train stays empty. The second run is after the changes, with videos/train successfully populated.

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [N/A] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [Sort of] I have added tests that prove my fix is effective or that my feature works: I have verified that it works on train.py for skrl and rl_games. I have not verified rsl_rl or sb3, and have not verified play.py on any of the four. However, I have implemented the changes in all of them, as they all follow the exact same structure.
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there; unsure if this fix is worth being labelled as a contributor. If so, I would be happy to be added to CONTRIBUTORS.md (full name: Rishi Veerapaneni).
1 parent e8ea185 commit 7ea72c4
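
The fix hinges on wrapper ordering: `gym.wrappers.RecordVideo` must wrap an environment that already presents a single-agent Gymnasium interface with a frame-returning `render()`, so the MARL-to-single-agent conversion has to happen first. A minimal sketch of the corrected ordering, assuming `env_cfg` has been parsed as in the workflow scripts below; the task name and `video_kwargs` values are illustrative:

```python
import gymnasium as gym

from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent

# illustrative task name; env_cfg is assumed to be the parsed task configuration
env = gym.make("Isaac-Cart-Double-Pendulum-Direct-v0", cfg=env_cfg, render_mode="rgb_array")

# convert to a single-agent view FIRST, so the recorder sees a standard
# single-agent Gymnasium env with render_mode and a frame-returning render()
if isinstance(env.unwrapped, DirectMARLEnv):
    env = multi_agent_to_single_agent(env)

# only then attach the recorder; with the old ordering it wrapped the raw
# multi-agent env and videos/train stayed empty
video_kwargs = {
    "video_folder": "videos/train",          # illustrative output folder
    "step_trigger": lambda step: step == 0,  # record from the first step
    "video_length": 200,
}
env = gym.wrappers.RecordVideo(env, **video_kwargs)
```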

File tree

12 files changed
+45 -32 lines changed

source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py

Lines changed: 2 additions & 1 deletion
@@ -58,6 +58,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
         self.cfg = self.env.cfg
         self.sim = self.env.sim
         self.scene = self.env.scene
+        self.render_mode = self.env.render_mode
 
         self.single_observation_space = gym.spaces.Dict()
         if self._state_as_observation:
@@ -126,7 +127,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
         return obs, rewards, terminated, time_outs, extras
 
     def render(self, recompute: bool = False) -> np.ndarray | None:
-        self.env.render(recompute)
+        return self.env.render(recompute)
 
     def close(self) -> None:
         self.env.close()
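
Both changes in this file are needed for recording: `gym.wrappers.RecordVideo` reads the wrapper's `render_mode` attribute and stores whatever `render()` returns, so a dropped return value silently yields empty videos. A minimal sketch of the before/after pattern, using simplified stand-ins rather than the actual Isaac Lab class:

```python
import numpy as np


class BrokenSingleAgentWrapper:
    """Before the fix: the frame is computed but never returned."""

    def __init__(self, env):
        self.env = env  # no render_mode exposed either

    def render(self, recompute: bool = False) -> np.ndarray | None:
        self.env.render(recompute)  # recorder receives None -> empty videos


class FixedSingleAgentWrapper:
    """After the fix: render_mode is exposed and the frame is propagated."""

    def __init__(self, env):
        self.env = env
        self.render_mode = env.render_mode  # checked by RecordVideo

    def render(self, recompute: bool = False) -> np.ndarray | None:
        return self.env.render(recompute)  # frame reaches the recorder
```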

source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml

Lines changed: 1 addition & 1 deletion
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log

source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml

Lines changed: 1 addition & 1 deletion
@@ -78,5 +78,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log

source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml

Lines changed: 1 addition & 1 deletion
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log

source/standalone/workflows/rl_games/play.py

Lines changed: 5 additions & 4 deletions
@@ -94,6 +94,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -106,10 +111,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 

source/standalone/workflows/rl_games/train.py

Lines changed: 5 additions & 4 deletions
@@ -129,6 +129,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -141,10 +146,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 

source/standalone/workflows/rsl_rl/play.py

Lines changed: 5 additions & 4 deletions
@@ -74,6 +74,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -86,10 +91,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)
 

source/standalone/workflows/rsl_rl/train.py

Lines changed: 4 additions & 4 deletions
@@ -100,6 +100,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
 
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # save resume path before creating a new log_dir
     if agent_cfg.resume:
         resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
@@ -116,10 +120,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)

source/standalone/workflows/sb3/play.py

Lines changed: 6 additions & 0 deletions
@@ -48,6 +48,7 @@
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import VecNormalize
 
+from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent
 from omni.isaac.lab.utils.dict import print_dict
 
 import omni.isaac.lab_tasks  # noqa: F401
@@ -82,6 +83,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {

source/standalone/workflows/sb3/train.py

Lines changed: 5 additions & 4 deletions
@@ -104,6 +104,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -116,10 +121,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for stable baselines
     env = Sb3VecEnvWrapper(env)
 
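
As a quick post-run sanity check (mirroring the before/after screenshot in the description), the recorder's output folder should now contain clips. A hedged sketch; the log-directory path is illustrative and depends on the workflow and run name:

```python
from pathlib import Path

# illustrative location: substitute the actual run's log directory
video_dir = Path("logs/rl_games/cart_double_pendulum/videos/train")
clips = sorted(video_dir.glob("*.mp4"))
print(f"found {len(clips)} recorded clip(s)")  # non-zero after the fix
```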
