From de409808543d322d6dd60d46a7eade7ed4fdfc1f Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:19:29 +0200 Subject: [PATCH 1/6] adds missing import --- source/SO_100/SO_100/robots/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/SO_100/SO_100/robots/__init__.py b/source/SO_100/SO_100/robots/__init__.py index 9e63b1b..ff8c68b 100644 --- a/source/SO_100/SO_100/robots/__init__.py +++ b/source/SO_100/SO_100/robots/__init__.py @@ -9,3 +9,4 @@ # SPDX-License-Identifier: BSD-3-Clause from .so_arm100 import * +from .so_arm100_roscon import * \ No newline at end of file From 275d0ee3d740d38cbb6c435368eb9e71f73344d8 Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:29:35 +0200 Subject: [PATCH 2/6] updates rsl rl train script --- scripts/rsl_rl/cli_args.py | 17 +-- scripts/rsl_rl/play.py | 231 +++++++++++++++++++++---------------- 2 files changed, 139 insertions(+), 109 deletions(-) diff --git a/scripts/rsl_rl/cli_args.py b/scripts/rsl_rl/cli_args.py index 4bc4f87..23b79b6 100644 --- a/scripts/rsl_rl/cli_args.py +++ b/scripts/rsl_rl/cli_args.py @@ -1,9 +1,4 @@ -# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -# -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause @@ -15,7 +10,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg + from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg def add_rsl_rl_args(parser: argparse.ArgumentParser): @@ -44,7 +39,7 @@ def add_rsl_rl_args(parser: argparse.ArgumentParser): ) -def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPolicyRunnerCfg: +def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: """Parse configuration for RSL-RL agent based on inputs. Args: @@ -57,12 +52,12 @@ def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPol from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry # load the default configuration - rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") + rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) return rslrl_cfg -def update_rsl_rl_cfg(agent_cfg: RslRlOnPolicyRunnerCfg, args_cli: argparse.Namespace): +def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): """Update configuration for RSL-RL agent based on inputs. Args: @@ -93,4 +88,4 @@ def update_rsl_rl_cfg(agent_cfg: RslRlOnPolicyRunnerCfg, args_cli: argparse.Name agent_cfg.wandb_project = args_cli.log_project_name agent_cfg.neptune_project = args_cli.log_project_name - return agent_cfg + return agent_cfg \ No newline at end of file diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index dba2368..00932a3 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -1,18 +1,14 @@ -# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -# -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause -"""Script to play a checkpoint if an RL agent from RSL-RL.""" +"""Script to train RL agent with RSL-RL.""" """Launch Isaac Sim Simulator first.""" import argparse +import sys from isaaclab.app import AppLauncher @@ -23,76 +19,136 @@ parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument( - "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." -) +parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") parser.add_argument( - "--use_pretrained_checkpoint", - action="store_true", - help="Use the pre-trained checkpoint from Nucleus.", + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +) +parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") +parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") +parser.add_argument( + "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." ) -parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") +parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") # append RSL-RL cli arguments cli_args.add_rsl_rl_args(parser) # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) -args_cli = parser.parse_args() +args_cli, hydra_args = parser.parse_known_args() + # always enable cameras to record video if args_cli.video: args_cli.enable_cameras = True +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + # launch omniverse app app_launcher = AppLauncher(args_cli) simulation_app = app_launcher.app +"""Check for minimum supported RSL-RL version.""" + +import importlib.metadata as metadata +import platform + +from packaging import version + +# check minimum supported rsl-rl version +RSL_RL_VERSION = "3.0.1" +installed_version = metadata.version("rsl-rl-lib") +if version.parse(installed_version) < version.parse(RSL_RL_VERSION): + if platform.system() == "Windows": + cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] + else: + cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] + print( + f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'" + f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:" + f"\n\n\t{' '.join(cmd)}\n" + ) + exit(1) + """Rest everything follows.""" +import gymnasium as gym import os -import time +import torch +from datetime import datetime + +import omni +from rsl_rl.runners import DistillationRunner, OnPolicyRunner + +from isaaclab.envs import ( + DirectMARLEnv, + DirectMARLEnvCfg, + DirectRLEnvCfg, + ManagerBasedRLEnvCfg, + multi_agent_to_single_agent, +) +from isaaclab.utils.dict import print_dict +from isaaclab.utils.io import dump_pickle, dump_yaml + +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper -import gymnasium as gym import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import get_checkpoint_path +from isaaclab_tasks.utils.hydra import hydra_task_config + import SO_100.tasks # noqa: F401 -import torch -from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent -from isaaclab.utils.assets import retrieve_file_path -from isaaclab.utils.dict import print_dict -from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import ( - RslRlOnPolicyRunnerCfg, - RslRlVecEnvWrapper, - export_policy_as_jit, - export_policy_as_onnx, -) -from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg -from rsl_rl.runners import OnPolicyRunner +torch.backends.cuda.matmul.allow_tf32 = True +torch.backends.cudnn.allow_tf32 = True +torch.backends.cudnn.deterministic = False +torch.backends.cudnn.benchmark = False -def main(): - """Play with RSL-RL agent.""" - # parse configuration - env_cfg = parse_env_cfg( - args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric + +@hydra_task_config(args_cli.task, args_cli.agent) +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): + """Train with RSL-RL agent.""" + # override configurations with non-hydra CLI arguments + agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + agent_cfg.max_iterations = ( + args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations ) - agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli) + + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg.seed + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + + # multi-gpu training configuration + if args_cli.distributed: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + agent_cfg.device = f"cuda:{app_launcher.local_rank}" + + # set seed to have diversity in different threads + seed = agent_cfg.seed + app_launcher.local_rank + env_cfg.seed = seed + agent_cfg.seed = seed # specify directory for logging experiments log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Loading experiment from directory: {log_root_path}") - if args_cli.use_pretrained_checkpoint: - resume_path = get_published_pretrained_checkpoint("rsl_rl", args_cli.task) - if not resume_path: - print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") - return - elif args_cli.checkpoint: - resume_path = retrieve_file_path(args_cli.checkpoint) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + # specify directory for logging runs: {time-stamp}_{run_name} + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) + print(f"Exact experiment name requested from command line: {log_dir}") + if agent_cfg.run_name: + log_dir += f"_{agent_cfg.run_name}" + log_dir = os.path.join(log_root_path, log_dir) + + # set the IO descriptors output directory if requested + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.export_io_descriptors = args_cli.export_io_descriptors + env_cfg.io_descriptors_output_dir = log_dir else: - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - - log_dir = os.path.dirname(resume_path) + omni.log.warn( + "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." + ) # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) @@ -101,11 +157,15 @@ def main(): if isinstance(env.unwrapped, DirectMARLEnv): env = multi_agent_to_single_agent(env) + # save resume path before creating a new log_dir + if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + # wrap for video recording if args_cli.video: video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "play"), - "step_trigger": lambda step: step == 0, + "video_folder": os.path.join(log_dir, "videos", "train"), + "step_trigger": lambda step: step % args_cli.video_interval == 0, "video_length": args_cli.video_length, "disable_logger": True, } @@ -116,54 +176,29 @@ def main(): # wrap around environment for rsl-rl env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - ppo_runner.load(resume_path) - - # obtain the trained policy for inference - policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) - - # extract the neural network module - # we do this in a try-except to maintain backwards compatibility. - try: - # version 2.3 onwards - policy_nn = ppo_runner.alg.policy - except AttributeError: - # version 2.2 and below - policy_nn = ppo_runner.alg.actor_critic - - # export policy to onnx/jit - export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx( - policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx" - ) - - dt = env.unwrapped.step_dt - - # reset environment - obs, _ = env.get_observations() - timestep = 0 - # simulate environment - while simulation_app.is_running(): - start_time = time.time() - # run everything in inference mode - with torch.inference_mode(): - # agent stepping - actions = policy(obs) - # env stepping - obs, _, _, _ = env.step(actions) - if args_cli.video: - timestep += 1 - # Exit the play loop after recording one video - if timestep == args_cli.video_length: - break - - # time delay for real-time evaluation - sleep_time = dt - (time.time() - start_time) - if args_cli.real_time and sleep_time > 0: - time.sleep(sleep_time) + # create runner from rsl-rl + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + # write git state to logs + runner.add_git_repo_to_log(__file__) + # load the checkpoint + if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + # load previously trained model + runner.load(resume_path) + + # dump the configuration into log-directory + dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) + dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg) + dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg) + + # run training + runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) # close the simulator env.close() @@ -173,4 +208,4 @@ def main(): # run the main function main() # close sim app - simulation_app.close() + simulation_app.close() \ No newline at end of file From 34e55819cba959647478f65bf18bf27ede623157 Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:37:32 +0200 Subject: [PATCH 3/6] formats --- scripts/rsl_rl/cli_args.py | 12 +++++++++++- scripts/rsl_rl/play.py | 26 +++++++++++++++---------- source/SO_100/SO_100/robots/__init__.py | 2 +- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/scripts/rsl_rl/cli_args.py b/scripts/rsl_rl/cli_args.py index 23b79b6..c651a38 100644 --- a/scripts/rsl_rl/cli_args.py +++ b/scripts/rsl_rl/cli_args.py @@ -1,3 +1,13 @@ +# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # @@ -88,4 +98,4 @@ def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespac agent_cfg.wandb_project = args_cli.log_project_name agent_cfg.neptune_project = args_cli.log_project_name - return agent_cfg \ No newline at end of file + return agent_cfg diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 00932a3..0f2d2cb 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -1,3 +1,13 @@ +# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # @@ -72,14 +82,14 @@ """Rest everything follows.""" -import gymnasium as gym import os -import torch from datetime import datetime +import gymnasium as gym +import isaaclab_tasks # noqa: F401 import omni -from rsl_rl.runners import DistillationRunner, OnPolicyRunner - +import SO_100.tasks # noqa: F401 +import torch from isaaclab.envs import ( DirectMARLEnv, DirectMARLEnvCfg, @@ -89,14 +99,10 @@ ) from isaaclab.utils.dict import print_dict from isaaclab.utils.io import dump_pickle, dump_yaml - from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper - -import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path from isaaclab_tasks.utils.hydra import hydra_task_config - -import SO_100.tasks # noqa: F401 +from rsl_rl.runners import DistillationRunner, OnPolicyRunner torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True @@ -208,4 +214,4 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # run the main function main() # close sim app - simulation_app.close() \ No newline at end of file + simulation_app.close() diff --git a/source/SO_100/SO_100/robots/__init__.py b/source/SO_100/SO_100/robots/__init__.py index ff8c68b..aa0a926 100644 --- a/source/SO_100/SO_100/robots/__init__.py +++ b/source/SO_100/SO_100/robots/__init__.py @@ -9,4 +9,4 @@ # SPDX-License-Identifier: BSD-3-Clause from .so_arm100 import * -from .so_arm100_roscon import * \ No newline at end of file +from .so_arm100_roscon import * From 9ce2a4d78b03e06a4f278f404ecf738087c31904 Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:39:14 +0200 Subject: [PATCH 4/6] corrects license header duplicate --- scripts/rsl_rl/cli_args.py | 5 ----- scripts/rsl_rl/play.py | 5 ----- 2 files changed, 10 deletions(-) diff --git a/scripts/rsl_rl/cli_args.py b/scripts/rsl_rl/cli_args.py index c651a38..fde3046 100644 --- a/scripts/rsl_rl/cli_args.py +++ b/scripts/rsl_rl/cli_args.py @@ -8,11 +8,6 @@ # # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - from __future__ import annotations import argparse diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 0f2d2cb..1bbb604 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -8,11 +8,6 @@ # # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - """Script to train RL agent with RSL-RL.""" """Launch Isaac Sim Simulator first.""" From 5cea2a28ff0cbeeca8e1b294b073fc815e96e58f Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:58:51 +0200 Subject: [PATCH 5/6] corrects play script --- scripts/rsl_rl/play.py | 201 +++++++++--------- .../tasks/lift/agents/rsl_rl_ppo_cfg.py | 2 +- .../tasks/reach/agents/skrl_ppo_cfg.yaml | 4 +- 3 files changed, 100 insertions(+), 107 deletions(-) diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 1bbb604..fe375c0 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -1,14 +1,9 @@ -# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -# -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause -"""Script to train RL agent with RSL-RL.""" +"""Script to play a checkpoint if an RL agent from RSL-RL.""" """Launch Isaac Sim Simulator first.""" @@ -24,24 +19,27 @@ parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") -parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") parser.add_argument( "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." ) parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") -parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") parser.add_argument( - "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." + "--use_pretrained_checkpoint", + action="store_true", + help="Use the pre-trained checkpoint from Nucleus.", ) -parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.") +parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") # append RSL-RL cli arguments cli_args.add_rsl_rl_args(parser) # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) +# parse the arguments args_cli, hydra_args = parser.parse_known_args() - # always enable cameras to record video if args_cli.video: args_cli.enable_cameras = True @@ -53,38 +51,15 @@ app_launcher = AppLauncher(args_cli) simulation_app = app_launcher.app -"""Check for minimum supported RSL-RL version.""" - -import importlib.metadata as metadata -import platform - -from packaging import version - -# check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.1" -installed_version = metadata.version("rsl-rl-lib") -if version.parse(installed_version) < version.parse(RSL_RL_VERSION): - if platform.system() == "Windows": - cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - else: - cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] - print( - f"Please install the correct version of RSL-RL.\nExisting version is: '{installed_version}'" - f" and required version is: '{RSL_RL_VERSION}'.\nTo install the correct version, run:" - f"\n\n\t{' '.join(cmd)}\n" - ) - exit(1) - """Rest everything follows.""" -import os -from datetime import datetime - import gymnasium as gym -import isaaclab_tasks # noqa: F401 -import omni -import SO_100.tasks # noqa: F401 +import os +import time import torch + +from rsl_rl.runners import DistillationRunner, OnPolicyRunner + from isaaclab.envs import ( DirectMARLEnv, DirectMARLEnvCfg, @@ -92,64 +67,50 @@ ManagerBasedRLEnvCfg, multi_agent_to_single_agent, ) +from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict -from isaaclab.utils.io import dump_pickle, dump_yaml -from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper +from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint + +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx + +import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path from isaaclab_tasks.utils.hydra import hydra_task_config -from rsl_rl.runners import DistillationRunner, OnPolicyRunner -torch.backends.cuda.matmul.allow_tf32 = True -torch.backends.cudnn.allow_tf32 = True -torch.backends.cudnn.deterministic = False -torch.backends.cudnn.benchmark = False +import SO_100.tasks # noqa: F401 @hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): - """Train with RSL-RL agent.""" + """Play with RSL-RL agent.""" + # grab task name for checkpoint path + task_name = args_cli.task.split(":")[-1] + train_task_name = task_name.replace("-Play", "") + # override configurations with non-hydra CLI arguments - agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs - agent_cfg.max_iterations = ( - args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations - ) # set the environment seed # note: certain randomizations occur in the environment initialization so we set the seed here env_cfg.seed = agent_cfg.seed env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device - # multi-gpu training configuration - if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - agent_cfg.device = f"cuda:{app_launcher.local_rank}" - - # set seed to have diversity in different threads - seed = agent_cfg.seed + app_launcher.local_rank - env_cfg.seed = seed - agent_cfg.seed = seed - # specify directory for logging experiments log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Logging experiment in directory: {log_root_path}") - # specify directory for logging runs: {time-stamp}_{run_name} - log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - # The Ray Tune workflow extracts experiment name using the logging line below, hence, do not change it (see PR #2346, comment-2819298849) - print(f"Exact experiment name requested from command line: {log_dir}") - if agent_cfg.run_name: - log_dir += f"_{agent_cfg.run_name}" - log_dir = os.path.join(log_root_path, log_dir) - - # set the IO descriptors output directory if requested - if isinstance(env_cfg, ManagerBasedRLEnvCfg): - env_cfg.export_io_descriptors = args_cli.export_io_descriptors - env_cfg.io_descriptors_output_dir = log_dir + print(f"[INFO] Loading experiment from directory: {log_root_path}") + if args_cli.use_pretrained_checkpoint: + resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name) + if not resume_path: + print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.") + return + elif args_cli.checkpoint: + resume_path = retrieve_file_path(args_cli.checkpoint) else: - omni.log.warn( - "IO descriptors are only supported for manager based RL environments. No IO descriptors will be exported." - ) + resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) + + log_dir = os.path.dirname(resume_path) # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) @@ -158,15 +119,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen if isinstance(env.unwrapped, DirectMARLEnv): env = multi_agent_to_single_agent(env) - # save resume path before creating a new log_dir - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) - # wrap for video recording if args_cli.video: video_kwargs = { - "video_folder": os.path.join(log_dir, "videos", "train"), - "step_trigger": lambda step: step % args_cli.video_interval == 0, + "video_folder": os.path.join(log_dir, "videos", "play"), + "step_trigger": lambda step: step == 0, "video_length": args_cli.video_length, "disable_logger": True, } @@ -177,29 +134,65 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # wrap around environment for rsl-rl env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) - # create runner from rsl-rl + print(f"[INFO]: Loading model checkpoint from: {resume_path}") + # load previously trained model if agent_cfg.class_name == "OnPolicyRunner": - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) elif agent_cfg.class_name == "DistillationRunner": - runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) else: raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") - # write git state to logs - runner.add_git_repo_to_log(__file__) - # load the checkpoint - if agent_cfg.resume or agent_cfg.algorithm.class_name == "Distillation": - print(f"[INFO]: Loading model checkpoint from: {resume_path}") - # load previously trained model - runner.load(resume_path) - - # dump the configuration into log-directory - dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) - dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) - dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg) - dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg) - - # run training - runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) + runner.load(resume_path) + + # obtain the trained policy for inference + policy = runner.get_inference_policy(device=env.unwrapped.device) + + # extract the neural network module + # we do this in a try-except to maintain backwards compatibility. + try: + # version 2.3 onwards + policy_nn = runner.alg.policy + except AttributeError: + # version 2.2 and below + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None + + # export policy to onnx/jit + export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") + + dt = env.unwrapped.step_dt + + # reset environment + obs = env.get_observations() + timestep = 0 + # simulate environment + while simulation_app.is_running(): + start_time = time.time() + # run everything in inference mode + with torch.inference_mode(): + # agent stepping + actions = policy(obs) + # env stepping + obs, _, _, _ = env.step(actions) + if args_cli.video: + timestep += 1 + # Exit the play loop after recording one video + if timestep == args_cli.video_length: + break + + # time delay for real-time evaluation + sleep_time = dt - (time.time() - start_time) + if args_cli.real_time and sleep_time > 0: + time.sleep(sleep_time) # close the simulator env.close() @@ -209,4 +202,4 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # run the main function main() # close sim app - simulation_app.close() + simulation_app.close() \ No newline at end of file diff --git a/source/SO_100/SO_100/tasks/lift/agents/rsl_rl_ppo_cfg.py b/source/SO_100/SO_100/tasks/lift/agents/rsl_rl_ppo_cfg.py index 1ae2c8e..27fa0d9 100644 --- a/source/SO_100/SO_100/tasks/lift/agents/rsl_rl_ppo_cfg.py +++ b/source/SO_100/SO_100/tasks/lift/agents/rsl_rl_ppo_cfg.py @@ -21,7 +21,7 @@ class LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): num_steps_per_env = 24 max_iterations = 1500 save_interval = 50 - experiment_name = "so_arm100_lift" + experiment_name = "lift" empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, diff --git a/source/SO_100/SO_100/tasks/reach/agents/skrl_ppo_cfg.yaml b/source/SO_100/SO_100/tasks/reach/agents/skrl_ppo_cfg.yaml index 73ef462..15e2c15 100644 --- a/source/SO_100/SO_100/tasks/reach/agents/skrl_ppo_cfg.yaml +++ b/source/SO_100/SO_100/tasks/reach/agents/skrl_ppo_cfg.yaml @@ -66,8 +66,8 @@ agent: time_limit_bootstrap: False # logging and checkpoint experiment: - directory: "reach_so_arm100" - experiment_name: "reach_so_arm100" + directory: "reach" + experiment_name: "reach" write_interval: auto checkpoint_interval: auto From b1ba02448e06fb71a67a1a86b6223089ff003f44 Mon Sep 17 00:00:00 2001 From: louislelay Date: Fri, 12 Sep 2025 12:59:30 +0200 Subject: [PATCH 6/6] formats --- scripts/rsl_rl/play.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index fe375c0..a9a98e5 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -1,4 +1,9 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# Copyright (c) 2024-2025, Muammer Bay (LycheeAI), Louis Le Lay +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause @@ -53,13 +58,13 @@ """Rest everything follows.""" -import gymnasium as gym import os import time -import torch - -from rsl_rl.runners import DistillationRunner, OnPolicyRunner +import gymnasium as gym +import isaaclab_tasks # noqa: F401 +import SO_100.tasks # noqa: F401 +import torch from isaaclab.envs import ( DirectMARLEnv, DirectMARLEnvCfg, @@ -70,14 +75,15 @@ from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint - -from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx - -import isaaclab_tasks # noqa: F401 +from isaaclab_rl.rsl_rl import ( + RslRlBaseRunnerCfg, + RslRlVecEnvWrapper, + export_policy_as_jit, + export_policy_as_onnx, +) from isaaclab_tasks.utils import get_checkpoint_path from isaaclab_tasks.utils.hydra import hydra_task_config - -import SO_100.tasks # noqa: F401 +from rsl_rl.runners import DistillationRunner, OnPolicyRunner @hydra_task_config(args_cli.task, args_cli.agent) @@ -202,4 +208,4 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # run the main function main() # close sim app - simulation_app.close() \ No newline at end of file + simulation_app.close()