-
Notifications
You must be signed in to change notification settings - Fork 2.6k
WIP: RL environment for the surface gripper #3885
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
99d6eca
81ea7b1
e4c76c4
fab2d12
9e14cc2
7e7d1f1
234a74f
b139fc1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,42 @@ | ||||||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||||||
| # All rights reserved. | ||||||
| # | ||||||
| # SPDX-License-Identifier: BSD-3-Clause | ||||||
| import gymnasium as gym | ||||||
| import os | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style:
Suggested change
|
||||||
|
|
||||||
| from . import agents | ||||||
|
|
||||||
| ## | ||||||
| # Register Gym environments. | ||||||
| ## | ||||||
|
|
||||||
| ## | ||||||
| # Joint Position Control | ||||||
| ## | ||||||
|
|
||||||
# Both task variants use the same environment class and the same agent
# configuration entry points; only the task id and the environment
# configuration class differ, so they are registered from one table.
# NOTE(review): a reviewer flagged the entry point of the first registration
# ("entry point should be ..."), but the proposed value is cut off in this
# view, so the original entry point is kept unchanged.
for _task_id, _env_cfg_cls in (
    ("Isaac-Lift-Cube-UR10SG-Short-v0", "UR10SGCubeLiftEnvCfg"),
    ("Isaac-Lift-Cube-UR10SG-Long-v0", "UR10SGLongCubeLiftEnvCfg"),
):
    gym.register(
        id=_task_id,
        entry_point="isaaclab.envs:ManagerBasedRLEnv",
        # A fresh kwargs dict is built per registration so the two specs
        # never share a mutable object.
        kwargs={
            "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:{_env_cfg_cls}",
            "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg",
            "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
            "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
            "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
        },
        disable_env_checker=True,
    )

# Remove the loop helpers so the module namespace is the same as with the
# two registrations written out by hand.
del _task_id, _env_cfg_cls
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. logic: missing PLAY environment registrations (e.g., |
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||
| # All rights reserved. | ||
| # | ||
| # SPDX-License-Identifier: BSD-3-Clause |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||
| # All rights reserved. | ||
| # | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
|
||
| params: | ||
| seed: 42 | ||
|
|
||
| # environment wrapper clipping | ||
| env: | ||
| clip_observations: 100.0 | ||
| clip_actions: 100.0 | ||
|
|
||
| algo: | ||
| name: a2c_continuous | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. logic: file named |
||
|
|
||
| model: | ||
| name: continuous_a2c_logstd | ||
|
|
||
| network: | ||
| name: actor_critic | ||
| separate: False | ||
| space: | ||
| continuous: | ||
| mu_activation: None | ||
| sigma_activation: None | ||
|
|
||
| mu_init: | ||
| name: default | ||
| sigma_init: | ||
| name: const_initializer | ||
| val: 0 | ||
| fixed_sigma: True | ||
| mlp: | ||
| units: [256, 128, 64] | ||
| activation: elu | ||
| d2rl: False | ||
|
|
||
| initializer: | ||
| name: default | ||
| regularizer: | ||
| name: None | ||
|
|
||
| load_checkpoint: False # flag which sets whether to load the checkpoint | ||
| load_path: '' # path to the checkpoint to load | ||
|
|
||
| config: | ||
| name: UR10SG_lift | ||
| env_name: rlgpu | ||
| device: 'cuda:0' | ||
| device_name: 'cuda:0' | ||
| multi_gpu: False | ||
| ppo: True | ||
| mixed_precision: False | ||
| normalize_input: True | ||
| normalize_value: True | ||
| value_bootstrap: False | ||
| num_actors: -1 | ||
| reward_shaper: | ||
| scale_value: 0.01 | ||
| normalize_advantage: True | ||
| gamma: 0.99 | ||
| tau: 0.95 | ||
| learning_rate: 1e-4 | ||
| lr_schedule: adaptive | ||
| schedule_type: legacy | ||
| kl_threshold: 0.01 | ||
| score_to_win: 100000000 | ||
| max_epochs: 1500 | ||
| save_best_after: 100 | ||
| save_frequency: 50 | ||
| print_stats: True | ||
| grad_norm: 1.0 | ||
| entropy_coef: 0.001 | ||
| truncate_grads: True | ||
| e_clip: 0.2 | ||
| horizon_length: 24 | ||
| minibatch_size: 24576 | ||
| mini_epochs: 8 | ||
| critic_coef: 4 | ||
| clip_value: True | ||
| clip_actions: False | ||
| seq_len: 4 | ||
| bounds_loss_coef: 0.0001 | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,38 @@ | ||||||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||||||
| # All rights reserved. | ||||||
| # | ||||||
| # SPDX-License-Identifier: BSD-3-Clause | ||||||
|
|
||||||
| from isaaclab.utils import configclass | ||||||
|
|
||||||
| from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg | ||||||
|
|
||||||
|
|
||||||
@configclass
class UR10LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
    """RSL-RL PPO runner settings for the UR10 cube-lift task.

    Overrides the runner defaults with the step/iteration budget, the
    checkpoint interval, the experiment name, and the policy and PPO
    algorithm sub-configurations defined below.
    """

    num_steps_per_env = 24
    max_iterations = 1500
    save_interval = 50
    # Named after this task instead of the Franka lift task, so runs of this
    # configuration are not filed under another robot's experiment name.
    experiment_name = "ur10sg_lift"

    # Actor and critic use the same three hidden layer sizes and activation;
    # observation normalization is switched off for both.
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
        actor_obs_normalization=False,
        critic_obs_normalization=False,
        actor_hidden_dims=[256, 128, 64],
        critic_hidden_dims=[256, 128, 64],
        activation="elu",
    )

    # PPO settings: clipped value loss, adaptive learning-rate schedule with
    # a desired KL of 0.01.
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.006,
        num_learning_epochs=5,
        num_mini_batches=4,
        learning_rate=1.0e-4,
        schedule="adaptive",
        gamma=0.98,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||
| # All rights reserved. | ||
| # | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
|
||
| # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 | ||
| seed: 42 | ||
|
|
||
| # epoch * n_steps * nenvs: 500×512*8*8 | ||
| n_timesteps: 16384000 | ||
| policy: 'MlpPolicy' | ||
| n_steps: 64 | ||
| # mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048 | ||
| batch_size: 192 | ||
| gae_lambda: 0.95 | ||
| gamma: 0.99 | ||
| n_epochs: 8 | ||
| ent_coef: 0.00 | ||
| vf_coef: 0.0001 | ||
| learning_rate: !!float 3e-4 | ||
| clip_range: 0.2 | ||
| policy_kwargs: | ||
| activation_fn: 'nn.ELU' | ||
| net_arch: | ||
| pi: [256, 128, 64] | ||
| vf: [256, 128, 64] | ||
| target_kl: 0.01 | ||
| max_grad_norm: 1.0 | ||
|
|
||
| # # Uses VecNormalize class to normalize obs | ||
| # normalize_input: True | ||
| # # Uses VecNormalize class to normalize rew | ||
| # normalize_value: True | ||
| # clip_obs: 5 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| # Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). | ||
| # All rights reserved. | ||
| # | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
|
||
| seed: 42 | ||
|
|
||
|
|
||
| # Models are instantiated using skrl's model instantiator utility | ||
| # https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html | ||
| models: | ||
| separate: False | ||
| policy: # see gaussian_model parameters | ||
| class: GaussianMixin | ||
| clip_actions: False | ||
| clip_log_std: True | ||
| min_log_std: -20.0 | ||
| max_log_std: 2.0 | ||
| initial_log_std: 0.0 | ||
| network: | ||
| - name: net | ||
| input: OBSERVATIONS | ||
| layers: [256, 128, 64] | ||
| activations: elu | ||
| output: ACTIONS | ||
| value: # see deterministic_model parameters | ||
| class: DeterministicMixin | ||
| clip_actions: False | ||
| network: | ||
| - name: net | ||
| input: OBSERVATIONS | ||
| layers: [256, 128, 64] | ||
| activations: elu | ||
| output: ONE | ||
|
|
||
|
|
||
| # Rollout memory | ||
| # https://skrl.readthedocs.io/en/latest/api/memories/random.html | ||
| memory: | ||
| class: RandomMemory | ||
| memory_size: -1 # automatically determined (same as agent:rollouts) | ||
|
|
||
|
|
||
| # PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) | ||
| # https://skrl.readthedocs.io/en/latest/api/agents/ppo.html | ||
| agent: | ||
| class: PPO | ||
| rollouts: 24 | ||
| learning_epochs: 8 | ||
| mini_batches: 4 | ||
| discount_factor: 0.99 | ||
| lambda: 0.95 | ||
| learning_rate: 1.0e-04 | ||
| learning_rate_scheduler: KLAdaptiveLR | ||
| learning_rate_scheduler_kwargs: | ||
| kl_threshold: 0.01 | ||
| state_preprocessor: RunningStandardScaler | ||
| state_preprocessor_kwargs: null | ||
| value_preprocessor: RunningStandardScaler | ||
| value_preprocessor_kwargs: null | ||
| random_timesteps: 0 | ||
| learning_starts: 0 | ||
| grad_norm_clip: 1.0 | ||
| ratio_clip: 0.2 | ||
| value_clip: 0.2 | ||
| clip_predicted_values: True | ||
| entropy_loss_scale: 0.001 | ||
| value_loss_scale: 2.0 | ||
| kl_threshold: 0.0 | ||
| rewards_shaper_scale: 0.01 | ||
| time_limit_bootstrap: False | ||
| # logging and checkpoint | ||
| experiment: | ||
| directory: "ur10sg_lift" | ||
| experiment_name: "" | ||
| write_interval: auto | ||
| checkpoint_interval: auto | ||
|
|
||
|
|
||
| # Sequential trainer | ||
| # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html | ||
| trainer: | ||
| class: SequentialTrainer | ||
| timesteps: 36000 | ||
| environment_info: log |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
style: changed wrist_2_joint from 1.5707 to -1.5707 (90° rotation). Check that this new pose is correct for the surface gripper orientation and doesn't cause collisions or unexpected behavior in existing environments using
UR10_LONG_SUCTION_CFG