Provides PBT training cfg example for Isaac-Dexsuite-Kuka-Allegro-Lift-v0 env for rl_games (#3553)

ooctipus · web-flow · commit faa96dfcbcbc · 2025-09-30T06:33:54.000+09:00
# Description This PR provides a built-in PBT training example for the Isaac-Dexsuite-Kuka-Allegro-Lift-v0 environment. Though we had an introduction and explanation of how to run PBT, we didn't have a built-in example. This will make using PBT easier for users. Fixes # (issue)  ## Type of change - New feature (non-breaking change which adds functionality) ## Screenshots Please attach before and after screenshots of the change if applicable.  ## Checklist - [x] I have read and understood the [contribution guidelines](https://isaac-sim.github.io/IsaacLab/main/source/refs/contributing.html) - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [x] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
diff --git a/docs/source/features/population_based_training.rst b/docs/source/features/population_based_training.rst
@@ -80,9 +80,8 @@ You must start **one process per policy** and point them to the **same workspace
 Minimal flags you need:
 
 * ``agent.pbt.enabled=True``
-* ``agent.pbt.workspace=<path/to/shared_folder>``
+* ``agent.pbt.directory=<path/to/shared_folder>``
 * ``agent.pbt.policy_idx=<0..num_policies-1>``
-* ``agent.pbt.num_policies=<N>``
 
 .. note::
    All processes must use the same ``agent.pbt.directory`` and ``agent.pbt.workspace`` so they can see each other's checkpoints.
@@ -93,8 +92,37 @@ Minimal flags you need:
 Tips
 ----
 
-* Keep checkpoints fast: reduce ``interval_steps`` only if you really need tighter PBT cadence.
-* It is recommended to run 6+ workers to see benefit of pbt
+* Keep checkpoints reasonable: reduce ``interval_steps`` only if you really need tighter PBT cadence.
+* Use larger ``threshold_std`` and ``threshold_abs`` for greater population diversity.
+* It is recommended to run 6+ workers to see the benefit of PBT.
+
+
+Training Example
+----------------
+
+We provide a reference PPO config here for task:
+`Isaac-Dexsuite-Kuka-Allegro-Lift-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/config/kuka_allegro/agents/rl_games_ppo_cfg.yaml>`_.
+For the best logging experience, we recommend using wandb for the logging in the script.
+
+Launch *N* workers, where *n* indicates each worker index:
+
+.. code-block:: bash
+
+   # Run this once per worker (n = 0..N-1), all pointing to the same directory/workspace
+   ./isaaclab.sh -p scripts/reinforcement_learning/rl_games/train.py \
+     --seed=<n> \
+     --task=Isaac-Dexsuite-Kuka-Allegro-Lift-v0 \
+     --num_envs=8192 \
+     --headless \
+     --track \
+     --wandb-name=idx<n> \
+     --wandb-entity=<entity> \
+     --wandb-project-name=<project> \
+     agent.pbt.enabled=True \
+     agent.pbt.num_policies=<N> \
+     agent.pbt.policy_idx=<n> \
+     agent.pbt.workspace=<pbt_workspace_name> \
+     agent.pbt.directory=</path/to/shared_folder>
 
 
 References
diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.11.0"
+version = "0.11.1"
 
 # Description
 title = "Isaac Lab Environments"
diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst
@@ -1,6 +1,15 @@
 Changelog
 ---------
 
+0.11.1 (2025-09-24)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added a dexterous lifting PBT example configuration for rl_games.
+
+
 0.11.0 (2025-09-07)
 ~~~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/config/kuka_allegro/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/config/kuka_allegro/agents/rl_games_ppo_cfg.yaml
@@ -84,3 +84,28 @@ params:
     clip_actions: False
     seq_len: 4
     bounds_loss_coef: 0.0001
+
+pbt:
+  enabled: False
+  policy_idx: 0  # policy index in a population
+  num_policies: 8  # total number of policies in the population
+  directory: .
+  workspace: "pbt_workspace"  # suffix of the workspace dir name inside train_dir
+  objective: episode.Curriculum/adr
+
+  # PBT hyperparams
+  interval_steps: 50000000
+  threshold_std: 0.1
+  threshold_abs: 0.025
+  mutation_rate: 0.25
+  change_range: [1.1, 2.0]
+  mutation:
+
+    agent.params.config.learning_rate: "mutate_float"
+    agent.params.config.grad_norm: "mutate_float"
+    agent.params.config.entropy_coef: "mutate_float"
+    agent.params.config.critic_coef: "mutate_float"
+    agent.params.config.bounds_loss_coef: "mutate_float"
+    agent.params.config.kl_threshold: "mutate_float"
+    agent.params.config.gamma: "mutate_discount"
+    agent.params.config.tau: "mutate_discount"