Skip to content
2 changes: 2 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ Upcoming version (not yet released)
Added
^^^^^

- Added ``Mjlab-Velocity-Flat-Run-Unitree-G1`` task with velocity command
curriculum for training the G1 to run on flat terrain.
- Added ``STAIRS_TERRAINS_CFG`` terrain preset for progressive stair
curriculum training and ``@terrain_preset`` decorator for composing
terrain configurations from reusable presets.
Expand Down
16 changes: 13 additions & 3 deletions src/mjlab/sensor/terrain_height_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,19 @@ def _compute_data(self) -> TerrainHeightData:
heights = frame_z.unsqueeze(-1) - hit_z # [B, F, N]

miss = raw.distances.view(B, F, N) < 0
heights = torch.where(
miss, torch.full_like(heights, self.cfg.max_distance), heights
)
# When all rays for a frame miss there are two cases:
# 1. Frame is below or at the terrain surface (rays start below and
# point down, never hitting anything). True clearance is ~0.
# 2. Frame is genuinely above max_distance. True clearance >=
# max_distance.
# We distinguish them using frame_z clamped to [0, max_distance].
# For partial misses (some rays hit, some don't), max_distance is
# the right fallback since the frame is above terrain.
all_miss = miss.all(dim=-1, keepdim=True).expand_as(miss) # [B, F, N]
fallback = frame_z.unsqueeze(-1).clamp(0, self.cfg.max_distance)
fallback = fallback.expand_as(heights) # [B, F, N]
miss_value = torch.where(all_miss, fallback, self.cfg.max_distance)
heights = torch.where(miss, miss_value, heights)

reduction = self.cfg.reduction
if reduction == "min":
Expand Down
9 changes: 9 additions & 0 deletions src/mjlab/tasks/velocity/config/g1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .env_cfgs import (
unitree_g1_flat_env_cfg,
unitree_g1_flat_run_env_cfg,
unitree_g1_rough_env_cfg,
)
from .rl_cfg import unitree_g1_ppo_runner_cfg
Expand All @@ -22,3 +23,11 @@
rl_cfg=unitree_g1_ppo_runner_cfg(),
runner_cls=VelocityOnPolicyRunner,
)

# Flat-terrain "run" variant of the G1 velocity task. Reuses the same PPO
# runner config as the walking task; the difference lives entirely in the
# env config (a staged velocity-command curriculum, see env_cfgs.py).
register_mjlab_task(
  task_id="Mjlab-Velocity-Flat-Run-Unitree-G1",
  env_cfg=unitree_g1_flat_run_env_cfg(),
  # play=True configures the env for interactive rollout/evaluation.
  play_env_cfg=unitree_g1_flat_run_env_cfg(play=True),
  rl_cfg=unitree_g1_ppo_runner_cfg(),
  runner_cls=VelocityOnPolicyRunner,
)
34 changes: 25 additions & 9 deletions src/mjlab/tasks/velocity/config/g1/env_cfgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mjlab.envs import ManagerBasedRlEnvCfg
from mjlab.envs import mdp as envs_mdp
from mjlab.envs.mdp.actions import JointPositionActionCfg
from mjlab.managers.curriculum_manager import CurriculumTermCfg
from mjlab.managers.event_manager import EventTermCfg
from mjlab.managers.reward_manager import RewardTermCfg
from mjlab.sensor import (
Expand All @@ -26,9 +27,8 @@ def unitree_g1_rough_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree G1 rough terrain velocity configuration."""
cfg = make_velocity_env_cfg()

cfg.sim.mujoco.ccd_iterations = 500
cfg.sim.contact_sensor_maxmatch = 500
cfg.sim.nconmax = 70
cfg.sim.njmax = 200
cfg.sim.nconmax = 30
Comment on lines +30 to +31
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, such a low njmax/nconmax work?


cfg.scene.entities = {"robot": get_g1_robot_cfg()}

Expand Down Expand Up @@ -189,10 +189,7 @@ def unitree_g1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree G1 flat terrain velocity configuration."""
cfg = unitree_g1_rough_env_cfg(play=play)

cfg.sim.njmax = 300
cfg.sim.mujoco.ccd_iterations = 50
cfg.sim.contact_sensor_maxmatch = 64
cfg.sim.nconmax = None
cfg.sim.njmax = 170

# Switch to flat terrain.
assert cfg.scene.terrain is not None
Expand All @@ -211,10 +208,29 @@ def unitree_g1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
# Disable terrain curriculum (not present in play mode since rough clears all).
cfg.curriculum.pop("terrain_levels", None)

return cfg


def unitree_g1_flat_run_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
  """G1 flat terrain with velocity curriculum for learning to run.

  Builds on the flat walking config and adds a staged velocity-command
  curriculum that progressively widens the commanded linear/angular
  velocity ranges over training.

  Args:
    play: If True, skip the curriculum schedule's effect on evaluation by
      pinning the twist command ranges to the final (widest) stage.

  Returns:
    The configured environment config.
  """
  cfg = unitree_g1_flat_env_cfg(play=play)

  # Stage thresholds are in env steps: PPO iterations * num_steps_per_env (24).
  cfg.curriculum["command_vel"] = CurriculumTermCfg(
    func=mdp.commands_vel,
    params={
      "command_name": "twist",
      "velocity_stages": [
        {"step": 0, "lin_vel_x": (-1.0, 1.0)},
        {"step": 5000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-1.5, 1.5)},
        {"step": 10000 * 24, "lin_vel_x": (-2.0, 3.0), "ang_vel_z": (-2.0, 2.0)},
      ],
    },
  )

  if play:
    twist_cmd = cfg.commands["twist"]
    assert isinstance(twist_cmd, UniformVelocityCommandCfg)
    # Pin play-mode commands to the final curriculum stage's ranges.
    # (Removed a stale pair of assignments — (-1.5, 2.0) / (-0.7, 0.7) —
    # that were dead code, immediately overwritten by the lines below.)
    twist_cmd.ranges.lin_vel_x = (-2.0, 3.0)
    twist_cmd.ranges.ang_vel_z = (-2.0, 2.0)

  return cfg
4 changes: 2 additions & 2 deletions src/mjlab/tasks/velocity/config/go1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
unitree_go1_flat_env_cfg,
unitree_go1_rough_env_cfg,
)
from .rl_cfg import unitree_go1_ppo_runner_cfg
from .rl_cfg import unitree_go1_flat_ppo_runner_cfg, unitree_go1_ppo_runner_cfg

register_mjlab_task(
task_id="Mjlab-Velocity-Rough-Unitree-Go1",
Expand All @@ -19,6 +19,6 @@
task_id="Mjlab-Velocity-Flat-Unitree-Go1",
env_cfg=unitree_go1_flat_env_cfg(),
play_env_cfg=unitree_go1_flat_env_cfg(play=True),
rl_cfg=unitree_go1_ppo_runner_cfg(),
rl_cfg=unitree_go1_flat_ppo_runner_cfg(),
runner_cls=VelocityOnPolicyRunner,
)
35 changes: 19 additions & 16 deletions src/mjlab/tasks/velocity/config/go1/env_cfgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
TerrainHeightSensorCfg,
)
from mjlab.tasks.velocity import mdp
from mjlab.tasks.velocity.mdp import UniformVelocityCommandCfg
from mjlab.tasks.velocity.velocity_env_cfg import make_velocity_env_cfg

TerrainType = Literal["rough", "obstacles"]
Expand All @@ -35,10 +34,11 @@ def unitree_go1_rough_env_cfg(
"""Create Unitree Go1 rough terrain velocity configuration."""
cfg = make_velocity_env_cfg()

cfg.sim.mujoco.ccd_iterations = 500
cfg.sim.njmax = 120
cfg.sim.nconmax = 20

cfg.sim.mujoco.impratio = 10
cfg.sim.mujoco.cone = "elliptic"
cfg.sim.contact_sensor_maxmatch = 500

cfg.scene.entities = {"robot": get_go1_robot_cfg()}

Expand Down Expand Up @@ -203,9 +203,11 @@ def unitree_go1_rough_env_cfg(
for reward_name in ["foot_clearance", "foot_slip"]:
cfg.rewards[reward_name].params["asset_cfg"].site_names = site_names

cfg.rewards["body_ang_vel"].weight = 0.0
cfg.rewards["angular_momentum"].weight = 0.0
cfg.rewards["body_ang_vel"].weight = -1e-4
cfg.rewards["angular_momentum"].weight = -1e-4
cfg.rewards["air_time"].weight = 0.0
cfg.rewards["joint_vel_l2"] = RewardTermCfg(func=mdp.joint_vel_l2, weight=-1e-5)
cfg.rewards["joint_acc_l2"] = RewardTermCfg(func=mdp.joint_acc_l2, weight=-1e-7)

# Per-body-group collision penalties.
cfg.rewards["self_collisions"] = RewardTermCfg(
Expand All @@ -218,6 +220,11 @@ def unitree_go1_rough_env_cfg(
weight=-0.1,
params={"sensor_name": shank_ground_cfg.name},
)
cfg.rewards["thigh_collision"] = RewardTermCfg(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WDYT about an alive reward? I was thinking it could be helpful especially for rough terrain

func=mdp.self_collision_cost,
weight=-0.5,
params={"sensor_name": thigh_ground_cfg.name},
)
cfg.rewards["trunk_head_collision"] = RewardTermCfg(
func=mdp.self_collision_cost,
weight=-0.1,
Expand Down Expand Up @@ -262,10 +269,7 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree Go1 flat terrain velocity configuration."""
cfg = unitree_go1_rough_env_cfg(play=play)

cfg.sim.njmax = 300
cfg.sim.mujoco.ccd_iterations = 50
cfg.sim.contact_sensor_maxmatch = 64
cfg.sim.nconmax = None
cfg.sim.njmax = 50

# Switch to flat terrain.
assert cfg.scene.terrain is not None
Expand All @@ -288,7 +292,12 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
cfg.rewards["upright"].params.pop("terrain_sensor_names", None)

# Remove granular collision rewards (not useful on flat ground).
for key in ("self_collisions", "shank_collision", "trunk_head_collision"):
for key in (
"self_collisions",
"shank_collision",
"thigh_collision",
"trunk_head_collision",
):
cfg.rewards.pop(key, None)

# On flat terrain fell_over is sufficient; thigh contact implies fallen.
Expand All @@ -302,10 +311,4 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
# Disable terrain curriculum (not present in play mode since rough clears all).
cfg.curriculum.pop("terrain_levels", None)

if play:
twist_cmd = cfg.commands["twist"]
assert isinstance(twist_cmd, UniformVelocityCommandCfg)
twist_cmd.ranges.lin_vel_x = (-1.5, 2.0)
twist_cmd.ranges.ang_vel_z = (-0.7, 0.7)

return cfg
13 changes: 11 additions & 2 deletions src/mjlab/tasks/velocity/config/go1/rl_cfg.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""RL configuration for Unitree Go1 velocity task."""

from functools import partial

from mjlab.rl import (
RslRlModelCfg,
RslRlOnPolicyRunnerCfg,
RslRlPpoAlgorithmCfg,
)


def unitree_go1_ppo_runner_cfg() -> RslRlOnPolicyRunnerCfg:
def unitree_go1_ppo_runner_cfg(
max_iterations: int = 10_000,
) -> RslRlOnPolicyRunnerCfg:
"""Create RL runner configuration for Unitree Go1 velocity task."""
return RslRlOnPolicyRunnerCfg(
actor=RslRlModelCfg(
Expand Down Expand Up @@ -42,5 +46,10 @@ def unitree_go1_ppo_runner_cfg() -> RslRlOnPolicyRunnerCfg:
experiment_name="go1_velocity",
save_interval=50,
num_steps_per_env=24,
max_iterations=10_000,
max_iterations=max_iterations,
)


# Flat-terrain variant of the Go1 runner config: identical hyperparameters,
# but a much shorter schedule (1,500 iterations vs. the rough-terrain default
# of 10,000), since flat-ground locomotion converges far faster.
unitree_go1_flat_ppo_runner_cfg = partial(
  unitree_go1_ppo_runner_cfg, max_iterations=1_500
)
7 changes: 3 additions & 4 deletions src/mjlab/tasks/velocity/mdp/curriculums.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def terrain_levels_vel(
# Robots that walked far enough progress to harder terrains.
move_up = distance > terrain_generator.size[0] / 2

# Robots that walked less than half of their required distance go to
# simpler terrains.
# Robots that walked less than half of their required distance go to simpler terrains.
move_down = (
distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5
)
Expand All @@ -64,8 +63,8 @@ def terrain_levels_vel(
"max": torch.max(levels),
}

# In curriculum mode num_cols == num_terrains (one column per type),
# so the column index directly maps to the sub-terrain name.
# In curriculum mode num_cols == num_terrains (one column per type), so the column
# index directly maps to the sub-terrain name.
sub_terrain_names = list(terrain_generator.sub_terrains.keys())
terrain_origins = terrain.terrain_origins
assert terrain_origins is not None
Expand Down
5 changes: 5 additions & 0 deletions src/mjlab/tasks/velocity/mdp/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRlEnv):
)
self.step_dt = env.step_dt

def reset(self, env_ids: torch.Tensor | slice | None = None) -> None:
if env_ids is None:
env_ids = slice(None)
self.peak_heights[env_ids] = 0.0

def __call__(
self,
env: ManagerBasedRlEnv,
Expand Down
17 changes: 3 additions & 14 deletions src/mjlab/tasks/velocity/velocity_env_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
"joint_pos": ObservationTermCfg(
func=mdp.joint_pos_rel,
noise=Unoise(n_min=-0.01, n_max=0.01),
params={"biased": True},
),
"joint_vel": ObservationTermCfg(
func=mdp.joint_vel_rel,
Expand All @@ -111,6 +112,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:

critic_terms = {
**actor_terms,
"joint_pos": ObservationTermCfg(func=mdp.joint_pos_rel),
"height_scan": ObservationTermCfg(
func=envs_mdp.height_scan,
params={"sensor_name": "terrain_scan"},
Expand Down Expand Up @@ -187,7 +189,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
ranges=UniformVelocityCommandCfg.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-1.0, 1.0),
ang_vel_z=(-0.5, 0.5),
ang_vel_z=(-1.0, 1.0),
heading=(-math.pi, math.pi),
),
)
Expand Down Expand Up @@ -395,17 +397,6 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
func=mdp.terrain_levels_vel,
params={"command_name": "twist"},
),
"command_vel": CurriculumTermCfg(
func=mdp.commands_vel,
params={
"command_name": "twist",
"velocity_stages": [
{"step": 0, "lin_vel_x": (-1.0, 1.0), "ang_vel_z": (-0.5, 0.5)},
{"step": 5000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-0.7, 0.7)},
{"step": 10000 * 24, "lin_vel_x": (-2.0, 3.0)},
],
},
),
}

##
Expand Down Expand Up @@ -440,8 +431,6 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
azimuth=90.0,
),
sim=SimulationCfg(
nconmax=35,
njmax=1500,
mujoco=MujocoCfg(
timestep=0.005,
iterations=10,
Expand Down
Loading
Loading