Skip to content
2 changes: 2 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ Upcoming version (not yet released)
Added
^^^^^

- Added ``Mjlab-Velocity-Flat-Run-Unitree-G1`` task with velocity command
curriculum for training the G1 to run on flat terrain.
- Added ``STAIRS_TERRAINS_CFG`` terrain preset for progressive stair
curriculum training and ``@terrain_preset`` decorator for composing
terrain configurations from reusable presets.
Expand Down
16 changes: 13 additions & 3 deletions src/mjlab/sensor/terrain_height_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,19 @@ def _compute_data(self) -> TerrainHeightData:
heights = frame_z.unsqueeze(-1) - hit_z # [B, F, N]

miss = raw.distances.view(B, F, N) < 0
heights = torch.where(
miss, torch.full_like(heights, self.cfg.max_distance), heights
)
# When all rays for a frame miss there are two cases:
# 1. Frame is below or at the terrain surface (rays start below and
# point down, never hitting anything). True clearance is ~0.
# 2. Frame is genuinely above max_distance. True clearance >=
# max_distance.
# We distinguish them using frame_z clamped to [0, max_distance].
# For partial misses (some rays hit, some don't), max_distance is
# the right fallback since the frame is above terrain.
all_miss = miss.all(dim=-1, keepdim=True).expand_as(miss) # [B, F, N]
fallback = frame_z.unsqueeze(-1).clamp(0, self.cfg.max_distance)
fallback = fallback.expand_as(heights) # [B, F, N]
miss_value = torch.where(all_miss, fallback, self.cfg.max_distance)
heights = torch.where(miss, miss_value, heights)

reduction = self.cfg.reduction
if reduction == "min":
Expand Down
9 changes: 9 additions & 0 deletions src/mjlab/tasks/velocity/config/g1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .env_cfgs import (
unitree_g1_flat_env_cfg,
unitree_g1_flat_run_env_cfg,
unitree_g1_rough_env_cfg,
)
from .rl_cfg import unitree_g1_ppo_runner_cfg
Expand All @@ -22,3 +23,11 @@
rl_cfg=unitree_g1_ppo_runner_cfg(),
runner_cls=VelocityOnPolicyRunner,
)

# Flat-terrain "run" variant of the G1 velocity task. Reuses the same PPO
# runner config as the walking task; the difference lives entirely in the
# env config (a staged velocity-command curriculum, see env_cfgs.py).
register_mjlab_task(
  task_id="Mjlab-Velocity-Flat-Run-Unitree-G1",
  env_cfg=unitree_g1_flat_run_env_cfg(),
  # play=True configures the env for interactive rollout/evaluation.
  play_env_cfg=unitree_g1_flat_run_env_cfg(play=True),
  rl_cfg=unitree_g1_ppo_runner_cfg(),
  runner_cls=VelocityOnPolicyRunner,
)
34 changes: 25 additions & 9 deletions src/mjlab/tasks/velocity/config/g1/env_cfgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mjlab.envs import ManagerBasedRlEnvCfg
from mjlab.envs import mdp as envs_mdp
from mjlab.envs.mdp.actions import JointPositionActionCfg
from mjlab.managers.curriculum_manager import CurriculumTermCfg
from mjlab.managers.event_manager import EventTermCfg
from mjlab.managers.reward_manager import RewardTermCfg
from mjlab.sensor import (
Expand All @@ -26,9 +27,8 @@ def unitree_g1_rough_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree G1 rough terrain velocity configuration."""
cfg = make_velocity_env_cfg()

cfg.sim.mujoco.ccd_iterations = 500
cfg.sim.contact_sensor_maxmatch = 500
cfg.sim.nconmax = 70
cfg.sim.njmax = 200
cfg.sim.nconmax = 30
Comment on lines +30 to +31
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, such a low njmax/nconmax work?


cfg.scene.entities = {"robot": get_g1_robot_cfg()}

Expand Down Expand Up @@ -189,10 +189,7 @@ def unitree_g1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree G1 flat terrain velocity configuration."""
cfg = unitree_g1_rough_env_cfg(play=play)

cfg.sim.njmax = 300
cfg.sim.mujoco.ccd_iterations = 50
cfg.sim.contact_sensor_maxmatch = 64
cfg.sim.nconmax = None
cfg.sim.njmax = 170

# Switch to flat terrain.
assert cfg.scene.terrain is not None
Expand All @@ -211,10 +208,29 @@ def unitree_g1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
# Disable terrain curriculum (not present in play mode since rough clears all).
cfg.curriculum.pop("terrain_levels", None)

return cfg


def unitree_g1_flat_run_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
  """G1 flat terrain with velocity curriculum for learning to run.

  Builds on the flat walking config and adds a staged velocity-command
  curriculum that progressively widens the commanded linear/angular
  velocity ranges over training.

  Args:
    play: If True, skip the curriculum schedule's effect on evaluation by
      pinning the twist command ranges to the final (widest) stage.

  Returns:
    The configured environment config.
  """
  cfg = unitree_g1_flat_env_cfg(play=play)

  # Stage thresholds are in env steps: PPO iterations * num_steps_per_env (24).
  cfg.curriculum["command_vel"] = CurriculumTermCfg(
    func=mdp.commands_vel,
    params={
      "command_name": "twist",
      "velocity_stages": [
        {"step": 0, "lin_vel_x": (-1.0, 1.0)},
        {"step": 5000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-1.5, 1.5)},
        {"step": 10000 * 24, "lin_vel_x": (-2.0, 3.0), "ang_vel_z": (-2.0, 2.0)},
      ],
    },
  )

  if play:
    twist_cmd = cfg.commands["twist"]
    assert isinstance(twist_cmd, UniformVelocityCommandCfg)
    # Pin play-mode commands to the final curriculum stage's ranges.
    # (Removed a stale pair of assignments — (-1.5, 2.0) / (-0.7, 0.7) —
    # that were dead code, immediately overwritten by the lines below.)
    twist_cmd.ranges.lin_vel_x = (-2.0, 3.0)
    twist_cmd.ranges.ang_vel_z = (-2.0, 2.0)

  return cfg
4 changes: 2 additions & 2 deletions src/mjlab/tasks/velocity/config/go1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
unitree_go1_flat_env_cfg,
unitree_go1_rough_env_cfg,
)
from .rl_cfg import unitree_go1_ppo_runner_cfg
from .rl_cfg import unitree_go1_flat_ppo_runner_cfg, unitree_go1_ppo_runner_cfg

register_mjlab_task(
task_id="Mjlab-Velocity-Rough-Unitree-Go1",
Expand All @@ -19,6 +19,6 @@
task_id="Mjlab-Velocity-Flat-Unitree-Go1",
env_cfg=unitree_go1_flat_env_cfg(),
play_env_cfg=unitree_go1_flat_env_cfg(play=True),
rl_cfg=unitree_go1_ppo_runner_cfg(),
rl_cfg=unitree_go1_flat_ppo_runner_cfg(),
runner_cls=VelocityOnPolicyRunner,
)
35 changes: 19 additions & 16 deletions src/mjlab/tasks/velocity/config/go1/env_cfgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
TerrainHeightSensorCfg,
)
from mjlab.tasks.velocity import mdp
from mjlab.tasks.velocity.mdp import UniformVelocityCommandCfg
from mjlab.tasks.velocity.velocity_env_cfg import make_velocity_env_cfg

TerrainType = Literal["rough", "obstacles"]
Expand All @@ -35,10 +34,11 @@ def unitree_go1_rough_env_cfg(
"""Create Unitree Go1 rough terrain velocity configuration."""
cfg = make_velocity_env_cfg()

cfg.sim.mujoco.ccd_iterations = 500
cfg.sim.njmax = 120
cfg.sim.nconmax = 20

cfg.sim.mujoco.impratio = 10
cfg.sim.mujoco.cone = "elliptic"
cfg.sim.contact_sensor_maxmatch = 500

cfg.scene.entities = {"robot": get_go1_robot_cfg()}

Expand Down Expand Up @@ -203,9 +203,11 @@ def unitree_go1_rough_env_cfg(
for reward_name in ["foot_clearance", "foot_slip"]:
cfg.rewards[reward_name].params["asset_cfg"].site_names = site_names

cfg.rewards["body_ang_vel"].weight = 0.0
cfg.rewards["angular_momentum"].weight = 0.0
cfg.rewards["body_ang_vel"].weight = -1e-4
cfg.rewards["angular_momentum"].weight = -1e-4
cfg.rewards["air_time"].weight = 0.0
cfg.rewards["joint_vel_l2"] = RewardTermCfg(func=mdp.joint_vel_l2, weight=-1e-5)
cfg.rewards["joint_acc_l2"] = RewardTermCfg(func=mdp.joint_acc_l2, weight=-1e-7)

# Per-body-group collision penalties.
cfg.rewards["self_collisions"] = RewardTermCfg(
Expand All @@ -218,6 +220,11 @@ def unitree_go1_rough_env_cfg(
weight=-0.1,
params={"sensor_name": shank_ground_cfg.name},
)
cfg.rewards["thigh_collision"] = RewardTermCfg(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WDYT about an alive reward? I was thinking it could be helpful especially for rough terrain

func=mdp.self_collision_cost,
weight=-0.5,
params={"sensor_name": thigh_ground_cfg.name},
)
cfg.rewards["trunk_head_collision"] = RewardTermCfg(
func=mdp.self_collision_cost,
weight=-0.1,
Expand Down Expand Up @@ -262,10 +269,7 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
"""Create Unitree Go1 flat terrain velocity configuration."""
cfg = unitree_go1_rough_env_cfg(play=play)

cfg.sim.njmax = 300
cfg.sim.mujoco.ccd_iterations = 50
cfg.sim.contact_sensor_maxmatch = 64
cfg.sim.nconmax = None
cfg.sim.njmax = 50

# Switch to flat terrain.
assert cfg.scene.terrain is not None
Expand All @@ -288,7 +292,12 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
cfg.rewards["upright"].params.pop("terrain_sensor_names", None)

# Remove granular collision rewards (not useful on flat ground).
for key in ("self_collisions", "shank_collision", "trunk_head_collision"):
for key in (
"self_collisions",
"shank_collision",
"thigh_collision",
"trunk_head_collision",
):
cfg.rewards.pop(key, None)

# On flat terrain fell_over is sufficient; thigh contact implies fallen.
Expand All @@ -302,10 +311,4 @@ def unitree_go1_flat_env_cfg(play: bool = False) -> ManagerBasedRlEnvCfg:
# Disable terrain curriculum (not present in play mode since rough clears all).
cfg.curriculum.pop("terrain_levels", None)

if play:
twist_cmd = cfg.commands["twist"]
assert isinstance(twist_cmd, UniformVelocityCommandCfg)
twist_cmd.ranges.lin_vel_x = (-1.5, 2.0)
twist_cmd.ranges.ang_vel_z = (-0.7, 0.7)

return cfg
13 changes: 11 additions & 2 deletions src/mjlab/tasks/velocity/config/go1/rl_cfg.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""RL configuration for Unitree Go1 velocity task."""

from functools import partial

from mjlab.rl import (
RslRlModelCfg,
RslRlOnPolicyRunnerCfg,
RslRlPpoAlgorithmCfg,
)


def unitree_go1_ppo_runner_cfg() -> RslRlOnPolicyRunnerCfg:
def unitree_go1_ppo_runner_cfg(
max_iterations: int = 10_000,
) -> RslRlOnPolicyRunnerCfg:
"""Create RL runner configuration for Unitree Go1 velocity task."""
return RslRlOnPolicyRunnerCfg(
actor=RslRlModelCfg(
Expand Down Expand Up @@ -42,5 +46,10 @@ def unitree_go1_ppo_runner_cfg() -> RslRlOnPolicyRunnerCfg:
experiment_name="go1_velocity",
save_interval=50,
num_steps_per_env=24,
max_iterations=10_000,
max_iterations=max_iterations,
)


# Flat-terrain variant of the Go1 runner config: identical hyperparameters,
# but a much shorter schedule (1,500 iterations vs. the rough-terrain default
# of 10,000), since flat-ground locomotion converges far faster.
unitree_go1_flat_ppo_runner_cfg = partial(
  unitree_go1_ppo_runner_cfg, max_iterations=1_500
)
7 changes: 3 additions & 4 deletions src/mjlab/tasks/velocity/mdp/curriculums.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def terrain_levels_vel(
# Robots that walked far enough progress to harder terrains.
move_up = distance > terrain_generator.size[0] / 2

# Robots that walked less than half of their required distance go to
# simpler terrains.
# Robots that walked less than half of their required distance go to simpler terrains.
move_down = (
distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5
)
Expand All @@ -64,8 +63,8 @@ def terrain_levels_vel(
"max": torch.max(levels),
}

# In curriculum mode num_cols == num_terrains (one column per type),
# so the column index directly maps to the sub-terrain name.
# In curriculum mode num_cols == num_terrains (one column per type), so the column
# index directly maps to the sub-terrain name.
sub_terrain_names = list(terrain_generator.sub_terrains.keys())
terrain_origins = terrain.terrain_origins
assert terrain_origins is not None
Expand Down
5 changes: 5 additions & 0 deletions src/mjlab/tasks/velocity/mdp/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRlEnv):
)
self.step_dt = env.step_dt

def reset(self, env_ids: torch.Tensor | slice | None = None) -> None:
if env_ids is None:
env_ids = slice(None)
self.peak_heights[env_ids] = 0.0

def __call__(
self,
env: ManagerBasedRlEnv,
Expand Down
17 changes: 3 additions & 14 deletions src/mjlab/tasks/velocity/velocity_env_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
"joint_pos": ObservationTermCfg(
func=mdp.joint_pos_rel,
noise=Unoise(n_min=-0.01, n_max=0.01),
params={"biased": True},
),
"joint_vel": ObservationTermCfg(
func=mdp.joint_vel_rel,
Expand All @@ -111,6 +112,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:

critic_terms = {
**actor_terms,
"joint_pos": ObservationTermCfg(func=mdp.joint_pos_rel),
"height_scan": ObservationTermCfg(
func=envs_mdp.height_scan,
params={"sensor_name": "terrain_scan"},
Expand Down Expand Up @@ -187,7 +189,7 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
ranges=UniformVelocityCommandCfg.Ranges(
lin_vel_x=(-1.0, 1.0),
lin_vel_y=(-1.0, 1.0),
ang_vel_z=(-0.5, 0.5),
ang_vel_z=(-1.0, 1.0),
heading=(-math.pi, math.pi),
),
)
Expand Down Expand Up @@ -395,17 +397,6 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
func=mdp.terrain_levels_vel,
params={"command_name": "twist"},
),
"command_vel": CurriculumTermCfg(
func=mdp.commands_vel,
params={
"command_name": "twist",
"velocity_stages": [
{"step": 0, "lin_vel_x": (-1.0, 1.0), "ang_vel_z": (-0.5, 0.5)},
{"step": 5000 * 24, "lin_vel_x": (-1.5, 2.0), "ang_vel_z": (-0.7, 0.7)},
{"step": 10000 * 24, "lin_vel_x": (-2.0, 3.0)},
],
},
),
}

##
Expand Down Expand Up @@ -440,8 +431,6 @@ def make_velocity_env_cfg() -> ManagerBasedRlEnvCfg:
azimuth=90.0,
),
sim=SimulationCfg(
nconmax=35,
njmax=1500,
mujoco=MujocoCfg(
timestep=0.005,
iterations=10,
Expand Down
Loading
Loading