Improve reward functions with adaptive scaling and stability checks

swamy18 · web-flow · commit 76995d896f7e · 2025-11-19T14:47:07.000+05:30
- Add adaptive std scaling based on episode progress
- Add reward clipping to prevent extreme values
- Add velocity stability bonus in object_goal_distance
- Add new action_smoothness_penalty function
- Add new grasp_success_bonus function for better reward shaping

Signed-off-by: Swamy Gadila &lt;122666091+swamy18@users.noreply.github.com&gt;
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/mdp/rewards.py
@@ -31,7 +31,7 @@ def object_ee_distance(
     object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
     ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
 ) -> torch.Tensor:
-    """Reward the agent for reaching the object using tanh-kernel."""
+    """Reward the agent for reaching the object using tanh-kernel with improvements."""
     # extract the used quantities (to enable type-hinting)
     object: RigidObject = env.scene[object_cfg.name]
     ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
@@ -41,8 +41,16 @@ def object_ee_distance(
     ee_w = ee_frame.data.target_pos_w[..., 0, :]
     # Distance of the end-effector to the object: (num_envs,)
     object_ee_distance = torch.norm(cube_pos_w - ee_w, dim=1)
-
-    return 1 - torch.tanh(object_ee_distance / std)
+    
+    # IMPROVEMENT 1: Add adaptive scaling based on episode progress
+    episode_progress = env.episode_length_buf.float() / env.max_episode_length
+    std_adaptive = std * (1.0 + 0.1 * episode_progress)
+    
+    # IMPROVEMENT 2: Calculate reward with clipping to prevent extreme values
+    reward = 1 - torch.tanh(object_ee_distance / std_adaptive)
+    reward = torch.clamp(reward, 0.0, 1.0)
+    
+    return reward
 
 
 def object_goal_distance(
@@ -53,7 +61,7 @@ def object_goal_distance(
     robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
     object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
 ) -> torch.Tensor:
-    """Reward the agent for tracking the goal pose using tanh-kernel."""
+    """Reward the agent for tracking the goal pose using tanh-kernel with improvements."""
     # extract the used quantities (to enable type-hinting)
     robot: RigidObject = env.scene[robot_cfg.name]
     object: RigidObject = env.scene[object_cfg.name]
@@ -63,5 +71,57 @@ def object_goal_distance(
     des_pos_w, _ = combine_frame_transforms(robot.data.root_pos_w, robot.data.root_quat_w, des_pos_b)
     # distance of the end-effector to the object: (num_envs,)
     distance = torch.norm(des_pos_w - object.data.root_pos_w, dim=1)
-    # rewarded if the object is lifted above the threshold
-    return (object.data.root_pos_w[:, 2] > minimal_height) * (1 - torch.tanh(distance / std))
+    
+    # IMPROVEMENT 1: Check if object is lifted
+    is_lifted = object.data.root_pos_w[:, 2] > minimal_height
+    
+    # IMPROVEMENT 2: Add velocity stability bonus
+    velocity = torch.norm(object.data.root_lin_vel_w, dim=1)
+    velocity_bonus = torch.exp(-2.0 * velocity)  # Reward stability
+    
+    # IMPROVEMENT 3: Combined reward with clipping
+    distance_reward = 1 - torch.tanh(distance / std)
+    combined_reward = is_lifted.float() * distance_reward * velocity_bonus
+    combined_reward = torch.clamp(combined_reward, 0.0, 1.0)
+    
+    return combined_reward
+
+
+def action_smoothness_penalty(
+    env: ManagerBasedRLEnv,
+    penalty_scale: float = 0.01,
+) -> torch.Tensor:
+    """NEW: Penalize large action changes to encourage smooth movements."""
+    if not hasattr(env, '_prev_actions'):
+        env._prev_actions = torch.zeros_like(env.action_manager.action)
+        return torch.zeros(env.num_envs, device=env.device)
+    
+    action_diff = torch.norm(env.action_manager.action - env._prev_actions, dim=1)
+    env._prev_actions = env.action_manager.action.clone()
+    
+    penalty = -penalty_scale * action_diff
+    return penalty
+
+
+def grasp_success_bonus(
+    env: ManagerBasedRLEnv,
+    bonus_value: float = 2.0,
+    object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
+    ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
+) -> torch.Tensor:
+    """NEW: Provide large bonus when object is successfully grasped and stable."""
+    object: RigidObject = env.scene[object_cfg.name]
+    ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
+    
+    # Check if object is close to gripper
+    cube_pos_w = object.data.root_pos_w
+    ee_w = ee_frame.data.target_pos_w[..., 0, :]
+    distance = torch.norm(cube_pos_w - ee_w, dim=1)
+    
+    # Check if object velocity is low (stable grasp)
+    velocity = torch.norm(object.data.root_lin_vel_w, dim=1)
+    
+    # Grasp is successful if distance < 0.05m and velocity < 0.1 m/s
+    successful_grasp = (distance < 0.05) & (velocity < 0.1)
+    
+    return torch.where(successful_grasp, torch.tensor(bonus_value, device=env.device), torch.tensor(0.0, device=env.device))