@@ -30,7 +30,7 @@ def __init__(
         state_normalization: bool = False,
         reward_normalization: bool = False,
         device: str = "cpu",
-        weight_schedule: str | None = None,
+        weight_schedule: dict | None = None,
         **kwargs,
     ):
         """Initialize the RND module.
@@ -53,8 +53,13 @@ def __init__(
             state_normalization: Whether to normalize the input state. Defaults to False.
             reward_normalization: Whether to normalize the intrinsic reward. Defaults to False.
             device: Device to use. Defaults to "cpu".
-            weight_schedule: The type of schedule to use for the RND weight parameter. Must be one of ["constant", "step"].
+            weight_schedule: Configuration of the schedule for the RND weight parameter.
                 Defaults to None, in which case the weight parameter is constant.
+                Otherwise, it is a dictionary with the following keys:
+
+                - "mode": The type of schedule to use for the RND weight parameter.
+                - "max_num_steps": Maximum number of steps per episode. Used for the weight schedule of type "step".
+                - "final_value": Final value of the weight parameter. Used for the weight schedule of type "step".

         Keyword Args:

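For readers skimming the docstring change, a minimal sketch of how the new dict-valued `weight_schedule` argument might be constructed; only the keys come from the docstring above, while the class name, import path, and concrete values are assumptions for illustration.

```python
# Sketch only: the class name / import path are assumed, not taken from this diff.
# from rnd_module import RND

# After this change, weight_schedule is a dict instead of a plain string.
weight_schedule = {
    "mode": "step",          # type of schedule for the RND weight
    "max_num_steps": 1000,   # maximum number of steps per episode (illustrative value)
    "final_value": 0.0,      # weight value the schedule moves toward (illustrative value)
}

# rnd = RND(device="cpu", weight_schedule=weight_schedule)
```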
@@ -111,7 +116,7 @@ def get_intrinsic_reward(self, gated_state) -> tuple[torch.Tensor, torch.Tensor]

         # Check the weight schedule
         if self.weight_scheduler is not None:
-            self.weight = self.weight_scheduler(self.update_counter, **self.weight_scheduler_params)
+            self.weight = self.weight_scheduler(step=self.update_counter, **self.weight_scheduler_params)
         else:
             self.weight = self.initial_weight
         # Scale intrinsic reward
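The only behavioral change in this hunk is that the current step is now passed by keyword. Below is a minimal sketch of a scheduler callable compatible with the new call; the function name, parameter names, and the annealing rule are assumptions for illustration, not the repository's actual scheduler.

```python
# Hypothetical scheduler compatible with
#     self.weight = self.weight_scheduler(step=self.update_counter, **self.weight_scheduler_params)
# Name, parameters, and annealing rule are illustrative assumptions, not this repo's code.
def step_weight_schedule(step: int, initial_value: float, final_value: float, max_num_steps: int) -> float:
    """Anneal the RND weight from initial_value to final_value over max_num_steps steps."""
    fraction = min(step / max_num_steps, 1.0)
    return initial_value + fraction * (final_value - initial_value)


# Mirrors the updated call site, with weight_scheduler_params supplying everything but `step`:
weight_scheduler_params = {"initial_value": 1.0, "final_value": 0.0, "max_num_steps": 1000}
weight = step_weight_schedule(step=250, **weight_scheduler_params)  # -> 0.75
```

Passing `step` as a keyword keeps the scheduler's signature explicit and leaves `weight_scheduler_params` free to carry only the schedule-specific settings (e.g. the dict keys documented above).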