Fixes on-policy runner to handle intrinsic rewards (#71)

larsrpe · Mayankm96 · Mayankm96 · commit aaac187e3ee1 · 2025-03-05T12:47:21.000+01:00
This commit fixes two potential bugs in the Runner:
    1: Accessing a dictionary that is None when either alg_cfg["rnd_cfg"] or
    alg_cfg["symmetry_cfg"] is None.
    2: Accessing alg.intrinsic_rewards before it is created in
    alg.process_env_step(...).

Co-authored-by: Mayank Mittal <12863862+Mayankm96@users.noreply.github.com>
diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
@@ -41,7 +41,7 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
         ).to(self.device)
 
         # resolve dimension of rnd gated state
-        if "rnd_cfg" in self.alg_cfg and self.alg_cfg["rnd_cfg"] is not None:
+        if self.alg_cfg.get("rnd_cfg") is not None:
             # check if rnd gated state is present
             rnd_state = extras["observations"].get("rnd_state")
             if rnd_state is None:
@@ -54,7 +54,7 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
             self.alg_cfg["rnd_cfg"]["weight"] *= env.dt
 
         # if using symmetry then pass the environment config object
-        if "symmetry_cfg" in self.alg_cfg and self.alg_cfg["symmetry_cfg"] is not None:
+        if self.alg_cfg.get("symmetry_cfg") is not None:
             # this is used by the symmetry function for handling different observation terms
             self.alg_cfg["symmetry_cfg"]["_env"] = env
 
@@ -161,12 +161,12 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
                     else:
                         critic_obs = obs
 
-                    # Intrinsic rewards (extracted here only for logging)!
-                    intrinsic_rewards = self.alg.intrinsic_rewards if self.alg.rnd else None
-
                     # Process env step and store in buffer
                     self.alg.process_env_step(rewards, dones, infos)
 
+                    # Intrinsic rewards (extracted here only for logging)!
+                    intrinsic_rewards = self.alg.intrinsic_rewards if self.alg.rnd else None
+
                     if self.log_dir is not None:
                         # Book keeping
                         if "episode" in infos: