
Commit 38544a5

[BE] Catching common errors in env.rollout and rb.add (#3102)
1 parent 1eccb49 commit 38544a5

File tree

6 files changed: 139 additions & 25 deletions


test/mocking_classes.py

Lines changed: 66 additions & 0 deletions
@@ -2588,3 +2588,69 @@ def _step(self, tensordict: TensorDict) -> TensorDict:
 
     def _set_seed(self):
         pass
+
+
+class EnvThatErrorsBecauseOfStack(EnvBase):
+    def __init__(self, target: int = 5, batch_size: int | None = None):
+        super().__init__(device="cpu", batch_size=batch_size)
+        self.target = target
+        self.observation_spec = Bounded(
+            low=0, high=self.target, shape=(1,), dtype=torch.int64
+        )
+        self.action_spec = Categorical(n=2, shape=(1,), dtype=torch.int64)
+        self.reward_spec = Unbounded(shape=(1,), dtype=torch.float32)
+        self.done_spec = Categorical(n=2, shape=(1,), dtype=torch.bool)
+
+    def _reset(self, tensordict: TensorDict | None = None, **kwargs) -> TensorDict:
+        if tensordict is None:
+            tensordict = TensorDict(batch_size=self.batch_size, device=self.device)
+
+        observation = torch.zeros(
+            self.batch_size, dtype=self.observation_spec.dtype, device=self.device
+        )
+        reward = torch.zeros(
+            self.batch_size + torch.Size([1]),
+            dtype=self.reward_spec.dtype,
+            device=self.device,
+        )
+        done = torch.zeros(
+            self.batch_size + torch.Size([1]), dtype=torch.bool, device=self.device
+        )
+        terminated = torch.zeros_like(done)
+        action = torch.zeros(
+            self.batch_size + torch.Size([1]), dtype=torch.int64, device=self.device
+        )
+
+        tensordict.set(self.observation_keys[0], observation)
+        tensordict.set(self.reward_key, reward)
+        tensordict.set(self.done_keys[0], done)
+        tensordict.set("terminated", terminated)
+        tensordict.set(self.action_keys[0], action)
+
+        return tensordict
+
+    def _step(self, tensordict: TensorDict) -> TensorDict:
+        obs = tensordict.get(
+            self.observation_keys[0]
+        )  # the counter value, or the counter values if the env is batched
+        action = tensordict.get(self.action_keys[0]).squeeze(-1)
+
+        new_obs = obs + (action == 1).to(obs.dtype)
+        new_obs = new_obs.clamp_max(self.target)
+        reward = (new_obs == self.target).to(self.reward_spec.dtype).unsqueeze(-1)
+        done = (new_obs == self.target).to(torch.bool).unsqueeze(-1)
+        terminated = done.clone()
+        return TensorDict(
+            {
+                self.observation_keys[0]: new_obs,
+                self.reward_keys[0]: reward,
+                self.done_keys[0]: done,
+                "terminated": terminated,
+                self.action_keys[0]: action.unsqueeze(-1),
+            },
+            batch_size=self.batch_size,
+            device=self.device,
+        )
+
+    def _set_seed(self, seed: int | None) -> None:
+        return 0
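
A minimal sketch of how this mock surfaces the new error (assuming the class above is importable from test.mocking_classes); because `_reset` writes the reward into the root tensordict, the dense stack performed by rollout now fails with the descriptive RuntimeError rather than an opaque stacking error:

    from test.mocking_classes import EnvThatErrorsBecauseOfStack

    env = EnvThatErrorsBecauseOfStack()
    try:
        # return_contiguous=True forces a dense stack of the per-step tensordicts
        env.rollout(10, return_contiguous=True)
    except RuntimeError as err:
        print(err)  # "The reward key was present in the root tensordict ..."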

test/test_env.py

Lines changed: 16 additions & 0 deletions
@@ -136,6 +136,7 @@
     DiscreteActionVecMockEnv,
     DummyModelBasedEnvBase,
     EnvThatDoesNothing,
+    EnvThatErrorsBecauseOfStack,
     EnvWithDynamicSpec,
     EnvWithMetadata,
     EnvWithTensorClass,
@@ -178,6 +179,7 @@
     DiscreteActionVecMockEnv,
     DummyModelBasedEnvBase,
     EnvThatDoesNothing,
+    EnvThatErrorsBecauseOfStack,
     EnvWithDynamicSpec,
     EnvWithMetadata,
     EnvWithTensorClass,
@@ -344,6 +346,20 @@ def forward(self, values):
         )
         env.rollout(10, policy)
 
+    def test_stack_error(self):
+        env = EnvThatErrorsBecauseOfStack()
+        assert not env._has_dynamic_specs
+        cm = pytest.raises(
+            RuntimeError,
+            match="The reward key was present in the root tensordict of at least one of the tensordicts to stack",
+        )
+        with cm:
+            env.check_env_specs()
+        with cm:
+            env.rollout(10, break_when_any_done=True, return_contiguous=True)
+        with cm:
+            env.rollout(10, break_when_any_done=False, return_contiguous=True)
+
     @pytest.mark.parametrize("dynamic_shape", [True, False])
     def test_make_spec_from_td(self, dynamic_shape):
         data = TensorDict(
test/test_rb.py

Lines changed: 9 additions & 0 deletions
@@ -1790,6 +1790,15 @@ def test_batch_errors():
     rb.sample()
 
 
+def test_add_warning():
+    rb = ReplayBuffer(storage=ListStorage(10), batch_size=3)
+    with pytest.warns(
+        UserWarning,
+        match=r"Using `add\(\)` with a TensorDict that has batch_size",
+    ):
+        rb.add(TensorDict(batch_size=[1]))
+
+
 @pytest.mark.parametrize("priority_key", ["pk", "td_error"])
 @pytest.mark.parametrize("contiguous", [True, False])
 @pytest.mark.parametrize("device", get_default_devices())

torchrl/data/replay_buffers/replay_buffers.py

Lines changed: 8 additions & 1 deletion
@@ -39,7 +39,7 @@
 from torch import Tensor
 from torch.utils._pytree import tree_map
 
-from torchrl._utils import accept_remote_rref_udf_invocation
+from torchrl._utils import accept_remote_rref_udf_invocation, RL_WARNINGS
 from torchrl.data.replay_buffers.samplers import (
     PrioritizedSampler,
     RandomSampler,
@@ -719,6 +719,13 @@ def add(self, data: Any) -> int:
             data = None
         if data is None:
             return torch.zeros((0, self._storage.ndim), dtype=torch.long)
+        if RL_WARNINGS and is_tensor_collection(data) and data.ndim:
+            warnings.warn(
+                f"Using `add()` with a TensorDict that has batch_size={data.batch_size}. "
+                f"Use `extend()` to add multiple elements, or `add()` with a single element (batch_size=torch.Size([])). "
+                "You can silence this warning by setting the `RL_WARNINGS` environment variable to `'0'`."
+            )
         return self._add(data)
 
     def _add(self, data):
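
A minimal usage sketch of the pattern the new warning steers toward (assuming the standard ReplayBuffer, LazyTensorStorage and TensorDict APIs):

    import torch
    from tensordict import TensorDict
    from torchrl.data import LazyTensorStorage, ReplayBuffer

    rb = ReplayBuffer(storage=LazyTensorStorage(100))

    # add() expects a single element, i.e. batch_size=torch.Size([])
    rb.add(TensorDict({"obs": torch.zeros(3)}, batch_size=[]))

    # extend() is the right call for a batch of elements
    rb.extend(TensorDict({"obs": torch.zeros(4, 3)}, batch_size=[4]))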

torchrl/envs/common.py

Lines changed: 18 additions & 4 deletions
@@ -6,6 +6,7 @@
 from __future__ import annotations
 
 import abc
+import re
 import warnings
 import weakref
 from copy import deepcopy
@@ -725,7 +726,6 @@ def auto_specs_(
 
         return self
 
-    @wraps(check_env_specs_func)
     def check_env_specs(self, *args, **kwargs):
         kwargs.setdefault("return_contiguous", not self._has_dynamic_specs)
         return check_env_specs_func(self, *args, **kwargs)
@@ -2927,7 +2927,7 @@ def _reset_check_done(self, tensordict, tensordict_reset):
         ):
             warnings.warn(
                 f"A partial `'_reset'` key has been passed to `reset` ({reset_key}), "
-                f"but the corresponding done_key ({done_key}) was not present in the input "
+                f"but the corresponding done_key ({done_key}) wasn't present in the input "
                 f"tensordict. "
                 f"This is discouraged, since the input tensordict should contain "
                 f"all the data not being reset."
@@ -3387,12 +3387,26 @@ def rollout(
                 out_td = torch.stack(tensordicts, len(batch_size), out=out)
             except RuntimeError as err:
                 if (
-                    "The shapes of the tensors to stack is incompatible" in str(err)
+                    re.match(
+                        "The shapes of the tensors to stack is incompatible", str(err)
+                    )
                     and self._has_dynamic_specs
                 ):
                     raise RuntimeError(
                         "The environment specs are dynamic. Call rollout with return_contiguous=False."
                     )
+                if re.match(
+                    "The sets of keys in the tensordicts to stack are exclusive",
+                    str(err),
+                ):
+                    for reward_key in self.reward_keys:
+                        if any(reward_key in td for td in tensordicts):
+                            raise RuntimeError(
+                                "The reward key was present in the root tensordict of at least one of the tensordicts to stack. "
+                                "The likely cause is that your environment returns a reward during a call to `reset`, which is not allowed. "
+                                "To fix this, you should return the reward in the `step` method but not during `reset`. If you need a reward "
+                                "to be returned during `reset`, submit an issue on github."
+                            )
                 raise
         else:
             out_td = LazyStackedTensorDict.maybe_dense_stack(
@@ -3967,7 +3981,7 @@ def __getattr__(self, attr: str) -> Any:
             super().__getattr__(attr)
 
         raise AttributeError(
-            f"env not set in {self.__class__.__name__}, cannot access {attr}"
+            f"The env wasn't set in {self.__class__.__name__}, cannot access {attr}"
         )
 
     @abc.abstractmethod
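
A minimal sketch of the fix the new error message asks for, modeled on the mock env above (hypothetical `_reset`; specs and keys as in EnvThatErrorsBecauseOfStack): write observation/done/terminated at reset time and leave the reward to `_step` only:

    def _reset(self, tensordict=None, **kwargs):
        if tensordict is None:
            tensordict = TensorDict(batch_size=self.batch_size, device=self.device)
        observation = torch.zeros(
            self.batch_size, dtype=self.observation_spec.dtype, device=self.device
        )
        done = torch.zeros(
            self.batch_size + torch.Size([1]), dtype=torch.bool, device=self.device
        )
        tensordict.set(self.observation_keys[0], observation)
        tensordict.set(self.done_keys[0], done)
        tensordict.set("terminated", done.clone())
        # no reward key here: rewards belong to `_step`, never to `_reset`
        return tensordict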

torchrl/envs/utils.py

Lines changed: 22 additions & 20 deletions
@@ -191,7 +191,7 @@ def _is_reset(key: NestedKey):
             "extra keys can be present in the input TensorDict). "
             "As a result, step_mdp will need to run extra key checks at each iteration. "
             f"{{Expected keys}}-{{Actual keys}}={set(expected) - actual} (<= this set should be empty), \n"
-            f"{{Actual keys}}-{{Expected keys}}={actual- set(expected)}."
+            f"{{Actual keys}}-{{Expected keys}}={actual - set(expected)}."
         )
         return self.validated
 
@@ -689,7 +689,7 @@ def check_env_specs(
     check_dtype=True,
     seed: int | None = None,
     tensordict: TensorDictBase | None = None,
-    break_when_any_done: bool | Literal["both"] = None,
+    break_when_any_done: bool | Literal["both"] | None = None,
 ):
     """Tests an environment specs against the results of short rollout.
 
@@ -786,10 +786,12 @@ def check_env_specs(
         real_tensordict.keys(True, True, is_leaf=_is_leaf_nontensor)
     )
     if fake_tensordict_keys != real_tensordict_keys:
+        keys_in_real_not_in_fake = real_tensordict_keys - fake_tensordict_keys
+        keys_in_fake_not_in_real = fake_tensordict_keys - real_tensordict_keys
         raise AssertionError(
             f"""The keys of the specs and data do not match:
-    - List of keys present in real but not in fake: {real_tensordict_keys-fake_tensordict_keys},
-    - List of keys present in fake but not in real: {fake_tensordict_keys-real_tensordict_keys}.
+    - List of keys present in real but not in fake: {keys_in_real_not_in_fake=},
+    - List of keys present in fake but not in real: {keys_in_fake_not_in_real=}.
    """
         )
 
@@ -1105,14 +1107,14 @@ def check_marl_grouping(group_map: dict[str, list[str]], agent_names: list[str])
             raise ValueError(f"Group {group_name} is empty")
         for agent_name in group:
             if agent_name not in found_agents:
-                raise ValueError(f"Agent {agent_name} not present in environment")
+                raise ValueError(f"Agent {agent_name} wasn't present in environment")
             if not found_agents[agent_name]:
                 found_agents[agent_name] = True
             else:
                 raise ValueError(f"Agent {agent_name} present more than once")
     for agent_name, found in found_agents.items():
         if not found:
-            raise ValueError(f"Agent {agent_name} not found in any group")
+            raise ValueError(f"Agent {agent_name} wasn't found in any group")
 
 
 def _terminated_or_truncated(
@@ -1607,19 +1609,19 @@ def _make_compatible_policy(
     else:
         raise TypeError(
             f"""This error is raised because TorchRL tried to automatically wrap your policy in
-a TensorDictModule. If you're confident the policy can directly process environment outputs, set
-the `trust_policy` argument to `True` in the constructor.
-
-Arguments to policy.forward are incompatible with entries in
-env.observation_spec (got incongruent signatures:
-the function signature is {set(sig.parameters)} but the specs have keys {set(next_observation)}).
-If you want TorchRL to automatically wrap your policy with a TensorDictModule
-then the arguments to policy.forward must correspond one-to-one with entries
-in env.observation_spec.
-For more complex behavior and more control you can consider writing your
-own TensorDictModule.
-Check the collector documentation to know more about accepted policies.
-"""
+            a TensorDictModule. If you're confident the policy can directly process environment outputs, set
+            the `trust_policy` argument to `True` in the constructor.
+
+            Arguments to policy.forward are incompatible with entries in
+            env.observation_spec (got incongruent signatures:
+            the function signature is {set(sig.parameters)} but the specs have keys {set(next_observation)}).
+            If you want TorchRL to automatically wrap your policy with a TensorDictModule
+            then the arguments to policy.forward must correspond one-to-one with entries
+            in env.observation_spec.
+            For more complex behavior and more control you can consider writing your
+            own TensorDictModule.
+            Check the collector documentation to know more about accepted policies.
+            """
         )
     return policy
 
@@ -1736,5 +1738,5 @@ def __getattr__(self, attr: str) -> Any:
         super().__getattr__(attr)
     except Exception:
         raise AttributeError(
-            f"policy not set in {self.__class__.__name__}, cannot access {attr}."
+            f"The policy wasn't set in {self.__class__.__name__}, cannot access {attr}."
        )
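
A short usage sketch of the loosened `check_env_specs` signature (assuming a registered Gym env; GymEnv("CartPole-v1") is only illustrative):

    from torchrl.envs import GymEnv
    from torchrl.envs.utils import check_env_specs

    env = GymEnv("CartPole-v1")
    # break_when_any_done now explicitly accepts True/False, "both", or None
    check_env_specs(env, seed=0, break_when_any_done="both")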
