[Feature] Enable LineariseRewards to work with negative weights (#3064)

Xmaster6y · web-flow · commit 77c00b910e6f · 2025-07-14T05:30:52.000+01:00
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -13778,9 +13778,8 @@ def test_weight_shape_error(self):
         ):
             LineariseRewards(in_keys=("reward",), weights=torch.ones(size=(2, 4)))
 
-    def test_weight_sign_error(self):
-        with pytest.raises(ValueError, match="Expected all weights to be >0"):
-            LineariseRewards(in_keys=("reward",), weights=-torch.ones(size=(2,)))
+    def test_weight_no_sign_error(self):
+        LineariseRewards(in_keys=("reward",), weights=-torch.ones(size=(2,)))
 
     def test_discrete_spec_error(self):
         with pytest.raises(
@@ -13980,6 +13979,7 @@ def _set_seed(self, seed: int | None = None) -> None:
             (1, None),
             (3, None),
             (2, [1.0, 2.0]),
+            (2, [1.0, -1.0]),
         ],
     )
     def test_transform_env(self, num_rewards, weights):
@@ -14062,6 +14062,15 @@ def test_transform_inverse(self):
                 ),
                 BoundedContinuous(low=-1.0, high=1.0, shape=1),
             ),
+            (
+                [1.0, -1.0],
+                BoundedContinuous(
+                    low=[-1.0, -2.0],
+                    high=[1.0, 2.0],
+                    shape=2,
+                ),
+                BoundedContinuous(low=-3.0, high=3.0, shape=1),
+            ),
         ],
     )
     def test_reward_spec(
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -10748,10 +10748,6 @@ def __init__(
                     f"Expected weights to be a unidimensional tensor. Got {weights.ndim} dimension."
                 )
 
-            # Avoids switching from reward to costs.
-            if (weights < 0).any():
-                raise ValueError(f"Expected all weights to be >0. Got {weights}.")
-
             self.register_buffer("weights", weights)
         else:
             self.weights = None
@@ -10781,13 +10777,18 @@ def transform_reward_spec(self, reward_spec: TensorSpec) -> TensorSpec:
             reward_spec.shape = torch.Size([*batch_size, 1])
             return reward_spec
 
-        # The lines below are correct only if all weights are positive.
-        low = (weights * reward_spec.space.low).sum(dim=-1, keepdim=True)
-        high = (weights * reward_spec.space.high).sum(dim=-1, keepdim=True)
+        weights_pos = weights.clamp(min=0)
+        weights_neg = weights.clamp(max=0)
+
+        low_pos = (weights_pos * reward_spec.space.low).sum(dim=-1, keepdim=True)
+        low_neg = (weights_neg * reward_spec.space.high).sum(dim=-1, keepdim=True)
+
+        high_pos = (weights_pos * reward_spec.space.high).sum(dim=-1, keepdim=True)
+        high_neg = (weights_neg * reward_spec.space.low).sum(dim=-1, keepdim=True)
 
         return BoundedContinuous(
-            low=low,
-            high=high,
+            low=low_pos + low_neg,
+            high=high_pos + high_neg,
             device=reward_spec.device,
             dtype=reward_spec.dtype,
         )