Skip to content

Commit 01413ca

Browse files
authored
[BugFix] Add scalar_output_mode to loss modules for reduction='none' (#3426)
1 parent 1b68722 commit 01413ca

File tree

8 files changed

+297
-34
lines changed

8 files changed

+297
-34
lines changed

.github/RELEASE_AGENT_PROMPT.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,31 @@ Get commits from the last release:
4242
git log v0.11.0..HEAD --oneline --no-merges
4343
```
4444

45+
**Important: PR Selection for Minor Releases**
46+
47+
When selecting PRs for a minor release, follow this decision flow:
48+
49+
1. **If labeled `user-facing`** → **Exclude** (only for major releases)
50+
2. **If labeled `non-user-facing` or `Suitable for minor`** → **Include**
51+
3. **If neither label is present** → **Assess yourself** based on the changes
52+
53+
Labels:
54+
- `user-facing` - API changes, new features, or public interface changes
55+
- `non-user-facing` - Internal changes, bug fixes, refactoring
56+
- `Suitable for minor` - Explicitly marked as safe for minor releases
57+
58+
To filter PRs:
59+
```bash
60+
# Find PRs explicitly safe for minor release
61+
gh pr list --label "non-user-facing" --state merged --json number,title
62+
gh pr list --label "Suitable for minor" --state merged --json number,title
63+
64+
# Check labels on a specific PR
65+
gh pr view <PR_NUMBER> --json labels --jq '.labels[].name'
66+
```
67+
68+
For unlabeled PRs, review the changes and determine if they affect the public API or just internal implementation.
69+
4570
### Critical: Don't Miss ghstack Commits
4671

4772
**The biggest pitfall in release notes is only looking at commits with PR numbers.** Many of the most significant features are merged via ghstack and have NO PR number in the commit message. Always analyze both:
@@ -477,9 +502,11 @@ After completing all steps, provide this summary to the user:
477502
## Version Naming Convention
478503

479504
- **Major releases**: `v0.11.0`, `v0.12.0` - New features, may have breaking changes
480-
- **Minor/Patch releases**: `v0.11.1`, `v0.11.2` - Bug fixes, no new features
505+
- **Minor/Patch releases**: `v0.11.1`, `v0.11.2` - Bug fixes only, no new features or user-facing changes
481506
- **Release candidates**: `v0.11.0-rc1` - Pre-release testing
482507

508+
**Note:** PRs labeled `user-facing` must only be included in major releases, never in minor/patch releases.
509+
483510
## TensorDict Version Compatibility
484511

485512
TorchRL and TensorDict versions must match in major version:

test/test_objectives.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5376,6 +5376,7 @@ def test_sac_reduction(self, reduction, version, composite_action_dist):
53765376
delay_value=False,
53775377
reduction=reduction,
53785378
action_spec=action_spec,
5379+
scalar_output_mode="exclude" if reduction == "none" else None,
53795380
)
53805381
loss_fn.make_value_estimator()
53815382
loss = loss_fn(td)
@@ -6259,6 +6260,7 @@ def test_discrete_sac_reduction(self, reduction):
62596260
action_space="one-hot",
62606261
delay_qvalue=False,
62616262
reduction=reduction,
6263+
scalar_output_mode="exclude" if reduction == "none" else None,
62626264
)
62636265
loss_fn.make_value_estimator()
62646266
loss = loss_fn(td)
@@ -7052,6 +7054,7 @@ def test_crossq_reduction(self, reduction):
70527054
qvalue_network=qvalue,
70537055
loss_function="l2",
70547056
reduction=reduction,
7057+
scalar_output_mode="exclude" if reduction == "none" else None,
70557058
)
70567059
loss_fn.make_value_estimator()
70577060
loss = loss_fn(td)
@@ -8043,6 +8046,7 @@ def test_redq_reduction(self, reduction, deprecated_loss):
80438046
loss_function="l2",
80448047
delay_qvalue=False,
80458048
reduction=reduction,
8049+
scalar_output_mode="exclude" if reduction == "none" else None,
80468050
)
80478051
loss_fn.make_value_estimator()
80488052
loss = loss_fn(td)
@@ -8706,6 +8710,7 @@ def test_cql_reduction(self, reduction):
87068710
delay_actor=False,
87078711
delay_qvalue=False,
87088712
reduction=reduction,
8713+
scalar_output_mode="exclude" if reduction == "none" else None,
87098714
)
87108715
loss_fn.make_value_estimator()
87118716
loss = loss_fn(td)
@@ -12677,7 +12682,11 @@ def test_onlinedt_reduction(self, reduction):
1267712682
)
1267812683
td = self._create_mock_data_odt(device=device)
1267912684
actor = self._create_mock_actor(device=device)
12680-
loss_fn = OnlineDTLoss(actor, reduction=reduction)
12685+
loss_fn = OnlineDTLoss(
12686+
actor,
12687+
reduction=reduction,
12688+
scalar_output_mode="exclude" if reduction == "none" else None,
12689+
)
1268112690
loss = loss_fn(td)
1268212691
if reduction == "none":
1268312692
for key in loss.keys():
@@ -13983,6 +13992,7 @@ def test_iql_reduction(self, reduction):
1398313992
value_network=value,
1398413993
loss_function="l2",
1398513994
reduction=reduction,
13995+
scalar_output_mode="exclude" if reduction == "none" else None,
1398613996
)
1398713997
loss_fn.make_value_estimator()
1398813998
with _check_td_steady(td), pytest.warns(
@@ -14815,6 +14825,7 @@ def test_discrete_iql_reduction(self, reduction):
1481514825
loss_function="l2",
1481614826
action_space="one-hot",
1481714827
reduction=reduction,
14828+
scalar_output_mode="exclude" if reduction == "none" else None,
1481814829
)
1481914830
loss_fn.make_value_estimator()
1482014831
with _check_td_steady(td), pytest.warns(

torchrl/objectives/cql.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ def __init__(
298298
lagrange_thresh: float = 0.0,
299299
reduction: str | None = None,
300300
deactivate_vmap: bool = False,
301+
scalar_output_mode: str | None = None,
301302
) -> None:
302303
self._out_keys = None
303304
if reduction is None:
@@ -381,6 +382,23 @@ def __init__(
381382
)
382383
self._make_vmap()
383384
self.reduction = reduction
385+
386+
# Handle scalar_output_mode for reduction="none"
387+
if reduction == "none" and scalar_output_mode is None:
388+
warnings.warn(
389+
"CQLLoss with reduction='none' cannot include scalar values (alpha, entropy) "
390+
"in the output TensorDict without changing their shape. These values will be "
391+
"excluded from the output. You can access them via `loss_module._alpha` and "
392+
"compute entropy from the log_prob in the actor loss metadata. "
393+
"To suppress this warning, pass `scalar_output_mode='exclude'` to the constructor. "
394+
"Alternatively, pass `scalar_output_mode='non_tensor'` to include them as non-tensor data. "
395+
"This is a known limitation we're working on improving.",
396+
category=UserWarning,
397+
stacklevel=2,
398+
)
399+
scalar_output_mode = "exclude"
400+
self.scalar_output_mode = scalar_output_mode
401+
384402
_ = self.target_entropy
385403

386404
def _make_vmap(self):
@@ -548,18 +566,28 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
548566
tensordict.set(
549567
self.tensor_keys.priority, metadata.pop("td_error").detach().max(0).values
550568
)
569+
entropy = -actor_metadata.get(self.tensor_keys.log_prob)
551570
out = {
552571
"loss_actor": loss_actor,
553572
"loss_actor_bc": loss_actor_bc,
554573
"loss_qvalue": q_loss,
555574
"loss_cql": cql_loss,
556575
"loss_alpha": loss_alpha,
557-
"alpha": self._alpha,
558-
"entropy": -actor_metadata.get(self.tensor_keys.log_prob).mean().detach(),
559576
}
560577
if self.with_lagrange:
561578
out["loss_alpha_prime"] = alpha_prime_loss.mean()
562-
td_loss = TensorDict(out)
579+
580+
# Handle batch_size and scalar values (alpha, entropy) based on reduction mode
581+
if self.reduction == "none":
582+
batch_size = tensordict.batch_size
583+
td_loss = TensorDict(out, batch_size=batch_size)
584+
if self.scalar_output_mode == "non_tensor":
585+
td_loss.set_non_tensor("alpha", self._alpha)
586+
td_loss.set_non_tensor("entropy", entropy.detach().mean())
587+
else:
588+
out["alpha"] = self._alpha
589+
out["entropy"] = entropy.detach().mean()
590+
td_loss = TensorDict(out)
563591
self._clear_weakrefs(
564592
tensordict,
565593
td_loss,

torchrl/objectives/crossq.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
import math
8+
import warnings
89
from dataclasses import dataclass
910
from functools import wraps
1011

@@ -274,6 +275,7 @@ def __init__(
274275
separate_losses: bool = False,
275276
reduction: str | None = None,
276277
deactivate_vmap: bool = False,
278+
scalar_output_mode: str | None = None,
277279
) -> None:
278280
self._in_keys = None
279281
self._out_keys = None
@@ -348,6 +350,23 @@ def __init__(
348350
self._action_spec = action_spec
349351
self._make_vmap()
350352
self.reduction = reduction
353+
354+
# Handle scalar_output_mode for reduction="none"
355+
if reduction == "none" and scalar_output_mode is None:
356+
warnings.warn(
357+
"CrossQLoss with reduction='none' cannot include scalar values (alpha, entropy) "
358+
"in the output TensorDict without changing their shape. These values will be "
359+
"excluded from the output. You can access them via `loss_module._alpha` and "
360+
"compute entropy from the log_prob in the actor loss metadata. "
361+
"To suppress this warning, pass `scalar_output_mode='exclude'` to the constructor. "
362+
"Alternatively, pass `scalar_output_mode='non_tensor'` to include them as non-tensor data. "
363+
"This is a known limitation we're working on improving.",
364+
category=UserWarning,
365+
stacklevel=2,
366+
)
367+
scalar_output_mode = "exclude"
368+
self.scalar_output_mode = scalar_output_mode
369+
351370
# init target entropy
352371
self.maybe_init_target_entropy()
353372

@@ -553,12 +572,21 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
553572
"loss_actor": loss_actor,
554573
"loss_qvalue": loss_qvalue,
555574
"loss_alpha": loss_alpha,
556-
"alpha": self._alpha,
557-
"entropy": entropy.detach().mean(),
558575
**metadata_actor,
559576
**value_metadata,
560577
}
561-
td_out = TensorDict(out)
578+
579+
# Handle batch_size and scalar values (alpha, entropy) based on reduction mode
580+
if self.reduction == "none":
581+
batch_size = tensordict.batch_size
582+
td_out = TensorDict(out, batch_size=batch_size)
583+
if self.scalar_output_mode == "non_tensor":
584+
td_out.set_non_tensor("alpha", self._alpha)
585+
td_out.set_non_tensor("entropy", entropy.detach().mean())
586+
else:
587+
out["alpha"] = self._alpha
588+
out["entropy"] = entropy.detach().mean()
589+
td_out = TensorDict(out)
562590
self._clear_weakrefs(
563591
tensordict,
564592
td_out,

torchrl/objectives/decision_transformer.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
import math
8+
import warnings
89
from dataclasses import dataclass
910

1011
import torch
@@ -85,6 +86,7 @@ def __init__(
8586
target_entropy: str | float = "auto",
8687
samples_mc_entropy: int = 1,
8788
reduction: str | None = None,
89+
scalar_output_mode: str | None = None,
8890
) -> None:
8991
self._in_keys = None
9092
self._out_keys = None
@@ -158,6 +160,22 @@ def __init__(
158160
self._set_in_keys()
159161
self.reduction = reduction
160162

163+
# Handle scalar_output_mode for reduction="none"
164+
if reduction == "none" and scalar_output_mode is None:
165+
warnings.warn(
166+
"OnlineDTLoss with reduction='none' cannot include scalar values (alpha, entropy) "
167+
"in the output TensorDict without changing their shape. These values will be "
168+
"excluded from the output. You can access alpha via `loss_module.alpha` and "
169+
"compute entropy from the actor distribution. "
170+
"To suppress this warning, pass `scalar_output_mode='exclude'` to the constructor. "
171+
"Alternatively, pass `scalar_output_mode='non_tensor'` to include them as non-tensor data. "
172+
"This is a known limitation we're working on improving.",
173+
category=UserWarning,
174+
stacklevel=2,
175+
)
176+
scalar_output_mode = "exclude"
177+
self.scalar_output_mode = scalar_output_mode
178+
161179
def _set_in_keys(self):
162180
keys = self.actor_network.in_keys
163181
keys = set(keys)
@@ -230,15 +248,24 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
230248
"loss_log_likelihood": -log_likelihood,
231249
"loss_entropy": -entropy_bonus,
232250
"loss_alpha": loss_alpha,
233-
"entropy": entropy.detach().mean(),
234-
"alpha": self.alpha.detach(),
235251
}
236-
td_out = TensorDict(out, [])
237-
td_out = td_out.named_apply(
238-
lambda name, value: _reduce(value, reduction=self.reduction).squeeze(-1)
239-
if name.startswith("loss_")
240-
else value,
241-
)
252+
253+
# Handle batch_size and scalar values (alpha, entropy) based on reduction mode
254+
if self.reduction == "none":
255+
batch_size = tensordict.batch_size
256+
td_out = TensorDict(out, batch_size=batch_size)
257+
if self.scalar_output_mode == "non_tensor":
258+
td_out.set_non_tensor("alpha", self.alpha.detach())
259+
td_out.set_non_tensor("entropy", entropy.detach().mean())
260+
else:
261+
out["entropy"] = entropy.detach().mean()
262+
out["alpha"] = self.alpha.detach()
263+
td_out = TensorDict(out, [])
264+
td_out = td_out.named_apply(
265+
lambda name, value: _reduce(value, reduction=self.reduction).squeeze(-1)
266+
if name.startswith("loss_")
267+
else value,
268+
)
242269
self._clear_weakrefs(
243270
tensordict,
244271
td_out,

0 commit comments

Comments
 (0)