@@ -9779,11 +9779,11 @@ def mixture_constructor(logits, loc, scale):
        # keep per-head entropies instead of the aggregated tensor
        set_composite_lp_aggregate(False).set()
        coef_map = {
-           ("agent0", "action", "action1", "sub_action1_log_prob"): 0.10,
-           "sub_action2_log_prob": 0.10,
-           "action2": 0.10,
-           ("agent1", "action_log_prob"): 0.10,
-           "agent2_log_prob": 0.02,
+           ("agent0", "action", "action1", "sub_action1_log_prob"): 0.02,
+           ("agent0", "action", "action1", "sub_action2_log_prob"): 0.01,
+           ("agent0", "action", "action2_log_prob"): 0.01,
+           ("agent1", "action_log_prob"): 0.01,
+           "agent2_log_prob": 0.01,
        }
        ppo_weighted = cls(policy, value_operator, entropy_coeff=coef_map)
        ppo_weighted.set_keys(
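For context, a minimal stand-alone sketch of how a mixed coefficient map like `coef_map` above can be folded into a single entropy bonus. This illustrates the technique only; it is not TorchRL's actual `_weighted_loss_entropy`, and the helper name `weighted_entropy_bonus` plus the `.mean()` reduction are assumptions. Each key, whether a plain string or a fully qualified nested tuple, selects one per-head entropy tensor and weights it by its coefficient:

import torch
from tensordict import TensorDict

def weighted_entropy_bonus(entropy: TensorDict, coef: dict) -> torch.Tensor:
    # Hypothetical helper, not TorchRL API. Keys may be plain strings
    # ("agent2_log_prob") or nested tuples such as
    # ("agent0", "action", "action1", "sub_action1_log_prob"), matching the
    # per-head layout kept when composite log-probs are not aggregated.
    total = torch.zeros(())
    for key, coeff in coef.items():
        total = total + coeff * entropy.get(key).mean()
    return -total  # the entropy bonus enters the loss with a negative sign

Under this sketch, {("head_0", "action_log_prob"): 0.3, ("head_1", "action_log_prob"): 0.7} applied to per-head entropies 1.0 and 2.0 yields -(0.3 * 1.0 + 0.7 * 2.0) = -1.7, which is what test_weighted_entropy_mapping below asserts.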
@@ -9872,7 +9872,7 @@ def test_weighted_entropy_scalar(self):
        torch.testing.assert_close(out, torch.tensor(-1.0))

    def test_weighted_entropy_mapping(self):
-       coef = {"head_0": 0.3, "head_1": 0.7}
+       coef = {("head_0", "action_log_prob"): 0.3, ("head_1", "action_log_prob"): 0.7}
        loss = self._make_entropy_loss(entropy_coeff=coef)
        entropy = TensorDict(
            {
@@ -9882,7 +9882,7 @@ def test_weighted_entropy_mapping(self):
            [],
        )
        out = loss._weighted_loss_entropy(entropy)
-       expected = -(coef["head_0"] * 1.0 + coef["head_1"] * 2.0)
+       expected = -(coef[("head_0", "action_log_prob")] * 1.0 + coef[("head_1", "action_log_prob")] * 2.0)
        torch.testing.assert_close(out, torch.tensor(expected))

    def test_weighted_entropy_mapping_missing_key(self):
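test_weighted_entropy_mapping_missing_key suggests that a head whose entropy has no matching coefficient should not be silently dropped. A plausible strict variant of the sketch above; the keys(include_nested=True, leaves_only=True) enumeration is real tensordict API, but raising KeyError is an assumption about the intended behavior, not TorchRL's documented one:

def weighted_entropy_bonus_strict(entropy: TensorDict, coef: dict) -> torch.Tensor:
    # Assumed behavior: fail loudly when a per-head entropy has no
    # matching coefficient instead of silently omitting it from the bonus.
    missing = [
        key
        for key in entropy.keys(include_nested=True, leaves_only=True)
        if key not in coef
    ]
    if missing:
        raise KeyError(f"no entropy coefficient for heads: {missing}")
    return weighted_entropy_bonus(entropy, coef)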