
Commit d1124af

Juan de los Rios authored and vmoens committed

Fix composite entropy nested keys

1 parent 978424e commit d1124af

File tree

2 files changed (+79, -30 lines)


test/test_cost.py

Lines changed: 46 additions & 20 deletions
@@ -9659,6 +9659,10 @@ def test_ppo_composite_dists(self):
 
         make_params = TensorDictModule(
             lambda: (
+                torch.ones(4),
+                torch.ones(4),
+                torch.ones(4),
+                torch.ones(4),
                 torch.ones(4),
                 torch.ones(4),
                 torch.ones(4, 2),
@@ -9669,8 +9673,12 @@ def test_ppo_composite_dists(self):
             ),
             in_keys=[],
             out_keys=[
-                ("params", "gamma", "concentration"),
-                ("params", "gamma", "rate"),
+                ("params", "gamma1", "concentration"),
+                ("params", "gamma1", "rate"),
+                ("params", "gamma2", "concentration"),
+                ("params", "gamma2", "rate"),
+                ("params", "gamma3", "concentration"),
+                ("params", "gamma3", "rate"),
                 ("params", "Kumaraswamy", "concentration0"),
                 ("params", "Kumaraswamy", "concentration1"),
                 ("params", "mixture", "logits"),
@@ -9687,24 +9695,30 @@ def mixture_constructor(logits, loc, scale):
         dist_constructor = functools.partial(
             CompositeDistribution,
             distribution_map={
-                "gamma": d.Gamma,
+                "gamma1": d.Gamma,
+                "gamma2": d.Gamma,
+                "gamma3": d.Gamma,
                 "Kumaraswamy": d.Kumaraswamy,
                 "mixture": mixture_constructor,
             },
             name_map={
-                "gamma": ("agent0", "action"),
+                "gamma1": ("agent0", "action", "action1", "sub_action1"),
+                "gamma2": ("agent0", "action", "action1", "sub_action2"),
+                "gamma3": ("agent0", "action", "action2"),
                 "Kumaraswamy": ("agent1", "action"),
-                "mixture": ("agent2", "action"),
+                "mixture": ("agent2"),
             },
         )
         policy = ProbSeq(
             make_params,
             ProbabilisticTensorDictModule(
                 in_keys=["params"],
                 out_keys=[
-                    ("agent0", "action"),
+                    ("agent0", "action", "action1", "sub_action1"),
+                    ("agent0", "action", "action1", "sub_action2"),
+                    ("agent0", "action", "action2"),
                     ("agent1", "action"),
-                    ("agent2", "action"),
+                    ("agent2"),
                 ],
                 distribution_class=dist_constructor,
                 return_log_prob=True,
@@ -9739,14 +9753,18 @@ def mixture_constructor(logits, loc, scale):
         ppo = cls(policy, value_operator, entropy_coeff=scalar_entropy)
         ppo.set_keys(
             action=[
-                ("agent0", "action"),
+                ("agent0", "action", "action1", "sub_action1"),
+                ("agent0", "action", "action1", "sub_action2"),
+                ("agent0", "action", "action2"),
                 ("agent1", "action"),
-                ("agent2", "action"),
+                ("agent2"),
             ],
             sample_log_prob=[
-                ("agent0", "action_log_prob"),
+                ("agent0", "action", "action1", "sub_action1_log_prob"),
+                ("agent0", "action", "action1", "sub_action2_log_prob"),
+                ("agent0", "action", "action2_log_prob"),
                 ("agent1", "action_log_prob"),
-                ("agent2", "action_log_prob"),
+                ("agent2_log_prob"),
             ],
         )
         loss = ppo(data)
@@ -9761,21 +9779,27 @@ def mixture_constructor(logits, loc, scale):
         # keep per-head entropies instead of the aggregated tensor
         set_composite_lp_aggregate(False).set()
         coef_map = {
-            "agent0": 0.10,
-            "agent1": 0.05,
-            "agent2": 0.02,
+            ("agent0", "action", "action1", "sub_action1_log_prob"): 0.10,
+            "sub_action2_log_prob": 0.10,
+            "action2": 0.10,
+            ("agent1", "action_log_prob"): 0.10,
+            "agent2_log_prob": 0.02,
         }
         ppo_weighted = cls(policy, value_operator, entropy_coeff=coef_map)
         ppo_weighted.set_keys(
             action=[
-                ("agent0", "action"),
+                ("agent0", "action", "action1", "sub_action1"),
+                ("agent0", "action", "action1", "sub_action2"),
+                ("agent0", "action", "action2"),
                 ("agent1", "action"),
-                ("agent2", "action"),
+                ("agent2"),
             ],
             sample_log_prob=[
-                ("agent0", "action_log_prob"),
+                ("agent0", "action", "action1", "sub_action1_log_prob"),
+                ("agent0", "action", "action1", "sub_action2_log_prob"),
+                ("agent0", "action", "action2_log_prob"),
                 ("agent1", "action_log_prob"),
-                ("agent2", "action_log_prob"),
+                ("agent2_log_prob"),
             ],
         )
         loss = ppo_weighted(data)
@@ -9786,9 +9810,11 @@ def mixture_constructor(logits, loc, scale):
         assert torch.isfinite(loss["loss_entropy"])
         # Check individual loss is computed with the right weights
         expected_loss = 0.0
-        for name, head_entropy in composite_entropy.items():
+        for i, (_, head_entropy) in enumerate(
+            composite_entropy.items(include_nested=True, leaves_only=True)
+        ):
             expected_loss -= (
-                coef_map[name] * _sum_td_features(head_entropy)
+                coef_map[list(coef_map.keys())[i]] * head_entropy
             ).mean()
         torch.testing.assert_close(
             loss["loss_entropy"], expected_loss, rtol=1e-5, atol=1e-7

torchrl/objectives/ppo.py

Lines changed: 33 additions & 10 deletions
@@ -351,7 +351,7 @@ def __init__(
         *,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] | None = None,
+        entropy_coeff: float | Mapping[str | tuple | list, float] | None = None,
         log_explained_variance: bool = True,
         critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
@@ -460,7 +460,8 @@ def __init__(
         if isinstance(entropy_coeff, Mapping):
             # Store the mapping for per-head coefficients
             self._entropy_coeff_map = {
-                str(k): float(v) for k, v in entropy_coeff.items()
+                (tuple(k) if isinstance(k, list) else k): float(v)
+                for k, v in entropy_coeff.items()
             }
             # Register an empty buffer for compatibility
             self.register_buffer("entropy_coeff", torch.tensor(0.0))
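
A small illustration (not part of the commit) of why the normalization above replaces the previous `str(k)`: stringifying a nested key destroys the path structure that the entropy TensorDict iterates with, while `tuple(...)` keeps the key comparable against tuple paths:

    # Old behavior: str() turns a path into an opaque string.
    str(("agent0", "action"))  # "('agent0', 'action')" -- never matches a path
    # New behavior: lists become tuples; strings and tuples pass through.
    k = ["agent0", "action"]
    k = tuple(k) if isinstance(k, list) else k  # ("agent0", "action")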
@@ -918,15 +919,37 @@ def _weighted_loss_entropy(
             return -self.entropy_coeff * entropy
 
         loss_term = None  # running sum over heads
-        for head_name, entropy_head in entropy.items():
-            try:
-                coeff = self._entropy_coeff_map[head_name]
-            except KeyError as exc:
-                raise KeyError(f"Missing entropy coeff for head '{head_name}'") from exc
+        coeff = 0
+        for head_name, entropy_head in entropy.items(
+            include_nested=True, leaves_only=True
+        ):
+            if isinstance(head_name, str):
+                head_name = (head_name,)
+            for i, (head_name_from_map, _coeff) in enumerate(
+                self._entropy_coeff_map.items()
+            ):
+                # Check if a distinct head name appears inside the nested key tuple
+                if head_name_from_map in head_name:
+                    coeff = _coeff
+                    break
+                # Check if the head's path is fully or partially in the nested dict
+                if any(
+                    head_name_from_map == head_name[i : i + len(head_name_from_map)]
+                    for i in range(len(head_name) - len(head_name_from_map) + 1)
+                ):
+                    coeff = _coeff
+                    break
+            if i == len(self._entropy_coeff_map.items()):
+                raise KeyError(
+                    f"Missing entropy coeff for head '{head_name}'"
+                )
             coeff_t = torch.as_tensor(
                 coeff, dtype=entropy_head.dtype, device=entropy_head.device
             )
-            head_loss_term = -coeff_t * _sum_td_features(entropy_head)
+            if isinstance(entropy_head, torch.Tensor):
+                head_loss_term = -coeff_t * entropy_head
+            else:
+                head_loss_term = -coeff_t * _sum_td_features(entropy_head)
             loss_term = (
                 head_loss_term if loss_term is None else loss_term + head_loss_term
             )  # accumulate
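
For readers skimming the hunk, a standalone sketch of the matching rule it implements; the helper name `match_coeff` is hypothetical, and this mirrors, rather than calls, the torchrl code:

    from typing import Mapping

    def match_coeff(head_path: tuple, coeff_map: Mapping) -> float | None:
        # Return the first coefficient whose key is either a single element
        # of the nested head path or a contiguous sub-path of it.
        for key, coeff in coeff_map.items():
            if key in head_path:  # string key matching one path element
                return coeff
            if isinstance(key, tuple) and any(  # tuple key matching a sub-path
                key == head_path[i : i + len(key)]
                for i in range(len(head_path) - len(key) + 1)
            ):
                return coeff
        return None

    assert match_coeff(("agent2_log_prob",), {"agent2_log_prob": 0.02}) == 0.02
    assert match_coeff(
        ("agent0", "action", "action1", "sub_action1_log_prob"),
        {("action1", "sub_action1_log_prob"): 0.1},
    ) == 0.1

One divergence worth noting: the sketch surfaces a miss as `None`, whereas the loop in the diff falls through with the previously matched coefficient, since its `i == len(...)` guard cannot fire (the enumerate index stops at `len - 1`).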
@@ -1075,7 +1098,7 @@ def __init__(
         clip_epsilon: float = 0.2,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] | None = None,
+        entropy_coeff: float | Mapping[str | tuple | list, float] | None = None,
         critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
         normalize_advantage: bool = False,
@@ -1369,7 +1392,7 @@ def __init__(
         samples_mc_kl: int = 1,
         entropy_bonus: bool = True,
         samples_mc_entropy: int = 1,
-        entropy_coeff: float | Mapping[str, float] | None = None,
+        entropy_coeff: float | Mapping[str | tuple | list, float] | None = None,
         critic_coeff: float | None = None,
         loss_critic_type: str = "smooth_l1",
         normalize_advantage: bool = False,
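
Taken together, the changes let any PPO-family loss accept per-head entropy coefficients keyed by nested paths. A hedged usage sketch, with `policy` and `value_operator` standing in for modules built as in the test above:

    from torchrl.objectives import ClipPPOLoss

    # Keys may be full nested paths (tuples), single path elements (strings),
    # or lists (normalized to tuples on construction).
    entropy_coeff = {
        ("agent0", "action", "action1", "sub_action1_log_prob"): 0.10,
        "sub_action2_log_prob": 0.10,
        ("agent1", "action_log_prob"): 0.05,
        "agent2_log_prob": 0.02,
    }
    loss_fn = ClipPPOLoss(policy, value_operator, entropy_coeff=entropy_coeff)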
