Fix entropy bug: pass the per-step entropy tensor to the loss function instead of only the last step's entropy (#1524)

epoint95 · Edward Point · web-flow · commit 5a0119cd88e6 · 2026-01-29T09:17:59.000-08:00
Co-authored-by: Edward Point <edwardpoint@MacBook-Pro.local>
diff --git a/docs/tutorials/training_agents/vector_a2c.py b/docs/tutorials/training_agents/vector_a2c.py
@@ -442,6 +442,7 @@ def update_parameters(
     ep_value_preds = torch.zeros(n_steps_per_update, n_envs, device=device)
     ep_rewards = torch.zeros(n_steps_per_update, n_envs, device=device)
     ep_action_log_probs = torch.zeros(n_steps_per_update, n_envs, device=device)
+    ep_entropies = torch.zeros(n_steps_per_update, n_envs, device=device)
     masks = torch.zeros(n_steps_per_update, n_envs, device=device)
 
     # at the start of training reset all envs to get an initial state
@@ -463,6 +464,7 @@ def update_parameters(
         ep_value_preds[step] = torch.squeeze(state_value_preds)
         ep_rewards[step] = torch.tensor(rewards, device=device)
         ep_action_log_probs[step] = action_log_probs
+        ep_entropies[step] = entropy
 
         # add a mask (for the return calculation later);
         # for each env the mask is 1 if the episode is ongoing and 0 if it is terminated (not by truncation!)
@@ -473,7 +475,7 @@ def update_parameters(
         ep_rewards,
         ep_action_log_probs,
         ep_value_preds,
-        entropy,
+        ep_entropies,
         masks,
         gamma,
         lam,
@@ -487,7 +489,7 @@ def update_parameters(
     # log the losses and entropy
     critic_losses.append(critic_loss.detach().cpu().numpy())
     actor_losses.append(actor_loss.detach().cpu().numpy())
-    entropies.append(entropy.detach().mean().cpu().numpy())
+    entropies.append(ep_entropies.detach().mean().cpu().numpy())
 
 
 # %%