hotfix

YeAnbang · YeAnbang · commit 5c5cb1863b86 · 2025-07-21T18:04:20.000+08:00
diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py
@@ -371,20 +371,6 @@ def _criterion(outputs, inputs):
                             kl = all_reduce_mean(torch.mean(torch.stack(kl)).to(loss.device), self.plugin).data
                             mean_kl.append(kl)
                         mean_loss.append(all_reduce_mean(loss, self.plugin).data)
-                        mini_batch_entropies.append(
-                            all_reduce_mean(
-                                (
-                                    (
-                                        (
-                                            entropy_from_logits(policy_model_logits[:, -num_action:])
-                                            * action_mask_forward_micro_batch
-                                        ).sum(-1)
-                                    )
-                                    / action_mask_forward_micro_batch.sum(-1)
-                                ).detach(),
-                                self.plugin,
-                            )
-                        )
                 else:
                     policy_model_logits = self.policy_model(
                         input_ids=input_ids_forward_micro_batch,