We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e774ede commit 5c5cb18Copy full SHA for 5c5cb18
applications/ColossalChat/coati/distributed/grpo_consumer.py
@@ -371,20 +371,6 @@ def _criterion(outputs, inputs):
371
kl = all_reduce_mean(torch.mean(torch.stack(kl)).to(loss.device), self.plugin).data
372
mean_kl.append(kl)
373
mean_loss.append(all_reduce_mean(loss, self.plugin).data)
374
- mini_batch_entropies.append(
375
- all_reduce_mean(
376
- (
377
378
379
- entropy_from_logits(policy_model_logits[:, -num_action:])
380
- * action_mask_forward_micro_batch
381
- ).sum(-1)
382
- )
383
- / action_mask_forward_micro_batch.sum(-1)
384
- ).detach(),
385
- self.plugin,
386
387
388
else:
389
policy_model_logits = self.policy_model(
390
input_ids=input_ids_forward_micro_batch,
0 commit comments