We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4318d83 commit 2cc6c6bCopy full SHA for 2cc6c6b
apps/grpo/main.py
@@ -129,7 +129,7 @@ def simple_grpo_loss(
129
ref_logprobs: torch.Tensor,
130
advantages: torch.Tensor,
131
padding_mask: torch.Tensor,
132
- beta: float = 1e-4,
+ beta: float = 1e-5,
133
) -> torch.Tensor:
134
logprobs: torch.Tensor = compute_logprobs(logits, response)
135
kl = torch.exp(ref_logprobs - logprobs) - (ref_logprobs - logprobs) - 1
0 commit comments