We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6186f9f, commit c640d37. Copy full SHA for c640d37
sandbox/grpo_language/main.py
@@ -125,7 +125,7 @@ def simple_grpo_loss(
125
ref_logprobs: torch.Tensor,
126
advantages: torch.Tensor,
127
padding_mask: torch.Tensor,
128
- beta: float = 0.1,
+ beta: float = 0.0,
129
) -> torch.Tensor:
130
logprobs: torch.Tensor = compute_logprobs(logits, response)
131
kl = torch.exp(ref_logprobs - logprobs) - (ref_logprobs - logprobs) - 1
0 commit comments