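hyperparameter tuning: lower the default `beta` of `simple_grpo_loss` from 1e-5 to 1e-6; in qwen3_8b.yaml raise `group_size` from 8 to 16 and reduce `local_batch_size` from 8 to 4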

casteryh · casteryh · commit b487c1b1e1d5 · 2025-11-24T11:18:49.000-08:00
diff --git a/apps/grpo/main.py b/apps/grpo/main.py
@@ -129,7 +129,7 @@ def simple_grpo_loss(
     ref_logprobs: torch.Tensor,
     advantages: torch.Tensor,
     padding_mask: torch.Tensor,
-    beta: float = 1e-5,
+    beta: float = 1e-6,
 ) -> torch.Tensor:
     logprobs: torch.Tensor = compute_logprobs(logits, response)
     kl = torch.exp(ref_logprobs - logprobs) - (ref_logprobs - logprobs) - 1
diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
@@ -2,8 +2,8 @@
 # >>> python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml
 
 # Global configuration
-group_size: 8
-local_batch_size: 8 # per-device batch size
+group_size: 16
+local_batch_size: 4 # per-device batch size
 max_req_tokens: 1024
 max_res_tokens: 2048
 model: "Qwen/Qwen3-8B"