We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 660ffd8 commit 5161aaf (Copy full SHA for 5161aaf)
scripts/train_grpo.py
@@ -58,7 +58,7 @@ class TrainingConfig:
58
num_train_epochs: int = 1
59
rollouts_per_example: int = 16
60
batch_size: int = 32
61
- micro_batch_size: int = 8
+ micro_batch_size: int = 2 # Keep small to avoid OOM during backward pass
62
learning_rate: float = 1e-6
63
max_seq_len: int = 1024 # Reduced - poems are small
64
max_prompt_len: int = 384
0 commit comments