Commit 7ea9126

Increase batch sizes for 270M model
- rollouts_per_example: 8 -> 16
- batch_size: 8 -> 32
- micro_batch_size: 1 -> 8

Better GPU utilization for the small model.

Parent: fabf7d5

1 file changed: +3 −3 lines


scripts/train_grpo.py

Lines changed: 3 additions & 3 deletions

@@ -53,9 +53,9 @@ class TrainingConfig:
 
     # Training hyperparameters
     num_train_epochs: int = 1
-    rollouts_per_example: int = 8
-    batch_size: int = 8  # Reduced for OOM
-    micro_batch_size: int = 1  # Reduced for OOM
+    rollouts_per_example: int = 16
+    batch_size: int = 32
+    micro_batch_size: int = 8
     learning_rate: float = 1e-6
     max_seq_len: int = 1024  # Reduced - poems are small
     max_prompt_len: int = 384
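The practical effect of this change is on gradient accumulation: trainers in this style typically split each optimizer batch into `batch_size / micro_batch_size` forward/backward micro-steps, so the new values keep more work per GPU step. A minimal sketch, assuming the config is a dataclass as in the diff; the `grad_accum_steps` helper is hypothetical and not part of the source file:

```python
from dataclasses import dataclass


@dataclass
class TrainingConfig:
    # Values after this commit
    num_train_epochs: int = 1
    rollouts_per_example: int = 16
    batch_size: int = 32
    micro_batch_size: int = 8
    learning_rate: float = 1e-6
    max_seq_len: int = 1024
    max_prompt_len: int = 384

    @property
    def grad_accum_steps(self) -> int:
        # batch_size must divide evenly into micro-batches
        assert self.batch_size % self.micro_batch_size == 0
        return self.batch_size // self.micro_batch_size


cfg = TrainingConfig()
print(cfg.grad_accum_steps)  # 32 / 8 = 4 micro-steps per optimizer step
```

Under the old values (batch_size=8, micro_batch_size=1) the same ratio was 8, so despite the 4x larger batch the per-device memory footprint only grows with the micro-batch, which is why the "Reduced for OOM" comments could be dropped.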
