Update AsyncGRPOConfig chunk_lm_head_size default to 8192

AmineDiro · AmineDiro · commit ed9c759918a5 · 2026-03-23T15:34:25.000Z
diff --git a/trl/experimental/async_grpo/async_grpo_config.py b/trl/experimental/async_grpo/async_grpo_config.py
@@ -61,12 +61,11 @@ class AsyncGRPOConfig(_BaseConfig):
 
         > Parameters that control the LM head
 
-        chunk_lm_head (`int` or `None`, *optional*, defaults to `None`):
+        chunk_lm_head_size (`int` or `None`, *optional*, defaults to `8192`):
             Chunk size for the fused LM head. When set, the lm_head computes log-probs and entropy without
             materializing the full `[batch, seq, vocab]` logits tensor, processing the vocabulary in chunks of this
             size instead. Reduces peak memory at the cost of extra matmuls. If `None`, uses the standard full-logits
             path.
-
         > Parameters that control the async rollout pipeline
 
         max_inflight_tasks (`int`, *optional*, defaults to `-1`):
@@ -168,7 +167,7 @@ class AsyncGRPOConfig(_BaseConfig):
 
     # Parameters that control the LM head
     chunk_lm_head_size: int | None = field(
-        default=None,
+        default=8192,
         metadata={
             "help": "Chunk size for the fused LM head. When set, the lm_head computes log-probs and entropy "
             "without materializing the full [batch, seq, vocab] logits tensor, processing the vocabulary in "
@@ -177,7 +176,6 @@ class AsyncGRPOConfig(_BaseConfig):
             "forward pass)."
         },
     )
-
     # Parameters that control the async rollout pipeline
     max_inflight_tasks: int = field(
         default=-1,