Commit e10241c

update configs to use the same batch size when tp >= 4

Signed-off-by: Yuki Huang <[email protected]>
1 parent: b250fc7

7 files changed: +7 -5 lines
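
Every diff below applies the same one-line pattern: in recipes that run with tensor parallel size 4 or higher, policy.logprob_batch_size is set (or lowered) to 1 so it matches train_micro_batch_size: 1. A minimal sketch of the resulting shape, assembled only from keys that appear in the diffs below; the tp >= 4 framing comes from the commit message, and the inline comments are editorial rather than part of any shipped config:

policy:
  train_micro_batch_size: 1
  logprob_batch_size: 1      # was 2 or 4 in the dtensor recipes, unset in the Megatron ones
  dtensor_cfg:
    tensor_parallel_size: 4  # tp4/tp8 recipes are the ones touched, per the commit message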

examples/configs/grpo_math_70B_megatron.yaml (1 addition, 1 deletion)

@@ -12,7 +12,7 @@ policy:
   train_global_batch_size: 512
   train_micro_batch_size: 1
   generation_batch_size: 32 # Only used when generating using HF backend
-  logprob_batch_size: 4
+  logprob_batch_size: 1
   max_total_sequence_length: 4096
   precision: "bfloat16"

examples/configs/recipes/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.yaml (1 addition, 1 deletion)

@@ -11,7 +11,7 @@ policy:
   tokenizer:
     name: google/gemma-3-27b-it
   train_micro_batch_size: 1
-  logprob_batch_size: 2
+  logprob_batch_size: 1
   max_total_sequence_length: 16384
   dtensor_cfg:
     activation_checkpointing: true

examples/configs/recipes/llm/grpo-gptoss-20b-8n8g-megatron.yaml (1 addition, 0 deletions)

@@ -7,6 +7,7 @@ loss_fn:
 policy:
   model_name: openai/gpt-oss-20b
   train_micro_batch_size: 1
+  logprob_batch_size: 1
   max_total_sequence_length: 4096
   megatron_cfg:
     enabled: true

examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.yaml (1 addition, 1 deletion)

@@ -10,7 +10,7 @@ policy:
   tokenizer:
     name: Qwen/Qwen2.5-32B
   train_micro_batch_size: 1
-  logprob_batch_size: 2
+  logprob_batch_size: 1
   max_total_sequence_length: 16384
   dtensor_cfg:
     activation_checkpointing: true

examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.yaml (1 addition, 1 deletion)

@@ -10,7 +10,7 @@ policy:
   tokenizer:
     name: Qwen/Qwen2.5-32B
   train_micro_batch_size: 1
-  logprob_batch_size: 2
+  logprob_batch_size: 1
   max_total_sequence_length: 16384
   dtensor_cfg:
     activation_checkpointing: true

examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.yaml (1 addition, 1 deletion)

@@ -10,7 +10,7 @@ policy:
   tokenizer:
     name: Qwen/Qwen2.5-7B-Instruct
   train_micro_batch_size: 1
-  logprob_batch_size: 2
+  logprob_batch_size: 1
   max_total_sequence_length: 4096
   dtensor_cfg:
     tensor_parallel_size: 4

examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n8g-megatron.yaml (1 addition, 0 deletions)

@@ -8,6 +8,7 @@ checkpointing:
 policy:
   model_name: Qwen/Qwen3-30B-A3B
   train_micro_batch_size: 1
+  logprob_batch_size: 1
   max_total_sequence_length: 4096
   dtensor_cfg:
     enabled: false
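
For the two Megatron recipes where the key was only added (gpt-oss-20b and Qwen3-30B-A3B), the commit pins logprob_batch_size explicitly rather than leaving it to a default; whether a default was previously inherited from a base config is my assumption, not something the diff shows. The effective policy block for the gpt-oss recipe, reassembled from its hunk:

policy:
  model_name: openai/gpt-oss-20b
  train_micro_batch_size: 1
  logprob_batch_size: 1   # newly pinned by this commit
  max_total_sequence_length: 4096
  megatron_cfg:
    enabled: true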
