Showing 5 changed files with 15 additions and 15 deletions.

--- File 1 of 5 (GRPO config: Qwen/Qwen3-14B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-14B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 2 of 5 (GRPO config: Qwen/Qwen3-1.7B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-1.7B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 3 of 5 (GRPO config: Qwen/Qwen3-32B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 16
-batch_size: 32
+local_batch_size: 32 # per-device batch size
 max_req_tokens: 1024
 max_res_tokens: 1024
 model: "Qwen/Qwen3-32B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 4 of 5 (GRPO config: Qwen/Qwen3-4B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-4B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 5 of 5 (GRPO config: Qwen/Qwen3-8B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-8B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
0 commit comments