Skip to content

Commit 90542b0

Browse files
committed
all local_batch_size
1 parent ed225df commit 90542b0

File tree

5 files changed

+15
-15
lines changed

5 files changed

+15
-15
lines changed

apps/mast/qwen3_14b_mast.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 8
6-  batch_size: 16
6+  local_batch_size: 16 # per-device batch size
77
max_req_tokens: 512
88
max_res_tokens: 512
99
model: "Qwen/Qwen3-14B"
@@ -61,7 +61,7 @@ trainer:
6161
lr_scheduler:
6262
warmup_steps: 1
6363
training:
64-    local_batch_size: ${batch_size}
64+    local_batch_size: ${local_batch_size}
6565
seq_len: 2048
6666
max_norm: 1.0
6767
steps: 1000000
@@ -95,7 +95,7 @@ trainer:
9595

9696
# Replay buffer configuration
9797
replay_buffer:
98-  batch_size: ${batch_size}
98+  batch_size: ${local_batch_size}
9999
max_policy_age: ${off_by_n}
100100
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
101101

apps/mast/qwen3_1_7b_mast.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 8
6-  batch_size: 16
6+  local_batch_size: 16 # per-device batch size
77
max_req_tokens: 512
88
max_res_tokens: 512
99
model: "Qwen/Qwen3-1.7B"
@@ -61,7 +61,7 @@ trainer:
6161
lr_scheduler:
6262
warmup_steps: 1
6363
training:
64-    local_batch_size: ${batch_size}
64+    local_batch_size: ${local_batch_size}
6565
seq_len: 2048
6666
max_norm: 1.0
6767
steps: 1000000
@@ -95,7 +95,7 @@ trainer:
9595

9696
# Replay buffer configuration
9797
replay_buffer:
98-  batch_size: ${batch_size}
98+  batch_size: ${local_batch_size}
9999
max_policy_age: ${off_by_n}
100100
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
101101

apps/mast/qwen3_32b_mast.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 16
6-  batch_size: 32
6+  local_batch_size: 32 # per-device batch size
77
max_req_tokens: 1024
88
max_res_tokens: 1024
99
model: "Qwen/Qwen3-32B"
@@ -61,7 +61,7 @@ trainer:
6161
lr_scheduler:
6262
warmup_steps: 1
6363
training:
64-    local_batch_size: ${batch_size}
64+    local_batch_size: ${local_batch_size}
6565
seq_len: 2048
6666
max_norm: 1.0
6767
steps: 1000000
@@ -95,7 +95,7 @@ trainer:
9595

9696
# Replay buffer configuration
9797
replay_buffer:
98-  batch_size: ${batch_size}
98+  batch_size: ${local_batch_size}
9999
max_policy_age: ${off_by_n}
100100
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
101101

apps/mast/qwen3_4b_mast.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 8
6-  batch_size: 16
6+  local_batch_size: 16 # per-device batch size
77
max_req_tokens: 512
88
max_res_tokens: 512
99
model: "Qwen/Qwen3-4B"
@@ -61,7 +61,7 @@ trainer:
6161
lr_scheduler:
6262
warmup_steps: 1
6363
training:
64-    local_batch_size: ${batch_size}
64+    local_batch_size: ${local_batch_size}
6565
seq_len: 2048
6666
max_norm: 1.0
6767
steps: 1000000
@@ -95,7 +95,7 @@ trainer:
9595

9696
# Replay buffer configuration
9797
replay_buffer:
98-  batch_size: ${batch_size}
98+  batch_size: ${local_batch_size}
9999
max_policy_age: ${off_by_n}
100100
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
101101

apps/mast/qwen3_8b_mast.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 8
6-  batch_size: 16
6+  local_batch_size: 16 # per-device batch size
77
max_req_tokens: 512
88
max_res_tokens: 512
99
model: "Qwen/Qwen3-8B"
@@ -61,7 +61,7 @@ trainer:
6161
lr_scheduler:
6262
warmup_steps: 1
6363
training:
64-    local_batch_size: ${batch_size}
64+    local_batch_size: ${local_batch_size}
6565
seq_len: 2048
6666
max_norm: 1.0
6767
steps: 1000000
@@ -95,7 +95,7 @@ trainer:
9595

9696
# Replay buffer configuration
9797
replay_buffer:
98-  batch_size: ${batch_size}
98+  batch_size: ${local_batch_size}
9999
max_policy_age: ${off_by_n}
100100
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
101101

0 commit comments

Comments (0)