Showing 5 changed files with 15 additions and 15 deletions.

--- File 1 of 5 (GRPO config: Qwen/Qwen3-14B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-14B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 2 of 5 (GRPO config: Qwen/Qwen3-1.7B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-1.7B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 3 of 5 (GRPO config: Qwen/Qwen3-32B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 16
-batch_size: 32
+local_batch_size: 32 # per-device batch size
 max_req_tokens: 1024
 max_res_tokens: 1024
 model: "Qwen/Qwen3-32B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 4 of 5 (GRPO config: Qwen/Qwen3-4B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-4B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
--- File 5 of 5 (GRPO config: Qwen/Qwen3-8B) ---
@@ -3,7 +3,7 @@

 # Global configuration
 group_size: 8
-batch_size: 16
+local_batch_size: 16 # per-device batch size
 max_req_tokens: 512
 max_res_tokens: 512
 model: "Qwen/Qwen3-8B"
@@ -61,7 +61,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_batch_size: ${batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -95,7 +95,7 @@ trainer:

 # Replay buffer configuration
 replay_buffer:
-  batch_size: ${batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
0 commit comments