grpo_gsm8k_experience_pipeline: 13 files changed (+1, -14 lines)

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: True # False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 128
     ppo_micro_batch_size_per_gpu: 4
     use_dynamic_bsz: True # False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 32
     ppo_micro_batch_size_per_gpu: 2 # NOTE
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -13,7 +13,7 @@ cluster:
   gpu_per_node: 8
 buffer:
   total_epochs: 20
-  batch_size: 4
+  batch_size: 32
   max_retry_times: 3
   max_retry_interval: 1
   explorer_input:

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 1536
     ppo_micro_batch_size_per_gpu: 1
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: True # False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 128
     ppo_micro_batch_size_per_gpu: 4
     use_dynamic_bsz: True # False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: True # False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 128
     ppo_micro_batch_size_per_gpu: 4
     use_dynamic_bsz: True # False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: True # False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 128
     ppo_micro_batch_size_per_gpu: 4
     use_dynamic_bsz: True # False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: True # False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 128
     ppo_micro_batch_size_per_gpu: 4
     use_dynamic_bsz: True # False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 1536
     ppo_micro_batch_size_per_gpu: 1
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

@@ -7,7 +7,6 @@ actor_rollout_ref:
     use_remove_padding: False
   actor:
     strategy: fsdp # This is for backward-compatibility
-    ppo_mini_batch_size: 1536
     ppo_micro_batch_size_per_gpu: 1
     use_dynamic_bsz: False
     ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
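
Taken together, the hunks above drop the explicit ppo_mini_batch_size entry from each trainer's actor block and change batch_size under buffer from 4 to 32. Below is a minimal sketch of an affected actor block and the buffer section after this change, reassembled from the hunk context only; indentation, the parent key of use_remove_padding, and anything outside the hunks (including file paths) are assumptions, not shown in this diff.

# Example trainer config after the change (per-file values differ:
# micro batch size 1/2/4, use_dynamic_bsz on/off)
actor_rollout_ref:
  model:
    use_remove_padding: True  # parent "model" block assumed; not visible in the hunks
  actor:
    strategy: fsdp  # This is for backward-compatibility
    # ppo_mini_batch_size is no longer set here; it was removed by this change
    ppo_micro_batch_size_per_gpu: 4
    use_dynamic_bsz: True
    ppo_max_token_len_per_gpu: 16384  # n * ${data.max_prompt_length} + ${data.max_response_length}

# Buffer section after the change
buffer:
  total_epochs: 20
  batch_size: 32  # raised from 4
  max_retry_times: 3
  max_retry_interval: 1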