Skip to content

Commit 4568943

Browse files
committed
Remove the `ppo_mini_batch_size` parameter from the verl actor config in the example YAML files (and raise `batch_size` from 4 to 32 in grpo_alfworld/alfworld.yaml)
1 parent e4f0e90 commit 4568943

File tree

13 files changed

+1
-14
lines changed

13 files changed

+1
-14
lines changed

examples/async_gsm8k/verl_config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: True # False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 128
1110
ppo_micro_batch_size_per_gpu: 4
1211
use_dynamic_bsz: True # False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/dpo_humanlike/train_dpo.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 32
1110
ppo_micro_batch_size_per_gpu: 2 # NOTE
1211
use_dynamic_bsz: False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_alfworld/alfworld.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cluster:
1313
gpu_per_node: 8
1414
buffer:
1515
total_epochs: 20
16-
batch_size: 4
16+
batch_size: 32
1717
max_retry_times: 3
1818
max_retry_interval: 1
1919
explorer_input:

examples/grpo_alfworld/train_alfworld.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 1536
1110
ppo_micro_batch_size_per_gpu: 1
1211
use_dynamic_bsz: False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_gsm8k/train_gsm8k.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: True # False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 128
1110
ppo_micro_batch_size_per_gpu: 4
1211
use_dynamic_bsz: True # False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_gsm8k_experience_pipeline/train_gsm8k.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: True # False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 128
1110
ppo_micro_batch_size_per_gpu: 4
1211
use_dynamic_bsz: True # False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_gsm8k_task_pipeline/train_gsm8k.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: True # False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 128
1110
ppo_micro_batch_size_per_gpu: 4
1211
use_dynamic_bsz: True # False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_math/train_math.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: True # False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 128
1110
ppo_micro_batch_size_per_gpu: 4
1211
use_dynamic_bsz: True # False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_sciworld/train_sciworld.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 1536
1110
ppo_micro_batch_size_per_gpu: 1
1211
use_dynamic_bsz: False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

examples/grpo_webshop/train_webshop.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ actor_rollout_ref:
77
use_remove_padding: False
88
actor:
99
strategy: fsdp # This is for backward-compatibility
10-
ppo_mini_batch_size: 1536
1110
ppo_micro_batch_size_per_gpu: 1
1211
use_dynamic_bsz: False
1312
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}

0 commit comments

Comments
 (0)