Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/sphinx_doc/source/tutorial/example_async_mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ algorithm:
repeat_times: 8
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down Expand Up @@ -69,6 +71,8 @@ algorithm:
lr: 1e-6
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down Expand Up @@ -128,6 +132,8 @@ algorithm:
repeat_times: 8
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster: # important
node_num: 1
gpu_per_node: 8
Expand Down
4 changes: 4 additions & 0 deletions docs/sphinx_doc/source/tutorial/example_dpo.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ algorithm:
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1536
cluster:
node_num: 1
gpu_per_node: 8
Expand Down Expand Up @@ -114,6 +116,8 @@ algorithm:
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 10240
max_model_len: 10752
cluster:
node_num: 1
gpu_per_node: 2
Expand Down
2 changes: 2 additions & 0 deletions docs/sphinx_doc/source/tutorial/example_reasoning_basic.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ algorithm:
lr: 1e-5
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 2
Expand Down
6 changes: 6 additions & 0 deletions docs/sphinx_doc/source_zh/tutorial/example_async_mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ algorithm:
repeat_times: 8
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down Expand Up @@ -69,6 +71,8 @@ algorithm:
lr: 1e-6
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down Expand Up @@ -128,6 +132,8 @@ algorithm:
repeat_times: 8
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster: # important
node_num: 1
gpu_per_node: 8
Expand Down
4 changes: 4 additions & 0 deletions docs/sphinx_doc/source_zh/tutorial/example_dpo.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ algorithm:
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1536
cluster:
node_num: 1
gpu_per_node: 8
Expand Down Expand Up @@ -116,6 +118,8 @@ algorithm:
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 10240
max_model_len: 10752
cluster:
node_num: 1
gpu_per_node: 2
Expand Down
2 changes: 2 additions & 0 deletions docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ algorithm:
lr: 1e-5
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 2
Expand Down
2 changes: 1 addition & 1 deletion examples/asymre_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
algorithm:
algorithm_type: asymre
repeat_times: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/asymre_math/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # the path to your model
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
algorithm:
algorithm_type: asymre
repeat_times: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/async_gsm8k/explorer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down
2 changes: 1 addition & 1 deletion examples/async_gsm8k/trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 4
Expand Down
2 changes: 1 addition & 1 deletion examples/cispo_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/grpo_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/grpo_gsm8k_task_pipeline/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ data_processor:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/grpo_lora_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
lora_configs:
- name: lora
lora_rank: 32
Expand Down
2 changes: 1 addition & 1 deletion examples/ppo_countdown/countdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/ppo_countdown_megatron/countdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/rec_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mode: both
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
algorithm:
algorithm_type: rec
repeat_times: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/sppo_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
algorithm:
algorithm_type: sppo
repeat_times: 8
Expand Down
2 changes: 1 addition & 1 deletion examples/topr_gsm8k/gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ algorithm:
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
max_response_tokens: 1024
max_model_len: 1280
max_model_len: 2048
cluster:
node_num: 1
gpu_per_node: 8
Expand Down