Fix doc/config issues about prompt/response/sequence lengths (#370)

yanxi-chen · web-flow · commit db9012f27866 · 2025-11-07T15:41:31.000+08:00
diff --git a/docs/sphinx_doc/source/tutorial/example_async_mode.md b/docs/sphinx_doc/source/tutorial/example_async_mode.md
@@ -21,6 +21,8 @@ algorithm:
   repeat_times: 8
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -69,6 +71,8 @@ algorithm:
     lr: 1e-6
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -128,6 +132,8 @@ algorithm:
   repeat_times: 8
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:  # important
   node_num: 1
   gpu_per_node: 8
diff --git a/docs/sphinx_doc/source/tutorial/example_dpo.md b/docs/sphinx_doc/source/tutorial/example_dpo.md
@@ -66,6 +66,8 @@ algorithm:
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 1536
 cluster:
   node_num: 1
   gpu_per_node: 8
@@ -114,6 +116,8 @@ algorithm:
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 10240
+  max_model_len: 10752
 cluster:
   node_num: 1
   gpu_per_node: 2
diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md
@@ -59,6 +59,8 @@ algorithm:
     lr: 1e-5
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 2
diff --git a/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md b/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md
@@ -21,6 +21,8 @@ algorithm:
   repeat_times: 8
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -69,6 +71,8 @@ algorithm:
     lr: 1e-6
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -128,6 +132,8 @@ algorithm:
   repeat_times: 8
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:  # important
   node_num: 1
   gpu_per_node: 8
diff --git a/docs/sphinx_doc/source_zh/tutorial/example_dpo.md b/docs/sphinx_doc/source_zh/tutorial/example_dpo.md
@@ -68,6 +68,8 @@ algorithm:
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 1536
 cluster:
   node_num: 1
   gpu_per_node: 8
@@ -116,6 +118,8 @@ algorithm:
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 10240
+  max_model_len: 10752
 cluster:
   node_num: 1
   gpu_per_node: 2
diff --git a/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md
@@ -59,6 +59,8 @@ algorithm:
     lr: 1e-5
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_response_tokens: 1024
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 2
diff --git a/examples/asymre_gsm8k/gsm8k.yaml b/examples/asymre_gsm8k/gsm8k.yaml
@@ -8,7 +8,7 @@ checkpoint_root_dir:  ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 algorithm:
   algorithm_type: asymre
   repeat_times: 8
diff --git a/examples/asymre_math/math.yaml b/examples/asymre_math/math.yaml
@@ -6,8 +6,8 @@ name: asymre_math
 checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # the path to your model
-  max_response_tokens: 1024
-  max_model_len: 1280
+  max_prompt_tokens: 1024
+  max_response_tokens: 2048
 algorithm:
   algorithm_type: asymre
   repeat_times: 8
@@ -62,8 +62,6 @@ explorer:
     enable_prefix_caching: false
     enforce_eager: true
     dtype: bfloat16
-    max_prompt_tokens: 1024
-    max_response_tokens: 2048
     seed: 42
 synchronizer:
   sync_method: 'nccl'
diff --git a/examples/async_gsm8k/explorer.yaml b/examples/async_gsm8k/explorer.yaml
@@ -8,7 +8,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
diff --git a/examples/async_gsm8k/trainer.yaml b/examples/async_gsm8k/trainer.yaml
@@ -10,7 +10,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 4
diff --git a/examples/cispo_gsm8k/gsm8k.yaml b/examples/cispo_gsm8k/gsm8k.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/grpo_gsm8k/gsm8k.yaml b/examples/grpo_gsm8k/gsm8k.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml
@@ -29,7 +29,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml
@@ -11,7 +11,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml
@@ -28,7 +28,7 @@ data_processor:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/grpo_lora_gsm8k/gsm8k.yaml b/examples/grpo_lora_gsm8k/gsm8k.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
   lora_configs:
   - name: lora
     lora_rank: 32
diff --git a/examples/ppo_countdown/countdown.yaml b/examples/ppo_countdown/countdown.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/ppo_countdown_megatron/countdown.yaml b/examples/ppo_countdown_megatron/countdown.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8
diff --git a/examples/rec_gsm8k/gsm8k.yaml b/examples/rec_gsm8k/gsm8k.yaml
@@ -5,7 +5,7 @@ mode: both
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 algorithm:
   algorithm_type: rec
   repeat_times: 8
diff --git a/examples/sppo_gsm8k/gsm8k.yaml b/examples/sppo_gsm8k/gsm8k.yaml
@@ -8,7 +8,7 @@ checkpoint_root_dir:  ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 algorithm:
   algorithm_type: sppo
   repeat_times: 8
diff --git a/examples/topr_gsm8k/gsm8k.yaml b/examples/topr_gsm8k/gsm8k.yaml
@@ -9,7 +9,7 @@ algorithm:
 model:
   model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
   max_response_tokens: 1024
-  max_model_len: 1280
+  max_model_len: 2048
 cluster:
   node_num: 1
   gpu_per_node: 8