
Commit 2df31f0

Add scripts to search for the context length capacity under given settings.
1 parent 54c2d72 commit 2df31f0

File tree

3 files changed: +527 -0 lines changed

Lines changed: 112 additions & 0 deletions (new file)

mode: both
project: Trinity-RFT-context-length-exp
group: length-test
name: length-test
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints/length-test}
continue_from_checkpoint: false
algorithm:
  algorithm_type: grpo
  repeat_times: ${oc.env:REPEAT_TIMES,8}
  advantage_fn: grpo
  sample_strategy: default
  policy_loss_fn: ppo
  kl_penalty_fn: none
  kl_loss_fn: k2
  entropy_loss_fn: default
  optimizer:
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    warmup_style: constant
data_processor: {}
model:
  model_path: ${oc.env:MODEL_PATH,Qwen/Qwen3-0.6B}
  max_prompt_tokens: ${oc.env:PROMPT_LEN,2048}
  max_model_len: ${oc.env:MAX_MODEL_LEN,4096}
  rope_scaling: ${oc.decode:${oc.env:ROPE_SCALING,null}}
cluster:
  node_num: 1
  gpu_per_node: ${oc.env:GPU_NUM,8}
buffer:
  batch_size: 1
  total_steps: 2
  explorer_input:
    taskset:
      name: taskset
      storage_type: file
      path: openai/gsm8k
      split: train
      subset_name: main
      format:
        prompt_key: question
        response_key: answer
      rollout_args:
        temperature: 1.0
        logprobs: 0
      workflow_args:
        prompt_len: ${model.max_prompt_tokens}
        max_model_len: ${model.max_model_len}
    eval_tasksets: []
    default_workflow_type: dummy_exp_workflow
    default_reward_fn_type: math_reward
  trainer_input:
    experience_buffer:
      name: experience_buffer
      storage_type: queue
      replay_buffer:
        enable: false
        priority_fn: linear_decay
        reuse_cooldown_time: null
        priority_fn_args:
          decay: 2.0
explorer:
  runner_per_model: 8
  rollout_model:
    engine_num: ${oc.env:ENGINE_NUM,1}
    tensor_parallel_size: 1
    enforce_eager: true
    enable_prefix_caching: false
    enable_chunked_prefill: false
    gpu_memory_utilization: 0.9
    dtype: bfloat16
    seed: 42
    enable_thinking: false
    enable_history: false
    enable_openai_api: false
    enable_auto_tool_choice: false
    tool_call_parser: null
    reasoning_parser: null
  auxiliary_models: []
  eval_interval: 1000
trainer:
  trainer_type: verl
  trainer_strategy: ${oc.env:TRAINER_STRATEGY,fsdp}
  save_interval: 100
  enable_preview: true
  grad_clip: 1.0
  ulysses_sequence_parallel_size: ${oc.env:SP_NUM,1}
  save_hf_checkpoint: ${oc.env:SAVE_HF_CHECKPOINT,last}
  trainer_config:
    actor_rollout_ref:
      actor:
        entropy_from_logits_with_chunking: ${oc.env:ENTROPY_SAVING,false}
        entropy_checkpointing: ${oc.env:ENTROPY_SAVING,false}
        fsdp_config:
          param_offload: ${oc.env:OFFLOAD,false}
          optimizer_offload: ${oc.env:OFFLOAD,false}
          offload_policy: ${oc.env:OFFLOAD,false}
      ref:
        entropy_from_logits_with_chunking: ${oc.env:ENTROPY_SAVING,false}
        entropy_checkpointing: ${oc.env:ENTROPY_SAVING,false}
        fsdp_config:
          param_offload: ${oc.env:OFFLOAD,false}
          optimizer_offload: ${oc.env:OFFLOAD,false}
          offload_policy: ${oc.env:OFFLOAD,false}
monitor:
  monitor_type: tensorboard
synchronizer:
  sync_method: nccl
  sync_style: fixed
  sync_interval: 1
  sync_timeout: 1200
log:
  level: INFO
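
The file above is the experiment config added by this commit; it parameterizes the length sweep almost entirely through OmegaConf-style interpolations. ${oc.env:VAR,default} reads an environment variable and falls back to the literal default when the variable is unset, and ${oc.decode:...} parses the resulting string into a typed value, which is how rope_scaling can accept a mapping rather than a plain string. The sketch below is illustration only, not part of the commit; it assumes the omegaconf package and uses a made-up ROPE_SCALING value to show how three of these fields resolve.

# Illustration only (not from the commit): how the ${oc.env:...} and
# ${oc.decode:...} interpolations used in the config resolve with OmegaConf.
import os
from omegaconf import OmegaConf

os.environ["MAX_MODEL_LEN"] = "32768"                          # override the 4096 default
os.environ["ROPE_SCALING"] = "{rope_type: yarn, factor: 4.0}"  # hypothetical value

cfg = OmegaConf.create("""
model:
  max_prompt_tokens: ${oc.env:PROMPT_LEN,2048}
  max_model_len: ${oc.env:MAX_MODEL_LEN,4096}
  rope_scaling: ${oc.decode:${oc.env:ROPE_SCALING,null}}
""")

resolved = OmegaConf.to_container(cfg, resolve=True)
# PROMPT_LEN is unset, so the default 2048 from the interpolation is used.
# MAX_MODEL_LEN is taken from the environment; oc.env returns env values as strings,
# so this resolves to "32768" unless it is wrapped in oc.decode downstream.
# ROPE_SCALING is parsed by oc.decode into a mapping (here: rope_type/factor).
print(resolved["model"])

In the intended workflow, variables such as PROMPT_LEN, MAX_MODEL_LEN, GPU_NUM, ENGINE_NUM, SP_NUM, and OFFLOAD would presumably be exported by the accompanying search script before each run, so the same config file can be reused across context-length settings; that script belongs to the other files in this commit and is not shown here.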
