Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions scripts/context_length_test/context_length.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Run identification and checkpoint location for the context-length test.
# NOTE(review): `both` presumably enables the explorer and the trainer
# together — confirm against the framework's documented modes.
mode: both
project: Trinity-RFT-context-length-exp
group: length-test
name: length-test
# Overridable through the TRINITY_CHECKPOINT_ROOT_DIR environment variable;
# falls back to ./checkpoints/length-test when it is unset.
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints/length-test}
continue_from_checkpoint: false
# Algorithm selection (GRPO) and its optimizer settings.
# NOTE(review): the reviewed copy had lost all indentation; the nesting here
# (notably `optimizer` as a child of `algorithm`) is reconstructed — confirm
# against the config schema.
algorithm:
  algorithm_type: grpo
  # Overridable through REPEAT_TIMES; defaults to 8.
  repeat_times: ${oc.env:REPEAT_TIMES,8}
  advantage_fn: grpo
  sample_strategy: default
  policy_loss_fn: ppo
  # This is the literal string `none`, not a YAML null.
  kl_penalty_fn: none
  kl_loss_fn: k2
  entropy_loss_fn: default
  optimizer:
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    warmup_style: constant
# No data-processor options are set: an explicit empty mapping, not null.
data_processor: {}
# Model and sequence-length limits; every value can be overridden from the
# environment. `model.max_prompt_tokens` and `model.max_model_len` are
# referenced by interpolation elsewhere in this file, so these keys must stay
# nested under `model`.
model:
  # MODEL_PATH, default Qwen/Qwen3-0.6B.
  model_path: ${oc.env:MODEL_PATH,Qwen/Qwen3-0.6B}
  # PROMPT_LEN, default 2048.
  max_prompt_tokens: ${oc.env:PROMPT_LEN,2048}
  # MAX_MODEL_LEN, default 4096.
  max_model_len: ${oc.env:MAX_MODEL_LEN,4096}
  # oc.env yields a string; oc.decode parses it into a typed value.
  # Resolves to null when ROPE_SCALING is unset.
  rope_scaling: ${oc.decode:${oc.env:ROPE_SCALING,null}}
# Cluster shape: one node; the GPU count per node comes from GPU_NUM
# (default 8).
cluster:
  node_num: 1
  gpu_per_node: ${oc.env:GPU_NUM,8}
# Data flow: the task set read by the explorer and the experience buffer read
# by the trainer.
# NOTE(review): the reviewed copy had lost all indentation, which left
# duplicate `name` / `storage_type` keys at one level. The nesting below is
# reconstructed — confirm the placement of `rollout_args`, `workflow_args`
# and `replay_buffer` against the config schema.
buffer:
  batch_size: 1
  total_steps: 2
  explorer_input:
    taskset:
      name: taskset
      storage_type: file
      path: openai/gsm8k
      split: train
      subset_name: main
      format:
        prompt_key: question
        response_key: answer
      rollout_args:
        temperature: 1.0
        logprobs: 0
      workflow_args:
        # Mirror the limits defined under `model` so the two cannot drift.
        prompt_len: ${model.max_prompt_tokens}
        max_model_len: ${model.max_model_len}
    eval_tasksets: []
    default_workflow_type: dummy_exp_workflow
    default_reward_fn_type: math_reward
  trainer_input:
    experience_buffer:
      name: experience_buffer
      storage_type: queue
      replay_buffer:
        enable: false
        priority_fn: linear_decay
        reuse_cooldown_time: null
        priority_fn_args:
          decay: 2.0
# Explorer (rollout side) settings.
# NOTE(review): the reviewed copy had lost all indentation; the nesting is
# reconstructed. In particular, confirm that `auxiliary_models` and
# `eval_interval` belong to `explorer` rather than `rollout_model`.
explorer:
  runner_per_model: 8
  rollout_model:
    # Overridable through ENGINE_NUM; defaults to 1.
    engine_num: ${oc.env:ENGINE_NUM,1}
    tensor_parallel_size: 1
    enforce_eager: true
    enable_prefix_caching: false
    enable_chunked_prefill: false
    gpu_memory_utilization: 0.9
    dtype: bfloat16
    seed: 42
    enable_thinking: false
    enable_history: false
    enable_openai_api: false
    enable_auto_tool_choice: false
    tool_call_parser: null
    reasoning_parser: null
  auxiliary_models: []
  eval_interval: 1000
# Trainer settings and the backend-specific overrides under `trainer_config`.
# NOTE(review): the reviewed copy had lost all indentation, which made the
# `actor` and `ref` keys collide as duplicates. The nesting below is
# reconstructed — confirm against the config schema.
trainer:
  trainer_type: verl
  # Overridable through TRAINER_STRATEGY; defaults to fsdp.
  trainer_strategy: ${oc.env:TRAINER_STRATEGY,fsdp}
  save_interval: 100
  enable_preview: true
  grad_clip: 1.0
  # Overridable through SP_NUM; defaults to 1.
  ulysses_sequence_parallel_size: ${oc.env:SP_NUM,1}
  # Overridable through SAVE_HF_CHECKPOINT; defaults to last.
  save_hf_checkpoint: ${oc.env:SAVE_HF_CHECKPOINT,last}
  trainer_config:
    actor_rollout_ref:
      # oc.env always yields a string, so the boolean toggles below are
      # passed through oc.decode (the same pattern as model.rope_scaling) to
      # become real booleans instead of relying on downstream coercion.
      # ENTROPY_SAVING drives both entropy flags and OFFLOAD drives all three
      # offload flags, for actor and ref alike; both default to false.
      actor:
        entropy_from_logits_with_chunking: ${oc.decode:${oc.env:ENTROPY_SAVING,false}}
        entropy_checkpointing: ${oc.decode:${oc.env:ENTROPY_SAVING,false}}
        fsdp_config:
          param_offload: ${oc.decode:${oc.env:OFFLOAD,false}}
          optimizer_offload: ${oc.decode:${oc.env:OFFLOAD,false}}
          offload_policy: ${oc.decode:${oc.env:OFFLOAD,false}}
      ref:
        entropy_from_logits_with_chunking: ${oc.decode:${oc.env:ENTROPY_SAVING,false}}
        entropy_checkpointing: ${oc.decode:${oc.env:ENTROPY_SAVING,false}}
        fsdp_config:
          param_offload: ${oc.decode:${oc.env:OFFLOAD,false}}
          optimizer_offload: ${oc.decode:${oc.env:OFFLOAD,false}}
          offload_policy: ${oc.decode:${oc.env:OFFLOAD,false}}
# Metrics backend used for this run.
monitor:
  monitor_type: tensorboard
# Synchronizer settings.
# NOTE(review): presumably governs explorer/trainer weight sync, with
# `sync_timeout` in seconds — confirm against the framework documentation.
synchronizer:
  sync_method: nccl
  sync_style: fixed
  sync_interval: 1
  sync_timeout: 1200
# Logging verbosity.
log:
  level: INFO
Loading
Loading