# Trinity-RFT experiment configuration: GRPO context-length test run.
# Values of the form ${oc.env:VAR,default} are OmegaConf interpolations
# resolved from environment variables, falling back to the given default.
mode: both
project: Trinity-RFT-context-length-exp
group: length-test
name: length-test
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints/length-test}
continue_from_checkpoint: false

algorithm:
  algorithm_type: grpo
  # Number of rollouts sampled per prompt (GRPO group size).
  repeat_times: ${oc.env:REPEAT_TIMES,8}
  advantage_fn: grpo
  sample_strategy: default
  policy_loss_fn: ppo
  kl_penalty_fn: none
  kl_loss_fn: k2
  entropy_loss_fn: default
  optimizer:
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    warmup_style: constant

data_processor: {}

model:
  model_path: ${oc.env:MODEL_PATH,Qwen/Qwen3-0.6B}
  max_prompt_tokens: ${oc.env:PROMPT_LEN,2048}
  max_model_len: ${oc.env:MAX_MODEL_LEN,4096}
  # ROPE_SCALING is expected to hold a serialized value (e.g. a JSON/YAML
  # rope-scaling dict) that oc.decode parses; defaults to null when unset.
  rope_scaling: ${oc.decode:${oc.env:ROPE_SCALING,null}}

cluster:
  node_num: 1
  gpu_per_node: ${oc.env:GPU_NUM,8}

buffer:
  batch_size: 1
  total_steps: 2
  explorer_input:
    taskset:
      name: taskset
      storage_type: file
      path: openai/gsm8k
      split: train
      subset_name: main
      format:
        # Dataset column names mapped to prompt/response roles.
        prompt_key: question
        response_key: answer
      rollout_args:
        temperature: 1.0
        logprobs: 0
      workflow_args:
        # Reuse the model section's limits so workflow and model stay in sync.
        prompt_len: ${model.max_prompt_tokens}
        max_model_len: ${model.max_model_len}
    eval_tasksets: []
    default_workflow_type: dummy_exp_workflow
    default_reward_fn_type: math_reward
  trainer_input:
    experience_buffer:
      name: experience_buffer
      storage_type: queue
      replay_buffer:
        enable: false
        priority_fn: linear_decay
        reuse_cooldown_time: null
        priority_fn_args:
          decay: 2.0

explorer:
  runner_per_model: 8
  rollout_model:
    engine_num: ${oc.env:ENGINE_NUM,1}
    tensor_parallel_size: 1
    enforce_eager: true
    enable_prefix_caching: false
    enable_chunked_prefill: false
    gpu_memory_utilization: 0.9
    dtype: bfloat16
    seed: 42
    enable_thinking: false
    enable_history: false
    enable_openai_api: false
    enable_auto_tool_choice: false
    tool_call_parser: null
    reasoning_parser: null
  auxiliary_models: []
  eval_interval: 1000

trainer:
  trainer_type: verl
  trainer_strategy: ${oc.env:TRAINER_STRATEGY,fsdp}
  save_interval: 100
  enable_preview: true
  grad_clip: 1.0
  ulysses_sequence_parallel_size: ${oc.env:SP_NUM,1}
  save_hf_checkpoint: ${oc.env:SAVE_HF_CHECKPOINT,last}
  # Passed through to the underlying verl trainer; actor and ref share the
  # same entropy-saving and offload toggles via the ENTROPY_SAVING / OFFLOAD
  # environment variables.
  trainer_config:
    actor_rollout_ref:
      actor:
        entropy_from_logits_with_chunking: ${oc.env:ENTROPY_SAVING,false}
        entropy_checkpointing: ${oc.env:ENTROPY_SAVING,false}
        fsdp_config:
          param_offload: ${oc.env:OFFLOAD,false}
          optimizer_offload: ${oc.env:OFFLOAD,false}
          offload_policy: ${oc.env:OFFLOAD,false}
      ref:
        entropy_from_logits_with_chunking: ${oc.env:ENTROPY_SAVING,false}
        entropy_checkpointing: ${oc.env:ENTROPY_SAVING,false}
        fsdp_config:
          param_offload: ${oc.env:OFFLOAD,false}
          optimizer_offload: ${oc.env:OFFLOAD,false}
          offload_policy: ${oc.env:OFFLOAD,false}

monitor:
  monitor_type: tensorboard

synchronizer:
  sync_method: nccl
  sync_style: fixed
  sync_interval: 1
  # Timeout in seconds — TODO confirm unit against Trinity-RFT docs.
  sync_timeout: 1200

log:
  level: INFO