|
| 1 | +# ------------------ main configuration ------------------ |
| 2 | +ajet: |
| 3 | + project_name: "ajet_default_project" |
| 4 | + experiment_name: "read_yaml_name" |
| 5 | + experiment_dir: "auto" # {exp-dir}/{experiment_name} |
| 6 | + backbone: debug # `debug` or `trinity` or `verl` |
| 7 | + |
| 8 | + |
| 9 | + model: |
| 10 | + # which model should be trained |
| 11 | + path: /path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct |
| 12 | + |
| 13 | + data: |
| 14 | + # max number of tokens for prompt |
| 15 | + max_prompt_length: 3000 |
| 16 | + # max number of tokens for response |
| 17 | + max_response_length: 15000 |
| 18 | + # how many tasks per training batch |
| 19 | + train_batch_size: 32 |
| 20 | + # [Hint]: The final number of samples per update will be: N_{sample} = (data.train_batch_size * rollout.num_repeat * rollout.multi_turn.expected_steps) |
| 21 | + |
| 22 | + |
| 23 | + rollout: |
| 24 | + |
| 25 | + # the path to the workflow class |
| 26 | + user_workflow: tutorial.example_appworld.appworld->ExampleAgentScopeWorkflow |
| 27 | + |
| 28 | + # whether or not to disable all tool calls |
| 29 | + force_disable_toolcalls: False |
| 30 | + |
| 31 | + # maximum number of parallel environments / simulate workers |
| 32 | + max_env_worker: 64 |
| 33 | + |
| 34 | + # step reward gamma (experimental, do not change) |
| 35 | + gamma: 1.0 |
| 36 | + |
| 37 | + # monitor LLM's abnormal behaviors during rollout
| 38 | + compute_madness_checklist: |
| 39 | + - "nonsense" |
| 40 | + # send signal to terminate context tracing when LLM is losing control |
| 41 | + agent_madness_termination: True # terminate_after_gone_mad |
| 42 | + # reward penalty applied when the LLM is detected as having lost control
| 43 | + agent_madness_reward: -1.0 |
| 44 | + |
| 45 | + # max response length in one turn |
| 46 | + max_response_length_in_one_turn: 4096 |
| 47 | + |
| 48 | + # max token length allowed for the model during rollout |
| 49 | + max_model_len: 18000 |
| 50 | + |
| 51 | + multi_turn: |
| 52 | + # how many samples should be collected for each task run |
| 53 | + max_sample_per_task: 30 |
| 54 | + # limit the maximum steps for each task |
| 55 | + max_steps: 30 |
| 56 | + # the expected steps for each task, used to calculate the training batch size for trinity |
| 57 | + expected_steps: 1 |
| 58 | + |
| 59 | + # TP size for rollout engine |
| 60 | + tensor_model_parallel_size: 1 |
| 61 | + |
| 62 | + # the number of vllm engines; the number of GPUs for inference is `n_vllm_engine*tensor_model_parallel_size`; this argument is only effective when using trinity
| 63 | + n_vllm_engine: 1 |
| 64 | + |
| 65 | + # how many sequences are allowed to be processed in parallel by each vllm engine |
| 66 | + max_num_seqs: 10 |
| 67 | + |
| 68 | + # the usage of infer engine, options: (vllm, sglang) |
| 69 | + name: vllm |
| 70 | + |
| 71 | + # how many times a task should be repeated |
| 72 | + num_repeat: 4 |
| 73 | + |
| 74 | + # rollout kwargs |
| 75 | + temperature: 0.9 |
| 76 | + top_p: 1.0 |
| 77 | + |
| 78 | + # validation kwargs |
| 79 | + val_kwargs: |
| 80 | + # sampling settings used when generating responses during validation
| 81 | + temperature: 0.0 |
| 82 | + top_k: -1 |
| 83 | + top_p: 1.0 |
| 84 | + do_sample: False |
| 85 | + num_repeat: 1 |
| 86 | + |
| 87 | + |
| 88 | + task_reader: |
| 89 | + # how to read dataset / environment |
| 90 | + type: huggingface_dat_repo # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy` |
| 91 | + |
| 92 | + # when `type == jsonl_dataset_file` |
| 93 | + jsonl_dataset_file: |
| 94 | + training: |
| 95 | + file_path: "/path/to/training/data.jsonl" |
| 96 | + validation: |
| 97 | + file_path: "/path/to/validation/data.jsonl" |
| 98 | + |
| 99 | + # when `type == env_service` |
| 100 | + env_service: |
| 101 | + env_type: "appworld" |
| 102 | + env_url: "http://127.0.0.1:8080" |
| 103 | + env_action_preference: code # code, text, box |
| 104 | + training_split: train |
| 105 | + validation_split: dev |
| 106 | + |
| 107 | + # when `type == huggingface_dat_repo` |
| 108 | + huggingface_dat_repo: |
| 109 | + dataset_path: "gsm8k" |
| 110 | + training_split: "train" |
| 111 | + validation_split: "validation" |
| 112 | + |
| 113 | + # when `type == data_generation` |
| 114 | + data_generation: |
| 115 | + document_reader: |
| 116 | + document_path: |
| 117 | + - 'dataset/document/your-document1.pdf' |
| 118 | + - 'dataset/document/your-document2.pdf' |
| 119 | + languages: |
| 120 | + - eng |
| 121 | + chunk_size: 5120 |
| 122 | + split_by: "sentence" |
| 123 | + cache_enabled: true |
| 124 | + query_reader: |
| 125 | + type: jsonl_dataset_file |
| 126 | + jsonl_dataset_file: |
| 127 | + training: |
| 128 | + file_path: 'dataset/jsonl/your-queries.jsonl' |
| 129 | + task_num: 10 |
| 130 | + llm_model: qwen-long |
| 131 | + llm_response_length: 8192 |
| 132 | + num_workers: 32 |
| 133 | + sampling_params: |
| 134 | + temperature: 0 |
| 135 | + deduplication_filter: |
| 136 | + enabled: true |
| 137 | + params: |
| 138 | + similarity_threshold: 0.8 |
| 139 | + db_path: ./.similarity_db |
| 140 | + model: text-embedding-v4 |
| 141 | + api_key: null # load from the env |
| 142 | + base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 |
| 143 | + |
| 144 | + |
| 145 | + task_judge: |
| 146 | + judge_type: customized_protocol # Options: 'customized_protocol', 'rubrics_auto_grader' |
| 147 | + |
| 148 | + # when `judge_type == customized_protocol` |
| 149 | + judge_protocol: ajet.task_judge.env_service_as_judge->EnvServiceJudge |
| 150 | + |
| 151 | + # the helper LLM model used for LLM-AS-Judge |
| 152 | + alien_llm_model: qwen3-235b-a22b-instruct-2507 |
| 153 | + alien_llm_response_length: 512 |
| 154 | + |
| 155 | + # when `judge_type == rubrics_auto_grader` |
| 156 | + rubrics_auto_grader: |
| 157 | + model_name: qwen-max |
| 158 | + grader_mode: pointwise |
| 159 | + language: en |
| 160 | + query_specific_generate_number: 1 |
| 161 | + enable_categorization: false |
| 162 | + categories_number: 5 |
| 163 | + grader_name: "auto_grader" |
| 164 | + query_field: main_query |
| 165 | + answer_field: final_answer |
| 166 | + reference_field: answer |
| 167 | + custom_evaluation_prompt: null # dict or PromptTemplate or None |
| 168 | + input_data_type: jsonl_dataset_file # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` |
| 169 | + jsonl_dataset_file: |
| 170 | + training: |
| 171 | + file_path: "tutorial/example_rm_auto_grader/rubrics_train.jsonl" |
| 172 | + # Pointwise mode settings |
| 173 | + min_score: 0 |
| 174 | + max_score: 1 |
| 175 | + |
| 176 | + |
| 177 | + |
| 178 | + # context tracker protocol is valid ONLY when `use_agentscope_protocol=False` |
| 179 | + context_tracker: |
| 180 | + |
| 181 | + # timeline merging policy used in Context Tracker |
| 182 | + timeline_merging_policy: |
| 183 | + |
| 184 | + # compare_level = "text": relaxed comparison using text; easier to match, at very little cost
| 185 | + # compare_level = "token": strict comparison using tokens; results in less aggressive merging
| 186 | + timeline_compare_level: "text" # options: "text", "token" |
| 187 | + |
| 188 | + # whether or not to ignore tool calls when comparing steps, default to `True` to make merging more aggressive |
| 189 | + ignore_tools: True |
| 190 | + |
| 191 | + # Fix Retokenization Drift: inconsistencies between training and inference token array |
| 192 | + # Related reading: https://github.com/vllm-project/vllm/pull/22587 (note that the implementation is very different) |
| 193 | + fix_retokenization_drift: True |
| 194 | + |
| 195 | + # log tool format check results |
| 196 | + log_tool_format_check: False |
| 197 | + |
| 198 | + # log detailed tool format error information
| 199 | + log_tool_format_error_detail: False |
| 200 | + |
| 201 | + # detect at which point the timeline stops growing linearly and causes a snap during an episode: this incurs additional computation.
| 202 | + detect_timeline_snap: False |
| 203 | + |
| 204 | + # deprecated |
| 205 | + alien_llm_model: qwen3-235b-a22b-instruct-2507 |
| 206 | + |
| 207 | + # deprecated |
| 208 | + alien_llm_response_length: 512 |
| 209 | + |
| 210 | + |
| 211 | + # when backbone is `debug`, debug related configurations |
| 212 | + debug: |
| 213 | + |
| 214 | + # max parallel runners in debug mode |
| 215 | + debug_max_parallel: 4 |
| 216 | + |
| 217 | + # how many tasks to sample from the training set
| 218 | + debug_first_n_tasks: 2 |
| 219 | + |
| 220 | + # what is the vllm engine port in the background |
| 221 | + debug_vllm_port: 18000 |
| 222 | + |
| 223 | + # what is the seed of the vllm engine in the background |
| 224 | + debug_vllm_seed: 12345 |
| 225 | + |
| 226 | + # what is the TP size in debug mode |
| 227 | + debug_tensor_parallel_size: 4 |
| 228 | + |
| 229 | + |
| 230 | + # trainer common configurations |
| 231 | + trainer_common: |
| 232 | + |
| 233 | + # validation before training |
| 234 | + val_before_train: False |
| 235 | + val_pass_n: 4 |
| 236 | + |
| 237 | + # save and test frequency (in step) |
| 238 | + save_freq: 20 |
| 239 | + test_freq: 20 |
| 240 | + |
| 241 | + # total training epochs |
| 242 | + total_epochs: 50 |
| 243 | + |
| 244 | + nnodes: 1 |
| 245 | + n_gpus_per_node: 8 |
| 246 | + |
| 247 | + # logger selection |
| 248 | + logger: swanlab |
| 249 | + |
| 250 | + # algorithm setting |
| 251 | + algorithm: |
| 252 | + adv_estimator: grpo |
| 253 | + use_kl_in_reward: False |
| 254 | + |
| 255 | + # number of optimizer.step per big batch |
| 256 | + mini_batch_num: 1 |
| 257 | + |
| 258 | + # verl offload configs |
| 259 | + fsdp_config: |
| 260 | + param_offload: True |
| 261 | + optimizer_offload: True |
| 262 | + |
| 263 | + # learning rate |
| 264 | + optim: |
| 265 | + lr: 1e-6 |
| 266 | + |
| 267 | + # enable KL loss regularization |
| 268 | + use_kl_loss: True |
| 269 | + |
| 270 | + # kl divergence loss coefficient |
| 271 | + kl_loss_coef: 0.002 |
| 272 | + kl_loss_type: low_var_kl |
| 273 | + |
| 274 | + # Ulysses specific configs |
| 275 | + ulysses_sequence_parallel_size: 1 |
| 276 | + |
| 277 | + # base directory to save checkpoints |
| 278 | + checkpoint_base_dir: ./saved_checkpoints |
| 279 | + |
| 280 | + # whether to save train/eval trajectories to JSON files |
| 281 | + save_trajectory_as_json_file: False |
| 282 | + |
| 283 | + |
| 284 | + # the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature |
| 285 | + enable_tinkerscript_mode: True |
| 286 | + # both tinkerscript / oai share the same interchange server |
| 287 | + enable_experimental_interchange_server: True |
| 288 | + # interchange server configuration |
| 289 | + interchange_server: |
| 290 | + interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node) |
| 291 | + interchange_server_port: 10086 |
| 292 | + num_fastapi_process: 4 # 1, 2 or 4 is fine |
| 293 | + max_fastapi_threads: 128 # 64 or 128 is fine |
| 294 | + max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker` |
| 295 | + |
| 296 | + |
| 297 | + task_runner: |
| 298 | + # method used to submit LLM inference requests
| 299 | + llm_infer_submit_method: "async" # options: "sync", "async" |
| 300 | + |
| 301 | + # how to wrap the user-defined workflow |
| 302 | + wrapper_type: "asyncio-with-gc" |
| 303 | + # - wrapper_type: "asyncio-with-gc": safe, with periodic garbage collection to prevent event loop leaks (recommended) |
| 304 | + # - wrapper_type: "asyncio": fast, but may cause event loop leak in long run |
| 305 | + # - wrapper_type: "multi-processing": safe, but resource consuming |
| 306 | + |
| 307 | + # when `wrapper_type` is `multi-processing`, the timeout for each task |
| 308 | + wrapper_multiprocessing_timeout: 3600 # in seconds |
| 309 | + |
| 310 | + # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 311 | + execute_test: False # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 312 | + execute_testing_lambda: "" # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 313 | + |
| 314 | + |
| 315 | +# ------------------ do not edit ------------------ |
| 316 | +hydra: |
| 317 | + searchpath: |
| 318 | + - file://ajet/default_config/verl |
| 319 | + |
| 320 | +# ------------------ do not edit ------------------ |
| 321 | +defaults: |
| 322 | + - verl_default # verl inherit 1/1 |
| 323 | + - _self_ |
0 commit comments