11# ------------------ 主要配置 ------------------
22ajet :
# Diff rows: "-" marks the removed literal names, "+" marks the added template placeholders.
# After this change the project and experiment names come from {{PREFIX}} and {{SUFFIX}}.
# NOTE(review): the quoted values are shown with spaces inside the quotes; if those spaces
# are real rather than a rendering artifact, the names carry leading/trailing blanks — confirm.
3- project_name : ajet_deep_finance
4- experiment_name : " ajet_deep_finance "
3+ project_name : " {{PREFIX}} "
4+ experiment_name : " {{SUFFIX}} "
55 # Judge configuration (nested structure, corresponds to self.config.ajet.judge.*)
66 judge :
# The three literal settings below are replaced by template placeholders of the same key names.
7- openjudge_llm : qwen-flash # OpenJudge model
8- rm_llm : qwen-max # RM Gallery model
9- concurrency : 10 # Judge concurrency
7+ openjudge_llm : {{OPENJUDGE_LLM}} # OpenJudge model
8+ rm_llm : {{RM_LLM}} # RM Gallery model
9+ concurrency : {{JUDGE_CONCURRENCY}} # Judge concurrency
1010 train_ref_ans_path : {{TRAIN_REF_ANS_PATH}} # path to the training-set reference answers
1111 val_ref_ans_path : {{VAL_REF_ANS_PATH}} # path to the validation-set reference answers
1212 # OpenJudge weight configuration
# The four removed weight keys are replaced by three keys: presentation_quality_weight,
# grounding_weight and rm_weight, each filled from a placeholder.
# NOTE(review): the removed literals summed to 1.0; whether the rendered weights must also
# sum to 1 is not visible here — confirm against the judge implementation.
13- report_resolution_weight : 0.2 # report quality evaluation
14- trajectory_faithfulness_weight : 0.2 # factual accuracy evaluation
15- citation_audit_weight : 0.2 # citation audit evaluation (coverage + authenticity)
16- rm_weight : 0.4 # RM Gallery weight
13+ presentation_quality_weight : {{PRESENTATION_QUALITY_WEIGHT}} # report presentation quality evaluation
14+ grounding_weight : {{GROUNDING_WEIGHT}} # citation conformance evaluation
15+ rm_weight : {{RM_WEIGHT}} # RM Gallery weight
1716 task_judge :
1817 # Use the local DeepFinanceJudge for evaluation (decoupled from the remote env_service)
# The value has the form <dotted module path>-><class name>.
1918 judge_protocol : tutorial.example_deep_finance.deep_finance_judge->DeepFinanceJudgeByOpenJudge
2019 model :
2120 # ✨✨✨✨ Set the model to be trained
# Unchanged context rows: the path is filled from the {{MODEL_PATH}} placeholder.
2221 path : {{MODEL_PATH}}
2322 trainer_common :
# nnodes moves from the literal 8 to the {{NNODES}} placeholder; the other keys are unchanged.
24- nnodes : 8
23+ nnodes : {{NNODES}}
2524 n_gpus_per_node : 8
2625 val_before_train : True
2726 val_pass_n : 8
@@ -32,44 +31,42 @@ ajet:
3231 rollout :
3332 # ✨✨✨✨ Write and select the Agent
3433 user_workflow : tutorial.example_deep_finance.deep_finance->ExampleDeepResearchProtocol
# This change flips force_disable_toolcalls from True to False.
35- force_disable_toolcalls : True
34+ force_disable_toolcalls : False
3635 enable_oversample : False
3736 tensor_model_parallel_size : 8
# num_repeat moves from the literal 4 to the {{NUM_REPEAT}} placeholder.
38- num_repeat : 4
37+ num_repeat : {{NUM_REPEAT}}
3938 max_env_worker : 64 # increase the number of parallel environments
4039 max_num_seqs : 64 # increase the number of concurrent vLLM sequences
4140 max_response_length_in_one_turn : 8000
4241 max_model_len : 50000
4342 agent_madness_reward : 0.0
# NOTE(review): a bare None is a plain string in YAML, not null — confirm the consumer expects that.
4443 compute_madness_checklist : None
4544 multi_turn :
# max_steps moves from the literal 6 to the {{NUM_STEPS}} placeholder.
46- max_steps : 6
45+ max_steps : {{NUM_STEPS}}
4746 interchange_server :
# NOTE(review): the quoted value is shown with a space after the opening quote; if that is
# real rather than a rendering artifact it would not equal 'tcp' — confirm.
4847 interchange_method : ' tcp' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
4948 debug :
# Unchanged context rows.
5049 debug_max_parallel : 1 # increase the number of parallel tasks to make full use of the GPU
5150 debug_first_n_tasks : 100 # increase the number of tasks processed
5251 data :
# train_batch_size moves from the literal 32 to the {{TRAIN_BATCH_SIZE}} placeholder.
53- train_batch_size : 32
52+ train_batch_size : {{TRAIN_BATCH_SIZE}}
5453 max_prompt_length : 8000
5554 max_response_length : 41000
5655
5756 task_reader :
5857 type : deep_finance # data is loaded from JSON and assembled into init_messages; tool calls go through env_service
5958 deep_finance :
6059 training :
# Placeholder renamed from TRAIN_PATH to TRAIN_DATA_PATH.
# NOTE(review): whatever renders this template must supply the new name — confirm the caller was updated.
61- file_path : {{TRAIN_PATH }}
60+ file_path : {{TRAIN_DATA_PATH }}
6261 validation :
# Placeholder renamed from VAL_PATH to VAL_DATA_PATH (same caveat as the training path).
63- file_path : {{VAL_PATH }}
62+ file_path : {{VAL_DATA_PATH }}
6463 # env_service still needs to be configured (used for tool calls)
6564 env_service :
6665 env_type : " finworld"
6766 env_url : {{ENV_SERVICE_URL}}
6867 env_action_preference : code
69-
70-
7168trainer :
# The checkpoint directory changes from {{CKPT_SAVE_PATH}} alone to
# {{CKPT_SAVE_PATH}}/{{PREFIX}}/{{SUFFIX}}, the same placeholders used for the project and experiment names.
# NOTE(review): the quoted value is shown with spaces inside the quotes; if those spaces are
# real rather than a rendering artifact the path would begin and end with a blank — confirm.
72- default_local_dir : {{CKPT_SAVE_PATH}}
69+ default_local_dir : " {{CKPT_SAVE_PATH}}/{{PREFIX}}/{{SUFFIX}} "
7370 # resume_mode: disable # disable automatic resume and train from scratch
7471actor_rollout_ref :
7572 rollout :
0 commit comments