chore(config): parameterize deep finance training configuration; stop force-disabling tool calls and replace the OpenJudge weight keys

TaoShuchang · TaoShuchang · commit 1bb7f6097dfd · 2026-01-27T19:30:30.000+08:00
diff --git a/tutorial/example_deep_finance/deep_finance.yaml b/tutorial/example_deep_finance/deep_finance.yaml
@@ -1,27 +1,26 @@
 # ------------------ 主要配置 ------------------
 ajet:
-  project_name: ajet_deep_finance
-  experiment_name: "ajet_deep_finance"
+  project_name: "{{PREFIX}}"
+  experiment_name: "{{SUFFIX}}"
   # Judge 配置（嵌套结构，对应 self.config.ajet.judge.*）
   judge:
-    openjudge_llm: qwen-flash     # OpenJudge 模型
-    rm_llm: qwen-max                   # RM Gallery 模型
-    concurrency: 10   # Judge 并发数
+    openjudge_llm: {{OPENJUDGE_LLM}}     # OpenJudge model
+    rm_llm: {{RM_LLM}}                   # RM Gallery model
+    concurrency: {{JUDGE_CONCURRENCY}}   # Judge concurrency
     train_ref_ans_path: {{TRAIN_REF_ANS_PATH}}   # 训练集 Reference Answer 路径
     val_ref_ans_path: {{VAL_REF_ANS_PATH}}       # 验证集 Reference Answer 路径
   # OpenJudge 权重配置
-  report_resolution_weight: 0.2         # 报告质量评估
-  trajectory_faithfulness_weight: 0.2   # 事实准确性评估
-  citation_audit_weight: 0.2               # 引用审计评估 (覆盖率 + 真实性)
-  rm_weight: 0.4                                       # RM Gallery 权重
+  presentation_quality_weight: {{PRESENTATION_QUALITY_WEIGHT}}   # report presentation quality evaluation
+  grounding_weight: {{GROUNDING_WEIGHT}}                         # citation compliance evaluation
+  rm_weight: {{RM_WEIGHT}}                                       # RM Gallery weight
   task_judge:
     # 使用本地 DeepFinanceJudge 进行评估（解耦远程 env_service）
     judge_protocol: tutorial.example_deep_finance.deep_finance_judge->DeepFinanceJudgeByOpenJudge
   model:
     # ✨✨✨✨ 设置待训练的模型
     path: {{MODEL_PATH}}
   trainer_common:
-    nnodes: 8
+    nnodes: {{NNODES}}
     n_gpus_per_node: 8
     val_before_train: True
     val_pass_n: 8
@@ -32,44 +31,42 @@ ajet:
   rollout:
     # ✨✨✨✨ 编写并选择Agent
     user_workflow: tutorial.example_deep_finance.deep_finance->ExampleDeepResearchProtocol
-    force_disable_toolcalls: True
+    force_disable_toolcalls: False
     enable_oversample: False
     tensor_model_parallel_size: 8
-    num_repeat: 4
+    num_repeat: {{NUM_REPEAT}}
     max_env_worker: 64  # 增加环境并行数
     max_num_seqs: 64    # 增加VLLM并发序列数
     max_response_length_in_one_turn: 8000
     max_model_len: 50000
     agent_madness_reward: 0.0
     compute_madness_checklist: None
     multi_turn:
-      max_steps: 6
+      max_steps: {{NUM_STEPS}}
   interchange_server:
     interchange_method: 'tcp' # options: 'tcp' (multi-nodes) or  'ipc' (1 node)
   debug:
     debug_max_parallel: 1  # 增加并行任务数，充分利用GPU
     debug_first_n_tasks: 100  # 增加处理的任务数
   data:
-    train_batch_size: 32
+    train_batch_size: {{TRAIN_BATCH_SIZE}}
     max_prompt_length: 8000
     max_response_length: 41000
 
   task_reader:
     type: deep_finance  # 数据从 JSON 加载并组装 init_messages，工具调用走 env_service
     deep_finance:
       training:
-        file_path: {{TRAIN_PATH}}
+        file_path: {{TRAIN_DATA_PATH}}
       validation:
-        file_path: {{VAL_PATH}}
+        file_path: {{VAL_DATA_PATH}}
     # env_service 仍需配置（用于工具调用）
     env_service:
       env_type: "finworld"
       env_url: {{ENV_SERVICE_URL}}
       env_action_preference: code
-
-
 trainer:
-  default_local_dir: {{CKPT_SAVE_PATH}}
+  default_local_dir: "{{CKPT_SAVE_PATH}}/{{PREFIX}}/{{SUFFIX}}"  # save dir is namespaced by the same PREFIX / SUFFIX values used for ajet.project_name and ajet.experiment_name
   # resume_mode: disable  # 禁用自动恢复，从头开始训练
 actor_rollout_ref:
   rollout: