|
3 | 3 | #=============================================================================== |
4 | 4 | # 1. 配置区域 - 用户只需修改这里 |
5 | 5 | #=============================================================================== |
6 | | -SUFFIX="ajet_deep_finance" # 实验后缀,影响所有日志和实验名称 |
7 | | -PREFIX="open" # 实验前缀,影响日志和实验所在文件夹 |
| 6 | +SUFFIX="newjudge" # 实验后缀,影响所有日志和实验名称 |
| 7 | +PREFIX="ajet_newjudge" # 实验前缀,影响日志和实验所在文件夹 |
8 | 8 |
|
9 | 9 | # OpenJudge 模型配置 |
10 | 10 | OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型 |
11 | 11 | RM_LLM='qwen-max' # RM Gallery 评分模型 |
12 | 12 | JUDGE_CONCURRENCY=10 |
13 | 13 |
|
14 | 14 | # 奖励权重配置 |
15 | | -RM_WEIGHT=0.4 |
16 | | -CITATION_AUDIT_WEIGHT=0.2 |
17 | | -REPORT_RESOLUTION_WEIGHT=0.2 |
18 | | -TRAJECTORY_FAITHFULNESS_WEIGHT=0.2 |
| 15 | +RM_WEIGHT=0.5 |
| 16 | +PRESENTATION_QUALITY_WEIGHT=0.25 |
| 17 | +GROUNDING_WEIGHT=0.25 |
19 | 18 |
|
20 | 19 | # 训练参数配置 |
21 | 20 | NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次 |
22 | 21 | TRAIN_BATCH_SIZE=32 # 训练batchsize |
23 | 22 | NUM_STEPS=6 # 每个样本step轮数 |
24 | 23 | DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000 |
25 | 24 |
|
26 | | -# 主目录 |
| 25 | +# 主目录(AJET_ROOT,需按本机实际路径修改) |
| 26 | +export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new" |
27 | 27 |
|
28 | 28 | NNODES=${WORLD_SIZE} |
29 | 29 |
|
@@ -55,70 +55,23 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \ |
55 | 55 | -e "s|{{MODEL_PATH}}|${MODEL_PATH}|g" \ |
56 | 56 | -e "s|{{NNODES}}|${NNODES}|g" \ |
57 | 57 | -e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \ |
58 | | - -e "s|{{CITATION_AUDIT_WEIGHT}}|${CITATION_AUDIT_WEIGHT}|g" \ |
| 58 | + -e "s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT}|g" \ |
| 59 | + -e "s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT}|g" \ |
59 | 60 | -e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \ |
60 | 61 | -e "s|{{RM_LLM}}|${RM_LLM}|g" \ |
61 | 62 | -e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \ |
62 | | - -e "s|{{REPORT_RESOLUTION_WEIGHT}}|${REPORT_RESOLUTION_WEIGHT}|g" \ |
63 | | - -e "s|{{TRAJECTORY_FAITHFULNESS_WEIGHT}}|${TRAJECTORY_FAITHFULNESS_WEIGHT}|g" \ |
64 | 63 | -e "s|{{NUM_REPEAT}}|${NUM_REPEAT}|g" \ |
65 | 64 | -e "s|{{NUM_STEPS}}|${NUM_STEPS}|g" \ |
66 | 65 | -e "s|{{TRAIN_BATCH_SIZE}}|${TRAIN_BATCH_SIZE}|g" \ |
67 | 66 | -e "s|{{TRAIN_DATA_PATH}}|${TRAIN_DATA_PATH}|g" \ |
68 | 67 | -e "s|{{VAL_DATA_PATH}}|${VAL_DATA_PATH}|g" \ |
69 | | - -e "s|{{ENV_SERVICE_URL}}|${ENV_SERVICE_URL}|g" \ |
70 | 68 | -e "s|{{TRAIN_REF_ANS_PATH}}|${TRAIN_REF_ANS_PATH}|g" \ |
71 | 69 | -e "s|{{VAL_REF_ANS_PATH}}|${VAL_REF_ANS_PATH}|g" \ |
72 | 70 | -e "s|{{CKPT_SAVE_PATH}}|${CKPT_SAVE_PATH}|g" \ |
73 | 71 | ${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE} |
74 | 72 |
|
75 | 73 | echo "配置文件已生成: ${CONFIG_FILE}" |
76 | | -echo "参数确认: RM=${RM_WEIGHT}, Citation=${CITATION_AUDIT_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}" |
77 | | - |
78 | | -#=============================================================================== |
79 | | -# 3. 环境配置 |
80 | | -#=============================================================================== |
81 | | -# MongoDB 缓存配置 |
82 | | -CACHE_TYPE="mongodb" |
83 | | -MONGO_URI="mongodb://${ADDR}:27117/" |
84 | | -MONGO_DB_NAME="finworld_cache" |
85 | | -MONGO_COLLECTION_NAME="tool_cache" |
86 | | -export CACHE_TYPE MONGO_URI MONGO_DB_NAME MONGO_COLLECTION_NAME |
87 | | - |
88 | | -# DeepFinance MCP 配置 |
89 | | -DEEPFINANCE_MCP_CONFIG="${AJET_ROOT}/tutorial/example_deep_finance/config/mcp_finance_tool_generated.json" |
90 | | - |
91 | | -# 动态生成 MCP 配置文件 |
92 | | -mkdir -p $(dirname ${DEEPFINANCE_MCP_CONFIG}) |
93 | | -cat > ${DEEPFINANCE_MCP_CONFIG} << EOF |
94 | | -{ |
95 | | - "mcpServers": { |
96 | | - "flowllm": { |
97 | | - "transport": "sse", |
98 | | - "url": "http://${ADDR}:${MCP_PORT}/sse", |
99 | | - "timeout": 600, |
100 | | - "sse_read_timeout": 1200 |
101 | | - } |
102 | | - } |
103 | | -} |
104 | | -EOF |
105 | | -export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS |
106 | | - |
107 | | -# 其他服务配置 |
108 | | -HF_ENDPOINT="https://hf-mirror.com" |
109 | | -ES_HOSTS="http://11.160.132.46:8200" |
110 | | -export HF_ENDPOINT ES_HOSTS |
111 | | - |
112 | | -# log 文件位置 |
113 | | -CURRENT_TIME=$(date "+%Y%m%d_%H%M%S") |
114 | | -LOG_DIR="${AJET_ROOT}/logs/${PREFIX}" |
115 | | -MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log" |
116 | | -ENV_SERVICE_LOG="${LOG_DIR}/env_service_${SUFFIX}_${CURRENT_TIME}.log" |
117 | | -TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log" |
118 | | - |
119 | | -# 多机训练参数配置 |
120 | | -GPUS_PER_NODE=8 |
121 | | -EXPECTED_WORKERS=$WORLD_SIZE |
| 74 | +echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}" |
122 | 75 |
|
123 | 76 |
|
124 | 77 | #=============================================================================== |
@@ -162,7 +115,6 @@ export RAY_CLUSTER_MODE="multi_node" |
162 | 115 | #=============================================================================== |
163 | 116 | # 6. 主流程 |
164 | 117 | #=============================================================================== |
165 | | -log "开始多机多卡训练: ${SUFFIX}" |
166 | 118 | log "节点数: ${NNODES}, 每节点GPU数: ${GPUS_PER_NODE}" |
167 | 119 | mkdir -p ${LOG_DIR} |
168 | 120 | mkdir -p $(dirname ${CONFIG_FILE}) |
|
0 commit comments