#!/usr/bin/env bash
# RL training launcher: stops any stale Ray cluster, then (below) starts a
# fresh one and submits the training job.
set -ex
# Kill any leftover Ray processes from a previous run before relaunching.
ray stop --force
# examples of usage:
# qwen3_8B_grpo_gsm8k training: bash examples/v1/scripts/run_rl.sh examples/v1/config/rl_qwen3_8B_grpo.py "sglang" $MODEL_PATH $DATA_PATH $EVAL_DATA_PATH
# qwen2.5_7B_dapo_math training: bash examples/v1/scripts/run_rl.sh examples/v1/config/rl_qwen25_7B_dapo.py "sglang" $MODEL_PATH $DATA_PATH $EVAL_DATA_PATH
# Re-export data locations so Ray workers and the submitted job inherit them.
export DATA_PATH=$DATA_PATH
export EVAL_DATA_PATH=$EVAL_DATA_PATH
# Enable FlashAttention-3 by default; caller may override via the environment.
export XTUNER_USE_FA3=${XTUNER_USE_FA3:-1}
export XTUNER_LOG_LEVEL=${XTUNER_LOG_LEVEL:-"INFO"}
# Flush Python stdout/stderr immediately so training logs stream in real time.
export PYTHONUNBUFFERED=1
# Normalize the backend name so the comparison below is case-insensitive.
infer_backend_lower=$(echo "$INFER_BACKEND" | tr '[:upper:]' '[:lower:]')
3536if [ " $infer_backend_lower " = " sglang" ]; then
4849 exit 1
4950fi
5051
# Prepare the work directory and derived log/memory paths.
current_time=$(date "+%m%d%H")
# Use the last path component of the model as model_name and the
# second-to-last component of the data path as data_name.
model_dir_name=$(basename "$MODEL_PATH")
data_dir_name=$(basename "$(dirname "$DATA_PATH")")
DIR=$(pwd)
export WORK_DIR="${DIR}/work_dirs/${model_dir_name}_${data_dir_name}_${infer_backend_lower}"
# mkdir -p is idempotent, so no existence check is needed.
mkdir -p "$WORK_DIR"
export LMDEPLOY_LOG_FILE="${WORK_DIR}/lmdeploy_log_${current_time}.txt"
export XTUNER_RL_MEM_DIR="${WORK_DIR}/mem_${current_time}"
63+
# 2. Launch Ray cluster
# Cap num_cpus proportionally to NODE_COUNT to avoid memory OOM from
# over-scheduling tasks on a single node.
node_count=${NODE_COUNT:-1}
total_cpus=$((node_count * 128))
5669if [ " $RAY_RANK " -eq 0 ]; then
70+ rm -rf /tmp/ray_log
71+ export RAY_LOG_DIR=" ${WORK_DIR} /ray_${current_time} /"
72+ mkdir -p ${RAY_LOG_DIR}
73+ ln -sfn " ${RAY_LOG_DIR} " /tmp/ray_log
5774 ray start --head \
5875 --node-ip-address=" $RAY_MASTER_ADDR " \
5976 --port=" $RAY_HEAD_PORT " \
6077 --dashboard-host=0.0.0.0 \
6178 --dashboard-port=$RAY_DASHBOARD_PORT \
6279 --include-dashboard=true \
6380 --disable-usage-stats \
64- --num-cpus=$total_cpus
81+ --num-cpus=$total_cpus \
82+ --temp-dir=" /tmp/ray_log/"
6583else
6684 while true ; do
6785 if curl --connect-timeout 2 " http://${RAY_MASTER_ADDR} :${RAY_DASHBOARD_PORT} " > /dev/null 2>&1 ; then
@@ -86,23 +104,12 @@ while true; do
86104 fi
87105done
88106
89- # 3. Prepare work directory and log file
90- current_time=$( date " +%m%d%H" )
91- # 取模型路径的最后一级作为model_name,取数据路径的倒数第二级作为data_name
92- model_dir_name=$( basename " $MODEL_PATH " )
93- data_dir_name=$( basename " $( dirname " $DATA_PATH " ) " )
94- export WORK_DIR=" work_dirs/${model_dir_name} _${data_dir_name} _${infer_backend_lower} "
95-
96- if [ ! -d " $WORK_DIR " ]; then
97- mkdir -p " $WORK_DIR "
98- fi
99-
# Snapshot this script and the config into WORK_DIR for reproducibility.
SCRIPT_NAME=$(basename "$0")
cp "$0" "${WORK_DIR}/${SCRIPT_NAME}"
cp "$CONFIG_PATH" "${WORK_DIR}/config.py"
LOG_FILE="${WORK_DIR}/training_log_${current_time}.txt"
104111
105- # 4 . Submit training job on Head node
112+ # 3 . Submit training job on Head node
106113if [ " $RAY_RANK " -eq 0 ]; then
107114 RUNTIME_ENV_JSON=" {
108115 \" env_vars\" : {
0 commit comments