File tree Expand file tree Collapse file tree 2 files changed +7
-6
lines changed
Expand file tree Collapse file tree 2 files changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -130,11 +130,11 @@ jobs:
130130 - name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
131131 run: |
132132 ray stop --force
133- VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
133+ FSDP_SIZE=4 SP_SIZE=2 bash tests/special_e2e/run_ppo_trainer_veomni.sh
134134 - name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=8, USP=1)
135135 run: |
136136 ray stop --force
137- MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/gsm8k/test.parquet VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen3-2b-vl-function-reward-minimal-fsdp-size8" bash tests/special_e2e/run_ppo_trainer_veomni.sh
137+ MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/gsm8k/test.parquet FSDP_SIZE=8 SP_SIZE=1 bash tests/special_e2e/run_ppo_trainer_veomni.sh
138138
139139 cleanup:
140140 runs-on: ubuntu-latest
Original file line number Diff line number Diff line change @@ -8,12 +8,13 @@ MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
88
99TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
1010VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}
11- VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-False}
11+ VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-True}
1212NUM_GPUS=${NUM_GPUS:-8}
1313FSDP_SIZE=${FSDP_SIZE:-4}
1414SP_SIZE=${SP_SIZE:-2}
15- EP_SIZE=${EP_SIZE:-2}
16- VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size8}
15+ EP_SIZE=${EP_SIZE:-1}
16+ MODEL_NAME_ONLY=${MODEL_ID##*/}
17+ VERL_EXP_NAME=${VERL_EXP_NAME:-${MODEL_NAME_ONLY}-function-reward-minimal-fsdp-size${FSDP_SIZE}}
1718
1819python3 -m verl.trainer.main_ppo \
1920 model_engine=veomni \
@@ -59,7 +60,7 @@ python3 -m verl.trainer.main_ppo \
5960 trainer.use_legacy_worker_impl=disable \
6061 trainer.critic_warmup=0 \
6162 trainer.logger=console \
62- trainer.project_name='verl_grpo_example_gsm8k' \
63+ trainer.project_name='verl_veomni_test' \
6364 trainer.experiment_name="${VERL_EXP_NAME}" \
6465 trainer.n_gpus_per_node="${NUM_GPUS}" \
6566 trainer.val_before_train="${VAL_BEFORE_TRAIN}" \
You can’t perform that action at this time.
0 commit comments