Skip to content

Commit 9599a36

Browse files
committed
fix minor issue in veomni ci
Signed-off-by: 0oshowero0 <o0shower0o@outlook.com>

fix

Signed-off-by: 0oshowero0 <o0shower0o@outlook.com>
1 parent 2815eea commit 9599a36

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

.github/workflows/e2e_ppo_trainer_veomni_vllm.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -130,11 +130,11 @@ jobs:
130130
- name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
131131
run: |
132132
ray stop --force
133-
VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size4" bash tests/special_e2e/run_ppo_trainer_veomni.sh
133+
FSDP_SIZE=4 SP_SIZE=2 bash tests/special_e2e/run_ppo_trainer_veomni.sh
134134
- name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=8, USP=1)
135135
run: |
136136
ray stop --force
137-
MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/gsm8k/test.parquet VAL_BEFORE_TRAIN=True NUM_GPUS=8 FSDP_SIZE=4 SP_SIZE=2 EP_SIZE=1 VERL_EXP_NAME="qwen3-2b-vl-function-reward-minimal-fsdp-size8" bash tests/special_e2e/run_ppo_trainer_veomni.sh
137+
MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/gsm8k/test.parquet FSDP_SIZE=8 SP_SIZE=1 bash tests/special_e2e/run_ppo_trainer_veomni.sh
138138
139139
cleanup:
140140
runs-on: ubuntu-latest

tests/special_e2e/run_ppo_trainer_veomni.sh

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@ MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
88

99
TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
1010
VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}
11-
VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-False}
11+
VAL_BEFORE_TRAIN=${VAL_BEFORE_TRAIN:-True}
1212
NUM_GPUS=${NUM_GPUS:-8}
1313
FSDP_SIZE=${FSDP_SIZE:-4}
1414
SP_SIZE=${SP_SIZE:-2}
15-
EP_SIZE=${EP_SIZE:-2}
16-
VERL_EXP_NAME=${VERL_EXP_NAME:-qwen2.5-0.5b-function-reward-minimal-fsdp-size8}
15+
EP_SIZE=${EP_SIZE:-1}
16+
MODEL_NAME_ONLY=${MODEL_ID##*/}
17+
VERL_EXP_NAME=${VERL_EXP_NAME:-${MODEL_NAME_ONLY}-function-reward-minimal-fsdp-size${FSDP_SIZE}}
1718

1819
python3 -m verl.trainer.main_ppo \
1920
model_engine=veomni \
@@ -59,7 +60,7 @@ python3 -m verl.trainer.main_ppo \
5960
trainer.use_legacy_worker_impl=disable \
6061
trainer.critic_warmup=0 \
6162
trainer.logger=console \
62-
trainer.project_name='verl_grpo_example_gsm8k' \
63+
trainer.project_name='verl_veomni_test' \
6364
trainer.experiment_name="${VERL_EXP_NAME}" \
6465
trainer.n_gpus_per_node="${NUM_GPUS}" \
6566
trainer.val_before_train="${VAL_BEFORE_TRAIN}" \

0 commit comments

Comments
 (0)