Commit fd893c7

[vllm] feat: retires vllm spmd mode in the codebase (verl-project#4411)
### What does this PR do?

Retires the legacy SPMD rollout path and standardizes the codebase on async-only rollout for vLLM (SGLang in the next PR). All Python modules, docs, workflows, and examples now reference the async server mode exclusively; the sync/SPMD runners, helpers, and CI jobs have been removed.

### Checklist Before Starting

- [ ] Search for similar PRs. Paste at least one query link here: _N/A (internal task to delete SPMD support)._
- [ ] Format the PR title as `[vllm, sglang, rollout, trainer, recipe, ci, doc] refactor: remove SPMD rollout`

### Test

Not run (SPMD suites deleted; async flow already covered by existing CI).

### API and Usage Example

All configs/scripts must now use `actor_rollout_ref.rollout.mode=async`. Example:

```bash
python -m verl.trainer.main_ppo \
    ... \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.mode=async \
    ...
```

### Design & Code Changes

- Deleted `verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py` and the entire SGLang SPMD engine, leaving only async implementations. Updated the `BaseRollout` registry, `RolloutConfig`, and `main_ppo` to error on `mode=sync`.
- Removed SPMD-specific docs, tests (`tests/workers/rollout/test_sglang_*`, `test_vllm_spmd`, `test_vllm_model_rope_scaling`), and CI steps (`.github/workflows/vllm.yml`, `sgl.yml`). Simplified lint exclusions and helper scripts accordingly.
- Cleaned recipes/examples to default `rollout_mode=async` and eliminated conditional sync branches (`examples/**`, `recipe/**`, e2e scripts). Added explicit validation in agent-loop utilities and the SFT runner to reject non-async requests.
- Updated documentation (FSDP/Megatron worker guides, hybrid flow, r1_ascend notes, FP8 guide) to describe async-only rollout and mention removal of the old SPMD pathway.

### Checklist Before Submitting

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting).
- [x] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [x] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: _Removed obsolete SPMD jobs; async coverage already exists._
- [ ] Once your PR is ready for CI, notify the `ci-request` channel (or Feishu group).
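Since `mode=sync` now errors out, any downstream script still pinned to the old default needs updating. A minimal sketch for locating stragglers; it only greps for the two spellings that appear in this PR's diffs, and assumes your scripts live under `examples/` and `recipe/` as in this repo:

```shell
#!/usr/bin/env bash
# Sketch: list shell scripts that still request the removed sync rollout.
# Patterns and directories mirror the ones touched in this PR; adjust for your tree.
set -euo pipefail

grep -rln --include='*.sh' \
    -e 'rollout_mode="sync"' \
    -e 'rollout.mode=sync' \
    examples/ recipe/ || echo "no sync-mode references found"
```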
1 parent ab07052 commit fd893c7

File tree

24 files changed: +331 −1,140 lines

.github/workflows/vllm.yml

Lines changed: 0 additions & 6 deletions
```diff
@@ -124,12 +124,6 @@ jobs:
       - name: Test the latest vLLM Rollout async with agent loop
         run: |
           ROLLOUT_NAME=vllm pytest -svvv tests/experimental/agent_loop
-      - name: Test the latest vLLM
-        run: |
-          torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s tests/workers/rollout/rollout_vllm/test_vllm_spmd.py
-      - name: Test the latest vLLM on model with rope scaling
-        run: |
-          torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s tests/workers/rollout/rollout_vllm/test_vllm_model_rope_scaling.py
       # Note(haibin.lin): for any new test, please update gpu_unit_tests.yaml to avoid repeated tests

   cleanup:
```

examples/grpo_trainer/run_qwen2-7b_math_megatron.sh

Lines changed: 3 additions & 5 deletions
```diff
@@ -2,11 +2,9 @@ set -x

 export CUDA_DEVICE_MAX_CONNECTIONS=1 # For megatron communication/computation overlapping

-rollout_mode="sync"
-if [ "$rollout_mode" = "async" ]; then
-    export VLLM_USE_V1=1
-    return_raw_chat="True"
-fi
+rollout_mode="async"
+export VLLM_USE_V1=1
+return_raw_chat="True"

 gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 gsm8k_test_path=$HOME/data/gsm8k/test.parquet
```

examples/grpo_trainer/run_qwen2-7b_seq_balance.sh

Lines changed: 2 additions & 2 deletions
```diff
@@ -4,9 +4,9 @@ set -x
 # For async rollout mode, dataset should return raw chat.
 rollout_mode="async"
 rollout_name="sglang" # sglang or vllm
-if [ "$rollout_mode" = "async" ]; then
+return_raw_chat="True"
+if [ "$rollout_name" = "vllm" ]; then
     export VLLM_USE_V1=1
-    return_raw_chat="True"
 fi

 python3 -m verl.trainer.main_ppo \
```
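The example scripts in this commit converge on one pattern: always set `return_raw_chat` (async rollout needs the raw chat from the dataset), and gate `VLLM_USE_V1` on the engine name rather than on the now-fixed mode. A minimal sketch of that guard, assuming only the two engine names these examples use:

```shell
#!/usr/bin/env bash
# Sketch of the engine-gated setup shared by the updated examples
# (assumption: rollout_name is only ever "vllm" or "sglang").
rollout_mode="async"      # sync has been removed; async is the only valid mode
return_raw_chat="True"    # async rollout requires raw chat from the dataset

rollout_name="vllm"       # vllm or sglang
if [ "$rollout_name" = "vllm" ]; then
    export VLLM_USE_V1=1  # the async server path relies on the vLLM V1 engine
fi
```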

examples/grpo_trainer/run_qwen2_5_vl-7b-sglang.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -40,7 +40,7 @@ python3 -m verl.trainer.main_ppo \
     actor_rollout_ref.rollout.n=5 \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=20 \
     actor_rollout_ref.ref.fsdp_config.param_offload=True \
-    actor_rollout_ref.rollout.mode=sync \
+    actor_rollout_ref.rollout.mode=async \
     algorithm.use_kl_in_reward=False \
     trainer.critic_warmup=0 \
     trainer.logger='["console","wandb"]' \
```

examples/gspo_trainer/run_qwen30b_gspo.sh

Lines changed: 3 additions & 0 deletions
```diff
@@ -131,6 +131,9 @@ fi

 # ===================================== Inference =====================================
 rollout_name=vllm
+if [ "$rollout_name" = "vllm" ]; then
+    export VLLM_USE_V1=1
+fi
 infer_tp=4
 infer_dp=1
 infer_ep=1
```

examples/gspo_trainer/test_gspo_3b_math.sh

Lines changed: 6 additions & 2 deletions
```diff
@@ -47,7 +47,11 @@ loss_agg_mode="seq-mean-token-mean"
 MODEL_PATH=Qwen/Qwen2.5-3B-Instruct
 offload=false # it's a small model, offloading will just slow-down training
 rollout_engine=vllm
-rollout_mode=sync # can be async to speedup large scale xps
+rollout_mode=async
+return_raw_chat="True"
+if [ "$rollout_engine" = "vllm" ]; then
+    export VLLM_USE_V1=1
+fi
 gpu_memory_utilization=0.8
 reward_manager=dapo
 adv_estimator=grpo
@@ -121,6 +125,7 @@ python3 -m verl.trainer.main_ppo \
     data.prompt_key=prompt \
     data.truncation='error' \
     data.filter_overlong_prompts=true \
+    data.return_raw_chat=${return_raw_chat} \
     data.train_batch_size=${train_batch_size} \
     data.max_prompt_length=${max_prompt_length} \
     data.max_response_length=${max_response_length} \
@@ -138,7 +143,6 @@ python3 -m verl.trainer.main_ppo \
     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
     actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
     actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
-    actor_rollout_ref.rollout.name=vllm \
     actor_rollout_ref.rollout.name=${rollout_engine} \
     actor_rollout_ref.rollout.mode=${rollout_mode} \
     actor_rollout_ref.model.path="${MODEL_PATH}" \
```

examples/gspo_trainer/test_gspo_3b_math_slurm.sh

Lines changed: 6 additions & 2 deletions
```diff
@@ -51,7 +51,11 @@ loss_agg_mode="seq-mean-token-mean"
 MODEL_PATH=Qwen/Qwen2.5-3B-Instruct
 offload=false # it's a small model, offloading will just slow-down training
 rollout_engine=vllm
-rollout_mode=sync # can be async to speedup large scale xps
+rollout_mode=async
+return_raw_chat="True"
+if [ "$rollout_engine" = "vllm" ]; then
+    export VLLM_USE_V1=1
+fi
 gpu_memory_utilization=0.8
 reward_manager=dapo
 adv_estimator=grpo
@@ -125,6 +129,7 @@ python3 -m verl.trainer.main_ppo \
     data.prompt_key=prompt \
     data.truncation='error' \
     data.filter_overlong_prompts=true \
+    data.return_raw_chat=${return_raw_chat} \
     data.train_batch_size=${train_batch_size} \
     data.max_prompt_length=${max_prompt_length} \
     data.max_response_length=${max_response_length} \
@@ -142,7 +147,6 @@ python3 -m verl.trainer.main_ppo \
     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
     actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
     actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
-    actor_rollout_ref.rollout.name=vllm \
     actor_rollout_ref.rollout.name=${rollout_engine} \
     actor_rollout_ref.rollout.mode=${rollout_mode} \
     actor_rollout_ref.model.path="${MODEL_PATH}" \
```

examples/gspo_trainer/test_gspo_qwen30b_a3b_ep.sh

Lines changed: 3 additions & 0 deletions
```diff
@@ -64,6 +64,9 @@ offload=True

 # gen
 rollout_name=vllm # vllm or sglang
+if [ "$rollout_name" = "vllm" ]; then
+    export VLLM_USE_V1=1
+fi
 gen_tp=1
 gen_dp=4
 gen_ep=4
```

examples/ppo_trainer/run_qwen2-7b_seq_balance.sh

Lines changed: 2 additions & 4 deletions
```diff
@@ -9,10 +9,8 @@ train_files="['$gsm8k_train_path', '$math_train_path']"
 test_files="['$gsm8k_test_path', '$math_test_path']"

 # For async rollout mode, dataset should return raw chat.
-rollout_mode="sync"
-if [ "$rollout_mode" = "async" ]; then
-    return_raw_chat="True"
-fi
+rollout_mode="async"
+return_raw_chat="True"

 python3 -m verl.trainer.main_ppo \
     algorithm.adv_estimator=gae \
```

recipe/dapo/test_dapo_gptoss_20b_megatron.sh

Lines changed: 3 additions & 6 deletions
```diff
@@ -58,13 +58,10 @@ use_dynamic_bsz=False # recommended but not necessary

 ################################################### quick config ###################################################

-rollout_mode="sync"
+rollout_mode="async"
 rollout_name="vllm" # sglang or vllm
-return_raw_chat="False"
-if [ "$rollout_mode" = "async" ]; then
-    export VLLM_USE_V1=1
-    return_raw_chat="True"
-fi
+export VLLM_USE_V1=1
+return_raw_chat="True"
 dtype="bfloat16" # ["bfloat16", "float16"]

 project_name='DAPO'
```
