diff --git a/tests/llm/test_grpo.py b/tests/llm/test_grpo.py
index 92dd919f06d5..dd58d6c89b20 100644
--- a/tests/llm/test_grpo.py
+++ b/tests/llm/test_grpo.py
@@ -92,7 +92,7 @@ def test_grpo(self):
             # 运行主逻辑
             cmd = 'python -u -m paddle.distributed.launch \
                     --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \
-                    ../../config/qwen/reinforce_plus_plus_argument.yaml \
+                    ../../config/qwen/grpo_argument.yaml \
                     --actor_model_name_or_path "Qwen/Qwen2-1.5B" \
                     --max_dec_len 128 \
                     --max_steps 3 \
diff --git a/tests/llm/test_reinforce_plus_plus.py b/tests/llm/test_reinforce_plus_plus.py
index 8e74e663d886..b0f1e332c64a 100644
--- a/tests/llm/test_reinforce_plus_plus.py
+++ b/tests/llm/test_reinforce_plus_plus.py
@@ -92,7 +92,7 @@ def test_reinforce_plus_plus(self):
             # 运行主逻辑
             cmd = 'python -u -m paddle.distributed.launch \
                     --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \
-                    ../../config/qwen/reinforce_plus_plus_argument.yaml \
+                    ../../config/qwen/grpo_argument.yaml \
                     --rl_algorithm "reinforce_plus_plus" \
                     --actor_model_name_or_path "Qwen/Qwen2-1.5B" \
                     --max_dec_len 128 \