diff --git a/tests/llm/test_grpo.py b/tests/llm/test_grpo.py index 92dd919f06d5..dd58d6c89b20 100644 --- a/tests/llm/test_grpo.py +++ b/tests/llm/test_grpo.py @@ -92,7 +92,7 @@ def test_grpo(self): # 运行主逻辑 cmd = 'python -u -m paddle.distributed.launch \ --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \ - ../../config/qwen/reinforce_plus_plus_argument.yaml \ + ../../config/qwen/grpo_argument.yaml \ --actor_model_name_or_path "Qwen/Qwen2-1.5B" \ --max_dec_len 128 \ --max_steps 3 \ diff --git a/tests/llm/test_reinforce_plus_plus.py b/tests/llm/test_reinforce_plus_plus.py index 8e74e663d886..b0f1e332c64a 100644 --- a/tests/llm/test_reinforce_plus_plus.py +++ b/tests/llm/test_reinforce_plus_plus.py @@ -92,7 +92,7 @@ def test_reinforce_plus_plus(self): # 运行主逻辑 cmd = 'python -u -m paddle.distributed.launch \ --devices "$CUDA_VISIBLE_DEVICES" run_rl.py \ - ../../config/qwen/reinforce_plus_plus_argument.yaml \ + ../../config/qwen/grpo_argument.yaml \ --rl_algorithm "reinforce_plus_plus" \ --actor_model_name_or_path "Qwen/Qwen2-1.5B" \ --max_dec_len 128 \