# CI workflow: end-to-end GRPO training smoke tests (FSDP and Megatron backends)
# on self-hosted L20 GPU runners, for both Deepseek and Qwen models.
name: e2e_grpo

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
      - v0.2.x
    paths:
      # NOTE: no leading space inside the quotes — a quoted " **/*.py"
      # filter would match nothing, silently disabling the trigger.
      - "**/*.py"
      - .github/workflows/e2e_grpo.yml
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - "verl/trainer/config/*.yaml"
      - .github/workflows/e2e_grpo.yml
      - "tests/e2e/*.sh"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  # Keep in-progress runs on main; cancel superseded runs everywhere else.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions just read content.
permissions:
  contents: read

jobs:
  e2e_gsm8k_megatron:
    runs-on: [self-hosted, l20-0]
    timeout-minutes: 60  # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      # Comma-separated bypass list; no leading space so each entry matches.
      NO_PROXY: "localhost,127.0.0.1"
      # Env values are strings to the consumer; quote to avoid int coercion.
      HF_HUB_ENABLE_HF_TRANSFER: "1"
    container:
      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
      options: --gpus all --shm-size=10g
    steps:
      # Pinned to a full commit SHA for supply-chain safety.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install hf_transfer
          pip3 install -e .[test]
      - name: Prepare gsm8k dataset
        run: |
          python3 examples/data_preprocess/gsm8k.py
      # Each training step force-stops any leftover Ray cluster from the
      # previous step before launching its own.
      - name: Running GRPO gsm8k e2e training tests with FSDP on 8 L20 GPUs (Deepseek)
        run: |
          ray stop --force
          bash tests/e2e/run_deepseek_grpo.sh
      - name: Running GRPO gsm8k e2e training tests with 3D parallelism on 8 L20 GPUs with Megatron (Deepseek)
        run: |
          ray stop --force
          bash tests/e2e/run_deepseek_grpo_megatron.sh
      - name: Running GRPO gsm8k e2e training tests with FSDP on 8 L20 GPUs (Qwen)
        run: |
          ray stop --force
          bash tests/e2e/run_qwen_grpo.sh
      - name: Running GRPO gsm8k e2e training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
        run: |
          ray stop --force
          bash tests/e2e/run_qwen_grpo_megatron.sh