Skip to content

Commit d5ed908

Browse files
authored
Add unittest on self-host runner (#15)
1 parent e4a356b commit d5ed908

File tree

25 files changed

+195
-264
lines changed

25 files changed

+195
-264
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Two-node Ray cluster used by the self-hosted unittest workflow.
# Settings shared by both nodes are factored into an extension field
# (`x-trinity-common`) and merged into each service via a YAML anchor;
# the resulting configuration is identical to spelling them out twice.
x-trinity-common: &trinity-common
  image: trinity-rft:latest-unittest
  pull_policy: never  # image is built locally on the runner; never pull
  working_dir: /workspace
  networks:
    - trinity-network
  volumes:
    - trinity-volume:/mnt
    - ../../..:/workspace  # repository checkout mounted as the workspace
  shm_size: "64G"

services:
  trinity-node-1:
    <<: *trinity-common
    # Head node: installs dev dependencies, then starts the Ray head
    # with the dashboard exposed on all interfaces.
    command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - RAY_ADDRESS=auto
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['4', '5']
              capabilities: [gpu]

  trinity-node-2:
    <<: *trinity-common
    # Worker node: joins the Ray cluster started by trinity-node-1.
    command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    depends_on:
      - trinity-node-1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['6', '7']
              capabilities: [gpu]

networks:
  trinity-network:
    driver: bridge

volumes:
  trinity-volume:
    external: true  # created out-of-band; not managed by this compose file

.github/workflows/unittest.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
name: unittest

on:
  issue_comment:
    types: [created]

permissions:
  contents: read
  pull-requests: write  # required for the coverage-comment step to post on the PR

jobs:
  unittest:
    # Only run when a collaborator comments `/run-unittest` on a pull request.
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-unittest') && github.event.comment.author_association == 'COLLABORATOR' }}
    runs-on: self-hosted

    steps:
      - uses: actions/checkout@v4
        with:
          # issue_comment events run against the default branch; without an
          # explicit ref the tests would never see the PR's changes. Check out
          # the PR merge ref instead.
          ref: refs/pull/${{ github.event.issue.number }}/merge
          path: trinity-${{ github.run_id }}
          fetch-depth: 0

      - name: Setup docker compose
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose up -d

      - name: Run unittest
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose exec trinity-node-1 pytest tests --ignore=tests/data --junitxml=pytest.xml
        # Keep the job going so results are uploaded and commented even on failure.
        continue-on-error: true

      - name: Upload test results
        if: always()  # publish results even when the pytest step failed
        # v2 of upload-artifact is deprecated and disabled on GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: pytest-results
          path: trinity-${{ github.run_id }}/pytest.xml

      - name: Pytest coverage comment
        uses: MishaKav/pytest-coverage-comment@main
        with:
          junitxml-title: Unittest Result Summary
          junitxml-path: trinity-${{ github.run_id }}/pytest.xml

      - name: Teardown docker compose
        if: always()  # never leak containers on the self-hosted runner
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose down --remove-orphans
# TODO: run data tests after the dependency conflict is resolved

docs/sphinx_doc/source/tutorial/trinity_configs.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ critic:
307307
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
308308
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
309309
ppo_micro_batch_size_per_gpu: 8
310-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
311310
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
312311
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
313312
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/dpo_humanlike/train_dpo.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ critic:
118118
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
119119
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
120120
ppo_micro_batch_size_per_gpu: 1
121-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
122121
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
123122
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
124123
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_alfworld/train_alfworld.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ critic:
117117
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118118
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119119
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121120
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122121
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123122
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_gsm8k/train_gsm8k.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ critic:
122122
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
123123
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
124124
ppo_micro_batch_size_per_gpu: 64
125-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
126125
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
127126
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
128127
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_math/math.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ model:
1818
max_prompt_tokens: 1024
1919
max_response_tokens: 3072
2020
checkpoint_path: /PATH/TO/CHECKPOINT/
21-
load_checkpoint: true
2221
cluster:
2322
node_num: 1
2423
gpu_per_node: 8

examples/grpo_math/train_math.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ actor_rollout_ref:
2525
actor:
2626
strategy: fsdp # This is for backward-compatibility
2727
ppo_mini_batch_size: 128
28-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
2928
ppo_micro_batch_size_per_gpu: 4
3029
use_dynamic_bsz: True # False
3130
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
@@ -62,7 +61,6 @@ actor_rollout_ref:
6261
wrap_policy:
6362
# transformer_layer_cls_to_wrap: None
6463
min_num_params: 0
65-
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
6664
log_prob_micro_batch_size_per_gpu: 16
6765
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
6866
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -86,7 +84,6 @@ actor_rollout_ref:
8684
max_num_batched_tokens: 8192
8785
max_model_len: null
8886
max_num_seqs: 1024
89-
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
9087
log_prob_micro_batch_size_per_gpu: 4
9188
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
9289
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -120,9 +117,7 @@ critic:
120117
min_num_params: 0
121118
fsdp_size: -1
122119
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
123-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
124120
ppo_micro_batch_size_per_gpu: 64
125-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
126121
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
127122
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
128123
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_sciworld/train_sciworld.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ actor_rollout_ref:
2525
actor:
2626
strategy: fsdp # This is for backward-compatibility
2727
ppo_mini_batch_size: 1536
28-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
2928
ppo_micro_batch_size_per_gpu: 1
3029
use_dynamic_bsz: False
3130
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
@@ -57,7 +56,6 @@ actor_rollout_ref:
5756
wrap_policy:
5857
# transformer_layer_cls_to_wrap: None
5958
min_num_params: 0
60-
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
6159
log_prob_micro_batch_size_per_gpu: 1
6260
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
6361
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -81,7 +79,6 @@ actor_rollout_ref:
8179
max_num_batched_tokens: 8192
8280
max_model_len: null
8381
max_num_seqs: 1024
84-
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
8582
log_prob_micro_batch_size_per_gpu: 1
8683
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
8784
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -115,9 +112,7 @@ critic:
115112
min_num_params: 0
116113
fsdp_size: -1
117114
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119115
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121116
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122117
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123118
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
@@ -140,7 +135,6 @@ reward_model:
140135
min_num_params: 0
141136
param_offload: False
142137
fsdp_size: -1
143-
# micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
144138
# micro_batch_size_per_gpu: 2 # set a number
145139
# max_length: null
146140
ulysses_sequence_parallel_size: 1 # sp size

examples/grpo_webshop/train_webshop.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ critic:
117117
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118118
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119119
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121120
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122121
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123122
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

0 commit comments

Comments
 (0)