Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Two-node Ray cluster used by the unittest workflow
# (.github/workflows/unittest.yaml). Both nodes share the repository checkout
# (mounted at /workspace) and a pre-provisioned external volume at /mnt.
services:
  trinity-node-1:
    image: trinity-rft:latest-unittest
    pull_policy: never  # image is built locally on the runner; never pull from a registry
    command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - RAY_ADDRESS=auto
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    working_dir: /workspace
    networks:
      - trinity-network
    volumes:
      - trinity-volume:/mnt
      - ../../..:/workspace  # repository root checked out by the workflow
    shm_size: "64G"
    # Report healthy only once the Ray head is accepting connections, so that
    # trinity-node-2 does not try to join before the cluster exists. Without
    # this, node-2 races the head's (slow) `pip install -e .[dev]` step.
    healthcheck:
      test: ["CMD-SHELL", "ray status"]
      interval: 10s
      timeout: 10s
      retries: 30
      start_period: 120s  # pip install can take a while on a cold cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["4", "5"]
              capabilities: [gpu]

  trinity-node-2:
    image: trinity-rft:latest-unittest
    pull_policy: never
    command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    working_dir: /workspace
    volumes:
      - trinity-volume:/mnt
      - ../../..:/workspace
    depends_on:
      trinity-node-1:
        # Wait for the head's healthcheck (above), not merely container start,
        # before attempting `ray start --address=...`.
        condition: service_healthy
    networks:
      - trinity-network
    shm_size: "64G"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["6", "7"]
              capabilities: [gpu]

networks:
  trinity-network:
    driver: bridge

volumes:
  trinity-volume:
    external: true  # must be created out-of-band: `docker volume create trinity-volume`
44 changes: 44 additions & 0 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: unittest

# Triggered by a "/run-unittest" comment from a collaborator on a pull request.
on:
  issue_comment:
    types: [created]

permissions:
  contents: read
  # Required by MishaKav/pytest-coverage-comment to post the summary comment
  # on the pull request; with `contents: read` alone the comment step fails.
  pull-requests: write

jobs:
  unittest:
    # only run on pull request comments starting with /run-unittest, from collaborators
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-unittest') && github.event.comment.author_association == 'COLLABORATOR' }}
    runs-on: self-hosted

    steps:
      - uses: actions/checkout@v4
        with:
          # issue_comment events run against the default branch, so without an
          # explicit ref this would test main instead of the PR. Check out the
          # PR's merge ref (issue number == PR number for PR comments).
          ref: refs/pull/${{ github.event.issue.number }}/merge
          path: trinity-${{ github.run_id }}
          fetch-depth: 0

      - name: Setup docker compose
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose up -d --wait

      - name: Run unittest
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose exec trinity-node-1 pytest tests --ignore=tests/data --junitxml=pytest.xml
        # Keep going on failure so results are still uploaded and commented.
        continue-on-error: true

      - name: Upload test results
        if: always()
        # upload-artifact@v2 is deprecated and rejected by GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: pytest-results
          path: trinity-${{ github.run_id }}/pytest.xml

      - name: Pytest coverage comment
        uses: MishaKav/pytest-coverage-comment@main
        with:
          junitxml-title: Unittest Result Summary
          junitxml-path: trinity-${{ github.run_id }}/pytest.xml

      - name: Cleanup docker compose
        # Always tear the cluster down so it does not leak on the self-hosted runner.
        if: always()
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose down
    # TODO: run data tests after the dependency conflict is resolved
1 change: 0 additions & 1 deletion docs/sphinx_doc/source/tutorial/trinity_configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 8
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/dpo_humanlike/train_dpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_alfworld/train_alfworld.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_gsm8k/train_gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_math/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ model:
max_prompt_tokens: 1024
max_response_tokens: 3072
checkpoint_path: /PATH/TO/CHECKPOINT/
load_checkpoint: true
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
5 changes: 0 additions & 5 deletions examples/grpo_math/train_math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ actor_rollout_ref:
actor:
strategy: fsdp # This is for backward-compatibility
ppo_mini_batch_size: 128
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 4
use_dynamic_bsz: True # False
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
Expand Down Expand Up @@ -62,7 +61,6 @@ actor_rollout_ref:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 16
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand All @@ -86,7 +84,6 @@ actor_rollout_ref:
max_num_batched_tokens: 8192
max_model_len: null
max_num_seqs: 1024
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 4
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand Down Expand Up @@ -120,9 +117,7 @@ critic:
min_num_params: 0
fsdp_size: -1
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
6 changes: 0 additions & 6 deletions examples/grpo_sciworld/train_sciworld.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ actor_rollout_ref:
actor:
strategy: fsdp # This is for backward-compatibility
ppo_mini_batch_size: 1536
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
use_dynamic_bsz: False
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
Expand Down Expand Up @@ -57,7 +56,6 @@ actor_rollout_ref:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 1
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand All @@ -81,7 +79,6 @@ actor_rollout_ref:
max_num_batched_tokens: 8192
max_model_len: null
max_num_seqs: 1024
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 1
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand Down Expand Up @@ -115,9 +112,7 @@ critic:
min_num_params: 0
fsdp_size: -1
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand All @@ -140,7 +135,6 @@ reward_model:
min_num_params: 0
param_offload: False
fsdp_size: -1
# micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
# micro_batch_size_per_gpu: 2 # set a number
# max_length: null
ulysses_sequence_parallel_size: 1 # sp size
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_webshop/train_webshop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/opmd_gsm8k/train_opmd_gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/ppo_countdown/train_countdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 8
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
Empty file added tests/buffer/__init__.py
Empty file.
Empty file added tests/common/__init__.py
Empty file.
24 changes: 14 additions & 10 deletions tests/common/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
class TestConfig(unittest.TestCase):
def test_load_default_config(self):
config = load_config(config_yaml_path)
print(config.data)
config.check_and_update()
self.assertIsNotNone(config.trainer.trainer_config)
self.assertEqual(config.trainer.trainer_config.trainer.n_gpus_per_node, 4)
self.assertEqual(config.trainer.trainer_config.trainer.nnodes, 1)
Expand All @@ -22,13 +24,15 @@ def test_load_default_config(self):
config.synchronizer.sync_iteration_interval,
)

def test_all_examples_are_valid(self): # TODO: useless
example_dir = os.path.join(os.path.dirname(__file__), "..", "..", "scripts", "config")
for filename in ["countdown", "gsm8k"]:
if filename.endswith(".yaml"):
config_path = os.path.join(example_dir, filename)
try:
load_config(config_path)
except Exception as e:
print(f"Error loading config {config_path}: {e}")
raise e
def test_all_examples_are_valid(self):
    """Every non-training YAML config under examples/ must load without error.

    Files named ``train*.yaml`` are skipped: they are backend trainer
    configs, not trinity configs, and are not expected to pass
    ``load_config``.
    """
    example_dir = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
    for example_name in os.listdir(example_dir):
        example_path = os.path.join(example_dir, example_name)
        # Skip stray top-level files (e.g. a README); os.listdir on a
        # non-directory would raise NotADirectoryError.
        if not os.path.isdir(example_path):
            continue
        for filename in os.listdir(example_path):
            if filename.endswith(".yaml") and not filename.startswith("train"):
                config_path = os.path.join(example_path, filename)
                # Fixed: the f-string previously had no placeholder.
                print(f"Checking config: {config_path}")
                try:
                    load_config(config_path)
                except Exception as e:
                    print(f"Error loading config {config_path}: {e}")
                    raise e
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_experience_model_experience_conversion(self):
tokens = torch.tensor([1, 2, 3], dtype=torch.int32)
reward = 0.6
prompt_length = 2
logprobs = torch.tensor([0.1], dtype=torch.float32)
logprobs = torch.tensor([0, 0, 0.1], dtype=torch.float32)
action_mask = torch.tensor([1, 0, 1], dtype=torch.bool)
experience = Experience(
tokens=tokens,
Expand All @@ -44,28 +44,28 @@ def test_batch_conversion(self):
tokens=torch.tensor([1, 2]),
prompt_length=1,
reward=float(0.1),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0.1]),
action_mask=torch.tensor([1, 0]),
),
Experience(
tokens=torch.tensor([1, 2, 3]),
prompt_length=2,
reward=float(0.2),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0, 0.1]),
action_mask=torch.tensor([1, 0, 1]),
),
Experience(
tokens=torch.tensor([1, 2, 3, 4]),
prompt_length=2,
reward=float(0.3),
logprobs=torch.tensor([0.1, 0.2]),
logprobs=torch.tensor([0, 0, 0.1, 0.2]),
action_mask=torch.tensor([1, 0, 1, 0]),
),
Experience(
tokens=torch.tensor([1, 2, 3, 4]),
prompt_length=3,
reward=float(0.4),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0, 0, 0.1]),
action_mask=torch.tensor([1, 0, 1, 0]),
),
]
Expand All @@ -89,7 +89,8 @@ def test_batch_conversion(self):
self.assertTrue(
torch.all(
batch.logprobs[i][
prompt_length : prompt_length
prompt_length
- exps[i].prompt_length : prompt_length
+ exps[i].tokens.size(0)
- exps[i].prompt_length
]
Expand Down
24 changes: 3 additions & 21 deletions tests/common/tmp/template_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,11 @@ mode: both
data:
dataset_path: ''
total_epoch: 1
batch_size: 1
batch_size: 32
train_split: 'train'
eval_split: ''
default_workflow_type: ''
default_reward_fn_type: ''
dataset_config: {}
format_config:
prompt_key: ''
response_key: ''
chat_template: ''
reward_fn_key: ''
workflow_key: ''
solution_key: ''
reward_key: ''
chosen_key: ''
rejected_key: ''
label_key: ''
dj_config_path: null
dj_process_desc: null
clean_strategy: iterative
min_size_ratio: null
model:
model_path: ''
max_prompt_tokens: 2048
Expand All @@ -32,8 +16,6 @@ cluster:
node_num: 1
gpu_per_node: 8
buffer:
storage_type: sql
db_url: ''
read_batch_size: 32
max_retry_times: 3
max_retry_interval: 1
Expand All @@ -46,8 +28,8 @@ explorer:
enable_prefix_caching: false
enforce_eager: true
dtype: bfloat16
temperature: 0.0
top_p: 1.0
temperature: 0.2
top_p: 0.95
top_k: -1
seed: 42
logprobs: 0
Expand Down
Loading