3 changes: 3 additions & 0 deletions .github/workflows/unittest.yaml
@@ -95,6 +95,7 @@ jobs:
with:
name: pytest-results
path: trinity-${{ github.run_id }}/report.json
+ continue-on-error: true

- name: Publish Test Report
if: env.tests_run == 'true'
@@ -106,12 +107,14 @@
issue: ${{ github.event.issue.number }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ continue-on-error: true

- name: Remove docker compose
working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
if: always()
run: |
docker compose down --remove-orphans
+ continue-on-error: true

- name: Cleanup workspace
if: always()
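The three `+ continue-on-error: true` lines make the test-results upload, test-report publishing, and docker compose teardown steps non-fatal: if one of them fails, the failure is logged but the job itself is not marked as failed. A minimal sketch of how the key sits at the step level in a GitHub Actions workflow (job name and runner below are illustrative, not copied from the actual workflow):

```yaml
# Minimal sketch, not the actual workflow file.
# continue-on-error is a step-level key: a failure in this step is
# reported in the log but does not mark the job as failed.
jobs:
  unittest:                      # illustrative job name
    runs-on: ubuntu-latest       # assumed runner
    steps:
      - name: Remove docker compose
        if: always()
        run: docker compose down --remove-orphans
        continue-on-error: true  # cleanup problems no longer fail the job
```

The same key can also be set at the job level, but applying it per step limits the effect to these reporting and cleanup steps.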
4 changes: 2 additions & 2 deletions docs/sphinx_doc/source/tutorial/example_dpo.md
@@ -79,7 +79,7 @@ trainer:

### Configuration for SFT

- We set the `algorithm_type` as `sft` to run SFT process. Then we modify the config file `sft.yaml` with the following changes:
+ We set the `algorithm_type` as `sft` to run SFT process. Then we modify the config file [`examples/sft_mot/sft.yaml`](https://github.com/modelscope/Trinity-RFT/tree/main/examples/sft_mot/sft.yaml) with the following changes:

```yaml
project: <project_name>
@@ -120,5 +120,5 @@ trinity run --config examples/dpo_humanlike/dpo.yaml
or, for SFT:

```shell
- trinity run --config /PATH/TO/sft.yaml
+ trinity run --config examples/sft_mot/sft.yaml
```
7 changes: 7 additions & 0 deletions examples/sft_mot/README.md
@@ -0,0 +1,7 @@
# SFT on Mixture-of-Thoughts Dataset

This example shows the usage of SFT on the [Mixture-of-Thoughts](https://huggingface.co/datasets/open-r1/Mixture-of-Thoughts) dataset.

For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_dpo.md).

The config files are located in [`sft.yaml`](sft.yaml) and [`train_sft.yaml`](train_sft.yaml).
34 changes: 34 additions & 0 deletions examples/sft_mot/sft.yaml
@@ -0,0 +1,34 @@
mode: train
project: "Trinity-RFT-example"
name: "sft_mot"
checkpoint_root_dir: /PATH/TO/CHECKPOINT/
algorithm:
algorithm_type: sft
model:
model_path: /PATH/TO/MODEL/
max_prompt_tokens: 512
max_response_tokens: 10240
cluster:
node_num: 1
gpu_per_node: 8
buffer:
total_epochs: 1
batch_size: 32
max_retry_times: 3
max_retry_interval: 1
trainer_input:
experience_buffer:
name: MoT
storage_type: file
path: open-r1/Mixture-of-Thoughts
subset_name: math
format:
prompt_type: messages
messages_key: messages
synchronizer:
sync_method: 'checkpoint'
sync_interval: 10
trainer:
trainer_type: 'verl'
trainer_config_path: 'examples/sft_mot/train_sft.yaml'
save_interval: 10
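The `format` section tells the trainer how to read the dataset: `prompt_type: messages` selects chat-style records, and `messages_key: messages` names the column that holds the conversation turns. As a rough sketch of the record shape this implies (the field values below are invented; the exact Mixture-of-Thoughts schema is not shown here):

```yaml
# Hypothetical single record from the `messages` column (illustrative only):
messages:
  - role: user
    content: "Prove that the sum of two even integers is even."
  - role: assistant
    content: "Write a = 2m and b = 2n; then a + b = 2(m + n), which is even."
```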
47 changes: 47 additions & 0 deletions examples/sft_mot/train_sft.yaml
@@ -0,0 +1,47 @@
actor_rollout_ref:
hybrid_engine: True
model:
external_lib: null
override_config: { }
enable_gradient_checkpointing: True
use_remove_padding: True # False
actor:
strategy: fsdp # This is for backward-compatibility
ppo_micro_batch_size_per_gpu: 4
use_dynamic_bsz: True # False
ppo_max_token_len_per_gpu: 22000 # n * ${data.max_prompt_length} + ${data.max_response_length}
grad_clip: 1.0
ppo_epochs: 1
shuffle: False
ulysses_sequence_parallel_size: 1 # sp size
optim:
lr: 1e-5
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
# min_lr_ratio: null # only useful for warmup with cosine
warmup_style: constant # select from constant/cosine
total_training_steps: -1 # must be override by program
fsdp_config:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
param_offload: False
optimizer_offload: False
fsdp_size: -1
ref:
fsdp_config:
param_offload: False
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
log_prob_micro_batch_size_per_gpu: 16
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size

trainer:
balance_batch: True
resume_mode: auto # or auto or resume_path if
default_hdfs_dir: null
remove_previous_ckpt_in_save: False
del_local_ckpt_after_load: False
val_before_train: False
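The inline comment on `ppo_max_token_len_per_gpu` describes the sizing rule used with dynamic batching. Plugging in the limits from `sft.yaml` above, the budget works out roughly as follows (reading 22000 as room for about two packed sequences is an assumption, not something stated in either config):

```yaml
# Rough budget check, assuming use_dynamic_bsz packs whole sequences per GPU:
#   longest sequence     = max_prompt_tokens + max_response_tokens
#                        = 512 + 10240 = 10752 tokens
#   two packed sequences = 2 * 10752 = 21504 tokens  <= 22000
ppo_max_token_len_per_gpu: 22000
```

If the prompt or response limits in `sft.yaml` are raised, this value would need to grow with them.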
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -23,7 +23,7 @@ requires-python = ">=3.10"
dependencies = [
"verl==0.4.1",
"ray[default]>=2.45.0",
- "vllm>=0.9.1",
+ "vllm>=0.9.1,<=0.9.2",
"tensordict==0.6.2",
"wandb",
"omegaconf",