diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index e1a497c9de..d32581e9d9 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -95,6 +95,7 @@ jobs:
         with:
           name: pytest-results
           path: trinity-${{ github.run_id }}/report.json
+        continue-on-error: true

      - name: Publish Test Report
        if: env.tests_run == 'true'
@@ -106,12 +107,14 @@
          issue: ${{ github.event.issue.number }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        continue-on-error: true

      - name: Remove docker compose
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        if: always()
        run: |
          docker compose down --remove-orphans
+        continue-on-error: true

      - name: Cleanup workspace
        if: always()
diff --git a/docs/sphinx_doc/source/tutorial/example_dpo.md b/docs/sphinx_doc/source/tutorial/example_dpo.md
index cd0c214725..f457c6e888 100644
--- a/docs/sphinx_doc/source/tutorial/example_dpo.md
+++ b/docs/sphinx_doc/source/tutorial/example_dpo.md
@@ -79,7 +79,7 @@ trainer:

 ### Configuration for SFT

-We set the `algorithm_type` as `sft` to run SFT process. Then we modify the config file `sft.yaml` with the following changes:
+We set `algorithm_type` to `sft` to run the SFT process. Then we modify the config file [`examples/sft_mot/sft.yaml`](https://github.com/modelscope/Trinity-RFT/tree/main/examples/sft_mot/sft.yaml) with the following changes:

 ```yaml
 project:
@@ -120,5 +120,5 @@ trinity run --config examples/dpo_humanlike/dpo.yaml
 or, for SFT:

 ```shell
-trinity run --config /PATH/TO/sft.yaml
+trinity run --config examples/sft_mot/sft.yaml
 ```
diff --git a/examples/sft_mot/README.md b/examples/sft_mot/README.md
new file mode 100644
index 0000000000..f346136f58
--- /dev/null
+++ b/examples/sft_mot/README.md
@@ -0,0 +1,7 @@
+# SFT on Mixture-of-Thoughts Dataset
+
+This example demonstrates SFT on the [Mixture-of-Thoughts](https://huggingface.co/datasets/open-r1/Mixture-of-Thoughts) dataset.
+
+For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_dpo.md).
+
+The config files are [`sft.yaml`](sft.yaml) and [`train_sft.yaml`](train_sft.yaml).
diff --git a/examples/sft_mot/sft.yaml b/examples/sft_mot/sft.yaml
new file mode 100644
index 0000000000..90ed374f6a
--- /dev/null
+++ b/examples/sft_mot/sft.yaml
@@ -0,0 +1,34 @@
+mode: train
+project: "Trinity-RFT-example"
+name: "sft_mot"
+checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+algorithm:
+  algorithm_type: sft
+model:
+  model_path: /PATH/TO/MODEL/
+  max_prompt_tokens: 512
+  max_response_tokens: 10240
+cluster:
+  node_num: 1
+  gpu_per_node: 8
+buffer:
+  total_epochs: 1
+  batch_size: 32
+  max_retry_times: 3
+  max_retry_interval: 1
+  trainer_input:
+    experience_buffer:
+      name: MoT
+      storage_type: file
+      path: open-r1/Mixture-of-Thoughts
+      subset_name: math
+      format:
+        prompt_type: messages
+        messages_key: messages
+synchronizer:
+  sync_method: 'checkpoint'
+  sync_interval: 10
+trainer:
+  trainer_type: 'verl'
+  trainer_config_path: 'examples/sft_mot/train_sft.yaml'
+  save_interval: 10
diff --git a/examples/sft_mot/train_sft.yaml b/examples/sft_mot/train_sft.yaml
new file mode 100644
index 0000000000..a33509c74c
--- /dev/null
+++ b/examples/sft_mot/train_sft.yaml
@@ -0,0 +1,47 @@
+actor_rollout_ref:
+  hybrid_engine: True
+  model:
+    external_lib: null
+    override_config: { }
+    enable_gradient_checkpointing: True
+    use_remove_padding: True # False
+  actor:
+    strategy: fsdp # This is for backward-compatibility
+    ppo_micro_batch_size_per_gpu: 4
+    use_dynamic_bsz: True # False
+    ppo_max_token_len_per_gpu: 22000 # n * ${data.max_prompt_length} + ${data.max_response_length}
+    grad_clip: 1.0
+    ppo_epochs: 1
+    shuffle: False
+    ulysses_sequence_parallel_size: 1 # sp size
+    optim:
+      lr: 1e-5
+      lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
+      # min_lr_ratio: null # only useful for warmup with cosine
+      warmup_style: constant # select from constant/cosine
+      total_training_steps: -1 # must be override by program
+    fsdp_config:
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+      param_offload: False
+      optimizer_offload: False
+      fsdp_size: -1
+  ref:
+    fsdp_config:
+      param_offload: False
+      wrap_policy:
+        # transformer_layer_cls_to_wrap: None
+        min_num_params: 0
+    log_prob_micro_batch_size_per_gpu: 16
+    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
+    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
+    ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
+
+trainer:
+  balance_batch: True
+  resume_mode: auto # or auto or resume_path if
+  default_hdfs_dir: null
+  remove_previous_ckpt_in_save: False
+  del_local_ckpt_after_load: False
+  val_before_train: False
diff --git a/pyproject.toml b/pyproject.toml
index 53ae5e7f4e..4abb70446a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ requires-python = ">=3.10"
 dependencies = [
     "verl==0.4.1",
     "ray[default]>=2.45.0",
-    "vllm>=0.9.1",
+    "vllm>=0.9.1,<=0.9.2",
     "tensordict==0.6.2",
     "wandb",
     "omegaconf",
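As a quick usage sketch (not part of the diff above): the new example can be launched with the command added to `example_dpo.md`, assuming the placeholder paths in `examples/sft_mot/sft.yaml` have first been replaced with real local paths.

```shell
# Edit examples/sft_mot/sft.yaml and replace the placeholders
# checkpoint_root_dir: /PATH/TO/CHECKPOINT/ and model_path: /PATH/TO/MODEL/
# with real paths, then launch SFT on the Mixture-of-Thoughts dataset:
trinity run --config examples/sft_mot/sft.yaml
```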