3 changes: 3 additions & 0 deletions .github/workflows/unittest.yaml
@@ -95,6 +95,7 @@ jobs:
with:
name: pytest-results
path: trinity-${{ github.run_id }}/report.json
+ continue-on-error: true

- name: Publish Test Report
if: env.tests_run == 'true'
@@ -106,12 +107,14 @@
issue: ${{ github.event.issue.number }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ continue-on-error: true

- name: Remove docker compose
working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
if: always()
run: |
docker compose down --remove-orphans
+ continue-on-error: true

- name: Cleanup workspace
if: always()
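The three `+ continue-on-error: true` lines make the test-results upload, test-report publishing, and docker compose teardown steps non-fatal: if one of them fails, the failure is logged but the job itself is not marked as failed. A minimal sketch of how the key sits at the step level in a GitHub Actions workflow (job name and runner below are illustrative, not copied from the actual workflow):

```yaml
# Minimal sketch, not the actual workflow file.
# continue-on-error is a step-level key: a failure in this step is
# reported in the log but does not mark the job as failed.
jobs:
  unittest:                      # illustrative job name
    runs-on: ubuntu-latest       # assumed runner
    steps:
      - name: Remove docker compose
        if: always()
        run: docker compose down --remove-orphans
        continue-on-error: true  # cleanup problems no longer fail the job
```

The same key can also be set at the job level, but applying it per step limits the effect to these reporting and cleanup steps.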
4 changes: 2 additions & 2 deletions docs/sphinx_doc/source/tutorial/example_dpo.md
@@ -79,7 +79,7 @@ trainer:

### Configuration for SFT

- We set the `algorithm_type` as `sft` to run SFT process. Then we modify the config file `sft.yaml` with the following changes:
+ We set the `algorithm_type` as `sft` to run SFT process. Then we modify the config file [`examples/sft_mot/sft.yaml`](https://github.com/modelscope/Trinity-RFT/tree/main/examples/sft_mot/sft.yaml) with the following changes:

```yaml
project: <project_name>
@@ -120,5 +120,5 @@ trinity run --config examples/dpo_humanlike/dpo.yaml
or, for SFT:

```shell
- trinity run --config /PATH/TO/sft.yaml
+ trinity run --config examples/sft_mot/sft.yaml
```
7 changes: 7 additions & 0 deletions examples/sft_mot/README.md
@@ -0,0 +1,7 @@
# SFT on Mixture-of-Thoughts Dataset

This example shows the usage of SFT on the [Mixture-of-Thoughts](https://huggingface.co/datasets/open-r1/Mixture-of-Thoughts) dataset.

For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_dpo.md).

The config files are located in [`sft.yaml`](sft.yaml) and [`train_sft.yaml`](train_sft.yaml).
34 changes: 34 additions & 0 deletions examples/sft_mot/sft.yaml
@@ -0,0 +1,34 @@
mode: train
project: "Trinity-RFT-example"
name: "sft_mot"
checkpoint_root_dir: /PATH/TO/CHECKPOINT/
algorithm:
algorithm_type: sft
model:
model_path: /PATH/TO/MODEL/
max_prompt_tokens: 512
max_response_tokens: 10240
cluster:
node_num: 1
gpu_per_node: 8
buffer:
total_epochs: 1
batch_size: 32
max_retry_times: 3
max_retry_interval: 1
trainer_input:
experience_buffer:
name: MoT
storage_type: file
path: open-r1/Mixture-of-Thoughts
subset_name: math
format:
prompt_type: messages
messages_key: messages
synchronizer:
sync_method: 'checkpoint'
sync_interval: 10
trainer:
trainer_type: 'verl'
trainer_config_path: 'examples/sft_mot/train_sft.yaml'
save_interval: 10
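The `format` section tells the trainer how to read the dataset: `prompt_type: messages` selects chat-style records, and `messages_key: messages` names the column that holds the conversation turns. As a rough sketch of the record shape this implies (the field values below are invented; the exact Mixture-of-Thoughts schema is not shown here):

```yaml
# Hypothetical single record from the `messages` column (illustrative only):
messages:
  - role: user
    content: "Prove that the sum of two even integers is even."
  - role: assistant
    content: "Write a = 2m and b = 2n; then a + b = 2(m + n), which is even."
```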
47 changes: 47 additions & 0 deletions examples/sft_mot/train_sft.yaml
@@ -0,0 +1,47 @@
actor_rollout_ref:
hybrid_engine: True
model:
external_lib: null
override_config: { }
enable_gradient_checkpointing: True
use_remove_padding: True # False
actor:
strategy: fsdp # This is for backward-compatibility
ppo_micro_batch_size_per_gpu: 4
use_dynamic_bsz: True # False
ppo_max_token_len_per_gpu: 22000 # n * ${data.max_prompt_length} + ${data.max_response_length}
grad_clip: 1.0
ppo_epochs: 1
shuffle: False
ulysses_sequence_parallel_size: 1 # sp size
optim:
lr: 1e-5
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
# min_lr_ratio: null # only useful for warmup with cosine
warmup_style: constant # select from constant/cosine
total_training_steps: -1 # must be override by program
fsdp_config:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
param_offload: False
optimizer_offload: False
fsdp_size: -1
ref:
fsdp_config:
param_offload: False
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
log_prob_micro_batch_size_per_gpu: 16
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size

trainer:
balance_batch: True
resume_mode: auto # or auto or resume_path if
default_hdfs_dir: null
remove_previous_ckpt_in_save: False
del_local_ckpt_after_load: False
val_before_train: False
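The inline comment on `ppo_max_token_len_per_gpu` describes the sizing rule used with dynamic batching. Plugging in the limits from `sft.yaml` above, the budget works out roughly as follows (reading 22000 as room for about two packed sequences is an assumption, not something stated in either config):

```yaml
# Rough budget check, assuming use_dynamic_bsz packs whole sequences per GPU:
#   longest sequence     = max_prompt_tokens + max_response_tokens
#                        = 512 + 10240 = 10752 tokens
#   two packed sequences = 2 * 10752 = 21504 tokens  <= 22000
ppo_max_token_len_per_gpu: 22000
```

If the prompt or response limits in `sft.yaml` are raised, this value would need to grow with them.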
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -23,7 +23,7 @@ requires-python = ">=3.10"
dependencies = [
"verl==0.4.1",
"ray[default]>=2.45.0",
- "vllm>=0.9.1",
+ "vllm>=0.9.1,<=0.9.2",
"tensordict==0.6.2",
"wandb",
"omegaconf",