docs/sphinx_doc/source/tutorial/trinity_configs.md (12 additions, 0 deletions)
@@ -3,6 +3,18 @@
The following is the main config file for Trinity-RFT. Take `countdown.yaml` as an example.


## Monitor

```yaml
monitor:
project: "Trinity-RFT-countdown"
name: "qwen2.5-1.5B-countdown"
```

- `monitor.project`: The project name. It must be set manually.
- `monitor.name`: The name of the experiment. It must be set manually.


## Monitor

```yaml
examples/dpo_humanlike/dpo.yaml (0 additions, 1 deletion)
@@ -22,7 +22,6 @@ buffer:
train_dataset:
name: dpo_buffer
storage_type: file
algorithm_type: dpo
path: '/PATH/TO/DATASET/'
kwargs:
prompt_type: plaintext # plaintext/messages
examples/grpo_alfworld/alfworld.yaml (0 additions, 1 deletion)
@@ -21,7 +21,6 @@ buffer:
train_dataset:
name: alfworld_buffer
storage_type: queue
algorithm_type: ppo
path: 'sqlite:///alfworld.db'
explorer:
engine_type: vllm_async
examples/grpo_gsm8k/gsm8k.yaml (0 additions, 2 deletions)
@@ -34,12 +34,10 @@ buffer:
train_dataset:
name: gsm8k_buffer
storage_type: queue
algorithm_type: ppo
path: 'sqlite:////gsm8k.db'
# sft_warmup_dataset: # Uncomment these to enable sft warmup
# name: warmup_data
# storage_type: file
# algorithm_type: sft
# path: '/PATH/TO/WARMUP_DATA/'
# kwargs:
# prompt_type: plaintext
examples/grpo_webshop/webshop.yaml (0 additions, 1 deletion)
@@ -21,7 +21,6 @@ buffer:
train_dataset:
name: webshop_buffer
storage_type: queue
algorithm_type: ppo
path: 'sqlite:///webshop.db'
explorer:
engine_type: vllm_async
examples/opmd_gsm8k/opmd_gsm8k.yaml (0 additions, 1 deletion)
@@ -20,7 +20,6 @@ buffer:
train_dataset:
name: gsm8k_buffer
storage_type: queue
algorithm_type: opmd
path: 'sqlite:///gsm8k_opmd.db'
explorer:
engine_type: vllm_async
examples/ppo_countdown/countdown.yaml (0 additions, 1 deletion)
@@ -23,7 +23,6 @@ buffer:
train_dataset:
name: countdown_buffer
storage_type: queue
algorithm_type: ppo
path: 'sqlite:////countdown.db'
explorer:
engine_type: vllm_async
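Across the example configs above, the only change is the removal of `algorithm_type` from `buffer.train_dataset` (and from the commented-out `sft_warmup_dataset` in `gsm8k.yaml`): as the `trinity/common/config.py` change further down shows, this field is now filled in automatically from the trainer configuration. A minimal sketch of a resulting block, using the `countdown.yaml` values shown in this diff:

```yaml
buffer:
  train_dataset:
    name: countdown_buffer
    storage_type: queue
    # no algorithm_type here anymore; it is copied from the trainer config at check time
    path: 'sqlite:////countdown.db'
```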
pyproject.toml (1 addition, 0 deletions)
@@ -32,6 +32,7 @@ dependencies = [
"math_verify",
"ninja",
"fire",
"streamlit",
"flask",
"requests",
"tensorboard",
trinity/common/config.py (4 additions, 6 deletions)
@@ -142,7 +142,7 @@ class ExplorerConfig:
# For async engine (vllm_async), it can be larger than `engine_num`, e.g. 16 * `engine_num`
runner_num: int = 1

# repeat each task for `repeat_times` times (for GPRO-like algrorithms)
# repeat each task for `repeat_times` times (for GPRO-like algorithms)
repeat_times: int = 1

# for rollout tokneize
@@ -265,11 +265,9 @@ def _check_buffer(self) -> None:
else:
if self.buffer.train_dataset is None:
raise ValueError("buffer.train_dataset is required when mode is not 'both'")
if self.buffer.train_dataset.algorithm_type != self.trainer.algorithm_type:
raise ValueError(
f"buffer.train_dataset.algorithm_type ({self.buffer.train_dataset.algorithm_type}) "
f"is not consistent with trainer.algorithm_type ({self.trainer.algorithm_type})"
)
self.buffer.train_dataset.algorithm_type = self.trainer.algorithm_type
if self.buffer.sft_warmup_dataset is not None:
self.buffer.sft_warmup_dataset.algorithm_type = AlgorithmType.SFT
self.buffer.read_batch_size = self.data.batch_size * self.explorer.repeat_times

def check_and_update(self) -> None:
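The `_check_buffer` change replaces the old consistency check with an assignment: rather than raising when `buffer.train_dataset.algorithm_type` disagrees with `trainer.algorithm_type`, the trainer's value is now copied onto the dataset, and an `sft_warmup_dataset`, if configured, is always set to `AlgorithmType.SFT`. A sketch of what this means at the YAML level, using the `gsm8k.yaml` values from this diff and assuming the `trainer` section keys mirror the config attributes (that section is not shown here):

```yaml
trainer:
  algorithm_type: ppo        # assumed key layout; the single place the algorithm is declared
buffer:
  train_dataset:
    name: gsm8k_buffer       # inherits ppo from trainer during check_and_update
    storage_type: queue
    path: 'sqlite:////gsm8k.db'
  sft_warmup_dataset:        # when enabled, always treated as sft regardless of trainer
    name: warmup_data
    storage_type: file
    path: '/PATH/TO/WARMUP_DATA/'
```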