Merged (31 commits)
- d7c43fe Init Algorithm Module (#58) (pan-x-c, May 28, 2025)
- 5cd6cb6 Add Policy Loss Functions (#62) (pan-x-c, May 28, 2025)
- fe217aa Refactor advantage computation, and delete RayPPOTrainer.fit (#61) (yanxi-chen, Jun 3, 2025)
- 9d582e8 Add unittest && bug fix (#65) (chenyushuo, Jun 4, 2025)
- 732d801 Add KL/Entorpy Fn (#64) (pan-x-c, Jun 5, 2025)
- 2d8f0c1 Refactor advantage computation (cont.) (#68) (yanxi-chen, Jun 5, 2025)
- fec7f3c Refactor train step (#69) (chenyushuo, Jun 10, 2025)
- 48f596a Fix EntropyLossFn (#77) (shiweijiezero, Jun 11, 2025)
- 9e72996 Fix Conflicts with main (#75) (pan-x-c, Jun 11, 2025)
- 21ddf5e merge main (pan-x-c, Jun 11, 2025)
- 3c759d9 fix entropy lss (pan-x-c, Jun 11, 2025)
- dc8cb0c Add Sample Strategy (#78) (pan-x-c, Jun 12, 2025)
- 0e56607 Add doc for SFT (#81) (hiyuchang, Jun 13, 2025)
- aeabfe5 merge verl 0.4.0 (#79) (chenyushuo, Jun 17, 2025)
- b8d1faa [Feature] Add MIX algorithm (#83) (hiyuchang, Jun 17, 2025)
- 69ddbd0 Refactor on `select_keys` (#84) (chenyushuo, Jun 18, 2025)
- a592af7 Add guideline for adding new algorithm (#85) (pan-x-c, Jun 18, 2025)
- ab28d0c merge main (pan-x-c, Jun 18, 2025)
- b7ea08f fix file_reader (pan-x-c, Jun 18, 2025)
- eea4d85 update pyproject (pan-x-c, Jun 18, 2025)
- aedfa53 update pyproject.toml (pan-x-c, Jun 18, 2025)
- 2a36e0e clean code (pan-x-c, Jun 18, 2025)
- 5cb9ebe fix checkpoint sync mode (pan-x-c, Jun 19, 2025)
- f24db44 Update config manager (#86) (chenyushuo, Jun 19, 2025)
- c85d853 Update docs (#89) (hiyuchang, Jun 19, 2025)
- 7a1c526 Refactor `state_dict_meta` init (#90) (chenyushuo, Jun 19, 2025)
- 99a772a Unify async/sync RL (#91) (pan-x-c, Jun 20, 2025)
- 6f2d7c7 Support one-step ahead async RL (#93) (pan-x-c, Jun 20, 2025)
- eddf4e4 Refactor data module and support task pipeline in data processor (#92) (HYLcool, Jun 20, 2025)
- 7bc465c Merge branch 'main' into algorithm_dev (pan-x-c, Jun 20, 2025)
- acf7788 bumping repository code version to v0.2.0.dev0 (pan-x-c, Jun 20, 2025)
1 change: 1 addition & 0 deletions .gitignore
@@ -84,6 +84,7 @@ ENV/
logs/

# data-juicer
tmp/
outputs/
# agentscope
runs/
11 changes: 7 additions & 4 deletions README.md
@@ -148,8 +148,11 @@ pip install -e .\[dev\]

# Install flash-attn after all dependencies are installed
# Note: flash-attn will take a long time to compile, please be patient.
pip install flash-attn -v
# Try the following command if you encounter errors during installation
# for bash
pip install -e .[flash_attn]
# for zsh
pip install -e .\[flash_attn\]
# Try the following command if you encounter errors during flash-attn installation
# pip install flash-attn -v --no-build-isolation
```

@@ -263,7 +266,7 @@ Then, for command-line users, run the RFT process with the following command:
trinity run --config <config_path>
```

> For example, below is the command for fine-tuning Qwen-2.5-1.5B-Instruct on GSM8k dataset using GRPO algorithm:
> For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k dataset using GRPO algorithm:
> ```shell
> trinity run --config examples/grpo_gsm8k/gsm8k.yaml
> ```
@@ -276,7 +279,7 @@ For more detailed examples about how to use Trinity-RFT, please refer to the fol
+ [Off-policy mode of RFT](./docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md)
+ [Asynchronous mode of RFT](./docs/sphinx_doc/source/tutorial/example_async_mode.md)
+ [Multi-turn tasks](./docs/sphinx_doc/source/tutorial/example_multi_turn.md)
+ [Offline learning by DPO](./docs/sphinx_doc/source/tutorial/example_dpo.md)
+ [Offline learning by DPO or SFT](./docs/sphinx_doc/source/tutorial/example_dpo.md)
+ [Advanced data processing / human-in-the-loop](./docs/sphinx_doc/source/tutorial/example_data_functionalities.md)


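A side note on the bash/zsh split in the README hunk above: zsh expands unquoted square brackets as a filename glob and aborts with "no matches found" when nothing matches, while bash passes the literal string through to pip. A single-quoted form (a sketch, not part of this PR) sidesteps the difference in both shells:

```shell
# zsh treats unquoted [...] as a glob pattern and fails when it matches no
# files; bash hands the literal text to pip unchanged. Single quotes make
# the extras spec literal in both shells, so no backslash escaping is needed:
extras='.[flash_attn]'
echo "pip install -e $extras"  # the command to run, shown rather than executed here
```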
3 changes: 2 additions & 1 deletion docs/sphinx_doc/source/conf.py
@@ -22,12 +22,13 @@
"sphinx.ext.napoleon",
"sphinx.ext.autosectionlabel",
"myst_parser",
"sphinx.ext.mathjax",
]
source_suffix = {
".rst": "restructuredtext",
".md": "markdown",
}
myst_enable_extensions = ["colon_fence"]
myst_enable_extensions = ["colon_fence", "amsmath", "dollarmath"]

# Prefix document path to section labels, otherwise autogenerated labels would
# look like 'heading' rather than 'path/to/file:heading'
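For context on the conf.py hunk above: `sphinx.ext.mathjax` renders math in the HTML output, and MyST's `amsmath` and `dollarmath` extensions let the project's markdown tutorials write TeX math directly. An illustrative snippet of what the docs can now contain (not taken from this PR; the objective shown is the standard clipped policy-gradient loss):

```markdown
The importance ratio can be written inline as
$r_t(\theta) = \pi_\theta(a_t \mid s_t) / \pi_{\theta_\text{old}}(a_t \mid s_t)$,
or used in a display block:

$$
\mathcal{L}^{\text{CLIP}}(\theta) =
\mathbb{E}_t\left[\min\bigl(r_t(\theta)\,\hat{A}_t,\;
\operatorname{clip}(r_t(\theta),\, 1-\epsilon,\, 1+\epsilon)\,\hat{A}_t\bigr)\right]
$$
```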
13 changes: 11 additions & 2 deletions docs/sphinx_doc/source/index.rst
@@ -14,16 +14,24 @@ Welcome to Trinity-RFT's documentation!
:maxdepth: 1
:glob:
:hidden:
:caption: Tutorial
:caption: Examples

tutorial/example_reasoning_basic.md
tutorial/example_reasoning_advanced.md
tutorial/example_async_mode.md
tutorial/example_multi_turn.md
tutorial/example_dpo.md
tutorial/example_data_functionalities.md
tutorial/trinity_configs.md

.. toctree::
:maxdepth: 2
:glob:
:hidden:
:caption: Guidelines

tutorial/trinity_programming_guide.md
tutorial/trinity_configs.md
tutorial/example_mix_algo.md

.. toctree::
:maxdepth: 1
@@ -33,6 +41,7 @@ Welcome to Trinity-RFT's documentation!
build_api/trinity.buffer
build_api/trinity.explorer
build_api/trinity.trainer
build_api/trinity.algorithm
build_api/trinity.manager
build_api/trinity.common
build_api/trinity.utils
22 changes: 10 additions & 12 deletions docs/sphinx_doc/source/main.md
@@ -84,15 +84,18 @@ e.g., utilizing NCCL (when feasible) for model weight synchronization, sequence

## Getting started


*Note: this project is currently under active development; comments and suggestions are welcome!*

```{note}
Note: This project is currently under active development; comments and suggestions are welcome!
```



### Step 1: preparations


Trinity-RFT requires
Python version >= 3.10,
CUDA version >= 12.4,
and at least 2 GPUs.


Installation from source (recommended):
@@ -146,11 +149,6 @@ docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path_of_data_and_checkpoints>:/data trinity-rft:latest
```

Trinity-RFT requires
Python version >= 3.10,
CUDA version >= 12.4,
and at least 2 GPUs.


### Step 2: prepare dataset and model

@@ -243,15 +241,15 @@ trinity run --config <config_path>



For example, below is the command for fine-tuning Qwen-2.5-1.5B-Instruct on GSM8k dataset using GRPO algorithm:
For example, below is the command for fine-tuning Qwen2.5-1.5B-Instruct on GSM8k dataset using GRPO algorithm:

```shell
trinity run --config examples/grpo_gsm8k/gsm8k.yaml
```



More example config files can be found in `examples`.
More example config files can be found in [`examples`](https://github.com/modelscope/Trinity-RFT/tree/main/examples/).



@@ -260,7 +258,7 @@ For more detailed examples about how to use Trinity-RFT, please refer to the fol
+ [Off-policy mode of RFT](tutorial/example_reasoning_advanced.md)
+ [Asynchronous mode of RFT](tutorial/example_async_mode.md)
+ [Multi-turn tasks](tutorial/example_multi_turn.md)
+ [Offline learning by DPO](tutorial/example_dpo.md)
+ [Offline learning by DPO or SFT](tutorial/example_dpo.md)
+ [Advanced data processing / human-in-the-loop](tutorial/example_data_functionalities.md)


2 changes: 1 addition & 1 deletion docs/sphinx_doc/source/tutorial/example_async_mode.md
@@ -1,6 +1,6 @@
# Asynchronous RFT

This example shows how to run RFT in a fully asynchronous mode with the GRPO algorithm, Qwen-2.5-1.5B-Instruct model and GSM8K dataset.
This example shows how to run RFT in a fully asynchronous mode with the GRPO algorithm, Qwen2.5-1.5B-Instruct model and GSM8K dataset.

Trinity-RFT supports an asynchronous mode by running the trainer and explorer in separate processes.
