From 7a1f4c94004e8bc9a2f749d12d925265aa5bf56e Mon Sep 17 00:00:00 2001 From: hiyuchang Date: Thu, 24 Jul 2025 15:47:03 +0800 Subject: [PATCH 1/5] add env intro for alfworld --- .../source/tutorial/example_multi_turn.md | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/sphinx_doc/source/tutorial/example_multi_turn.md b/docs/sphinx_doc/source/tutorial/example_multi_turn.md index 7169731b9c..10dd871acf 100644 --- a/docs/sphinx_doc/source/tutorial/example_multi_turn.md +++ b/docs/sphinx_doc/source/tutorial/example_multi_turn.md @@ -14,7 +14,36 @@ To run the ALFworld and WebShop env, you need to setup the corresponding environ - ALFworld is a text-based interactive environment that simulates household scenarios. Agents need to understand natural language instructions and complete various domestic tasks like finding objects, moving items, and operating devices in a virtual home environment. - WebShop is a simulated online shopping environment where AI agents learn to shop based on user requirements. The platform allows agents to browse products, compare options, and make purchase decisions, mimicking real-world e-commerce interactions. -You may refer to their original environment to complete the setup. +
+Guidelines for preparing ALFWorld environment + +1. Install the package with pip: `pip install "alfworld[full]"` + +2. Set the directory where the environment data will be stored: `export ALFWORLD_DATA=/path/to/alfworld/data` + +3. Download the environment data: `alfworld-download` + +Now you can find the environment in `$ALFWORLD_DATA` and continue with the following steps. +
+ +
+Guidelines for preparing WebShop environment + +1. Install Python 3.8.13 + +2. Install Java + +3. Download the source code: `git clone https://github.com/princeton-nlp/webshop.git webshop` + +4. Create and activate a virtual environment: run `conda create -n webshop python=3.8.13`, then `conda activate webshop` + +5. Install the requirements into the `webshop` virtual environment by running the `setup.sh` script from the cloned `webshop` directory: `./setup.sh [-d small|all]` + +Now you can continue with the following steps. +
+ + +You may refer to their original environment for more details. - For ALFWorld, refer to the [ALFWorld](https://github.com/alfworld/alfworld) repository. - For WebShop, refer to the [WebShop](https://github.com/princeton-nlp/WebShop) repository. From 40a9cf3e7acf62f793593bf26a03cfb9d5d5e9f0 Mon Sep 17 00:00:00 2001 From: hiyuchang Date: Thu, 24 Jul 2025 18:18:43 +0800 Subject: [PATCH 2/5] change theme --- docs/sphinx_doc/source/api_reference.rst | 59 +++++++++++++++++++ docs/sphinx_doc/source/conf.py | 2 +- docs/sphinx_doc/source/index.rst | 13 ++-- docs/sphinx_doc/source/main.md | 28 ++++----- .../tutorial/example_data_functionalities.md | 2 +- 5 files changed, 78 insertions(+), 26 deletions(-) create mode 100644 docs/sphinx_doc/source/api_reference.rst diff --git a/docs/sphinx_doc/source/api_reference.rst b/docs/sphinx_doc/source/api_reference.rst new file mode 100644 index 0000000000..cf88126f05 --- /dev/null +++ b/docs/sphinx_doc/source/api_reference.rst @@ -0,0 +1,59 @@ +.. _api-reference: + +API Reference +============= + +This page shows the most useful APIs of Trinity-RFT. + +.. contents:: + :local: + :depth: 2 + +trinity.buffer +-------------- +.. automodule:: trinity.buffer + :members: + :undoc-members: + :show-inheritance: + +trinity.explorer +---------------- +.. automodule:: trinity.explorer + :members: + :undoc-members: + :show-inheritance: + +trinity.trainer +--------------- +.. automodule:: trinity.trainer + :members: + :undoc-members: + :show-inheritance: + +trinity.algorithm +----------------- +.. automodule:: trinity.algorithm + :members: + :undoc-members: + :show-inheritance: + +trinity.manager +--------------- +.. automodule:: trinity.manager + :members: + :undoc-members: + :show-inheritance: + +trinity.common +-------------- +.. automodule:: trinity.common + :members: + :undoc-members: + :show-inheritance: + +trinity.utils +------------- +.. 
automodule:: trinity.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py index ffaabf72c9..bde1be3681 100644 --- a/docs/sphinx_doc/source/conf.py +++ b/docs/sphinx_doc/source/conf.py @@ -49,7 +49,7 @@ # -- Options for HTML output ------------------------------------------------- -html_theme = "sphinx_rtd_theme" +html_theme = "sphinxawesome_theme" html_theme_options = { "navigation_depth": 3, diff --git a/docs/sphinx_doc/source/index.rst b/docs/sphinx_doc/source/index.rst index fc085215b0..062e9e9e7f 100644 --- a/docs/sphinx_doc/source/index.rst +++ b/docs/sphinx_doc/source/index.rst @@ -35,19 +35,14 @@ Welcome to Trinity-RFT's documentation! .. toctree:: :maxdepth: 2 + :hidden: :caption: FAQ tutorial/faq.md .. toctree:: - :maxdepth: 1 - :glob: + :maxdepth: 2 + :hidden: :caption: API Reference - build_api/trinity.buffer - build_api/trinity.explorer - build_api/trinity.trainer - build_api/trinity.algorithm - build_api/trinity.manager - build_api/trinity.common - build_api/trinity.utils + api_reference diff --git a/docs/sphinx_doc/source/main.md b/docs/sphinx_doc/source/main.md index bdbf75eae8..2f30f89960 100644 --- a/docs/sphinx_doc/source/main.md +++ b/docs/sphinx_doc/source/main.md @@ -3,8 +3,6 @@ -# Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models - ## 🚀 News @@ -90,12 +88,12 @@ It is designed to support diverse application scenarios and serve as a unified p * **Adaptation to New Scenarios:** - Implement agent-environment interaction logic in a single `Workflow` or `MultiTurnWorkflow` class. ([Example](./docs/sphinx_doc/source/tutorial/example_multi_turn.md)) + Implement agent-environment interaction logic in a single `Workflow` or `MultiTurnWorkflow` class. 
([Example](/tutorial/example_multi_turn.md)) * **RL Algorithm Development:** - Develop custom RL algorithms (loss design, sampling, data processing) in compact, plug-and-play classes. ([Example](./docs/sphinx_doc/source/tutorial/example_mix_algo.md)) + Develop custom RL algorithms (loss design, sampling, data processing) in compact, plug-and-play classes. ([Example](/tutorial/example_mix_algo.md)) * **Low-Code Usage:** @@ -301,39 +299,39 @@ For studio users, click "Run" in the web interface. Tutorials for running different RFT modes: -+ [Quick example: GRPO on GSM8k](./docs/sphinx_doc/source/tutorial/example_reasoning_basic.md) -+ [Off-policy RFT](./docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md) -+ [Fully asynchronous RFT](./docs/sphinx_doc/source/tutorial/example_async_mode.md) -+ [Offline learning by DPO or SFT](./docs/sphinx_doc/source/tutorial/example_dpo.md) ++ [Quick example: GRPO on GSM8k](/tutorial/example_reasoning_basic.md) ++ [Off-policy RFT](/tutorial/example_reasoning_advanced.md) ++ [Fully asynchronous RFT](/tutorial/example_async_mode.md) ++ [Offline learning by DPO or SFT](/tutorial/example_dpo.md) Tutorials for adapting Trinity-RFT to a new multi-turn agentic scenario: -+ [Multi-turn tasks](./docs/sphinx_doc/source/tutorial/example_multi_turn.md) ++ [Multi-turn tasks](/tutorial/example_multi_turn.md) Tutorials for data-related functionalities: -+ [Advanced data processing & human-in-the-loop](./docs/sphinx_doc/source/tutorial/example_data_functionalities.md) ++ [Advanced data processing & human-in-the-loop](/tutorial/example_data_functionalities.md) Tutorials for RL algorithm development/research with Trinity-RFT: -+ [RL algorithm development with Trinity-RFT](./docs/sphinx_doc/source/tutorial/example_mix_algo.md) ++ [RL algorithm development with Trinity-RFT](/tutorial/example_mix_algo.md) -Guidelines for full configurations: see [this document](./docs/sphinx_doc/source/tutorial/trinity_configs.md) +Guidelines for full 
configurations: see [this document](/tutorial/trinity_configs.md) Guidelines for developers and researchers: -+ [Build new RL scenarios](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#workflows-for-rl-environment-developers) -+ [Implement new RL algorithms](./docs/sphinx_doc/source/tutorial/trinity_programming_guide.md#algorithms-for-rl-algorithm-developers) ++ [Build new RL scenarios](/tutorial/trinity_programming_guide.md#workflows-for-rl-environment-developers) ++ [Implement new RL algorithms](/tutorial/trinity_programming_guide.md#algorithms-for-rl-algorithm-developers) -For some frequently asked questions, see [FAQ](./docs/sphinx_doc/source/tutorial/faq.md). +For some frequently asked questions, see [FAQ](/tutorial/faq.md). diff --git a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md index bb73298ae3..2ddd135694 100644 --- a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md +++ b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md @@ -8,7 +8,7 @@ In this example, you will learn how to apply the data processor of Trinity-RFT t 2. how to configure the data processor 3. what the data processor can do -Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of the README file](../main.md), +Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of Quickstart](example_reasoning_basic.md), and store the base url and api key in the environment variables `OPENAI_BASE_URL` and `OPENAI_API_KEY` for some agentic or API-model usages if necessary. 
### Data Preparation From 06d0bc02d01f7111c481144ce124c4d88fea661d Mon Sep 17 00:00:00 2001 From: hiyuchang Date: Thu, 24 Jul 2025 19:06:56 +0800 Subject: [PATCH 3/5] fix pytoml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4abb70446a..07de52ca84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dev = [ doc = [ "sphinx", "sphinx-autobuild", - "sphinx_rtd_theme", + "sphinxawesome-theme", "myst-parser", ] From 1c8c7fd3590b824298e206ca8c0cf4183bf24a40 Mon Sep 17 00:00:00 2001 From: hiyuchang Date: Fri, 25 Jul 2025 15:50:09 +0800 Subject: [PATCH 4/5] change theme back --- docs/sphinx_doc/source/api_reference.rst | 67 ++++--------------- docs/sphinx_doc/source/conf.py | 2 +- .../source/tutorial/example_multi_turn.md | 3 +- 3 files changed, 16 insertions(+), 56 deletions(-) diff --git a/docs/sphinx_doc/source/api_reference.rst b/docs/sphinx_doc/source/api_reference.rst index cf88126f05..2fa2c8f692 100644 --- a/docs/sphinx_doc/source/api_reference.rst +++ b/docs/sphinx_doc/source/api_reference.rst @@ -3,57 +3,16 @@ API Reference ============= -This page shows the most useful APIs of Trinity-RFT. - -.. contents:: - :local: - :depth: 2 - -trinity.buffer --------------- -.. automodule:: trinity.buffer - :members: - :undoc-members: - :show-inheritance: - -trinity.explorer ----------------- -.. automodule:: trinity.explorer - :members: - :undoc-members: - :show-inheritance: - -trinity.trainer ---------------- -.. automodule:: trinity.trainer - :members: - :undoc-members: - :show-inheritance: - -trinity.algorithm ------------------ -.. automodule:: trinity.algorithm - :members: - :undoc-members: - :show-inheritance: - -trinity.manager ---------------- -.. automodule:: trinity.manager - :members: - :undoc-members: - :show-inheritance: - -trinity.common --------------- -.. automodule:: trinity.common - :members: - :undoc-members: - :show-inheritance: - -trinity.utils -------------- -.. 
automodule:: trinity.utils - :members: - :undoc-members: - :show-inheritance: +This page shows some useful APIs of Trinity-RFT. Click the API name to see the detailed documentation. + +.. toctree:: + :maxdepth: 1 + :glob: + + build_api/trinity.buffer + build_api/trinity.explorer + build_api/trinity.trainer + build_api/trinity.algorithm + build_api/trinity.manager + build_api/trinity.common + build_api/trinity.utils diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py index bde1be3681..ffaabf72c9 100644 --- a/docs/sphinx_doc/source/conf.py +++ b/docs/sphinx_doc/source/conf.py @@ -49,7 +49,7 @@ # -- Options for HTML output ------------------------------------------------- -html_theme = "sphinxawesome_theme" +html_theme = "sphinx_rtd_theme" html_theme_options = { "navigation_depth": 3, diff --git a/docs/sphinx_doc/source/tutorial/example_multi_turn.md b/docs/sphinx_doc/source/tutorial/example_multi_turn.md index 10dd871acf..1212b9dcf4 100644 --- a/docs/sphinx_doc/source/tutorial/example_multi_turn.md +++ b/docs/sphinx_doc/source/tutorial/example_multi_turn.md @@ -14,6 +14,7 @@ To run the ALFworld and WebShop env, you need to setup the corresponding environ - ALFworld is a text-based interactive environment that simulates household scenarios. Agents need to understand natural language instructions and complete various domestic tasks like finding objects, moving items, and operating devices in a virtual home environment. - WebShop is a simulated online shopping environment where AI agents learn to shop based on user requirements. The platform allows agents to browse products, compare options, and make purchase decisions, mimicking real-world e-commerce interactions. +
Guidelines for preparing ALFWorld environment @@ -41,7 +42,7 @@ Now you can find the environment in `$ALFWORLD_DATA` and continue with the follo Now you can continue with the following steps.
- +
You may refer to their original environment for more details. - For ALFWorld, refer to the [ALFWorld](https://github.com/alfworld/alfworld) repository. From 5ff76e4b7adc63d70b288a1d36b64593110dd298 Mon Sep 17 00:00:00 2001 From: hiyuchang Date: Fri, 25 Jul 2025 15:55:29 +0800 Subject: [PATCH 5/5] minor revision --- docs/sphinx_doc/source/main.md | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/sphinx_doc/source/main.md b/docs/sphinx_doc/source/main.md index 2f30f89960..665c125b4f 100644 --- a/docs/sphinx_doc/source/main.md +++ b/docs/sphinx_doc/source/main.md @@ -3,6 +3,7 @@ +# Trinity-RFT: A General-Purpose and Unified Framework for Reinforcement Fine-Tuning of Large Language Models ## 🚀 News @@ -80,6 +81,7 @@ It is designed to support diverse application scenarios and serve as a unified p ![Trinity-RFT-data-pipelines](../assets/trinity-data-pipelines.png) +
diff --git a/pyproject.toml b/pyproject.toml index 07de52ca84..4abb70446a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dev = [ doc = [ "sphinx", "sphinx-autobuild", - "sphinxawesome-theme", + "sphinx_rtd_theme", "myst-parser", ]