agentscope-ai
diff --git a/‎.github/workflows/unittest.yaml‎
Lines changed: 55 additions & 3 deletions b/‎.github/workflows/unittest.yaml‎
Lines changed: 55 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 9 additions & 8 deletions b/‎README.md‎
Lines changed: 9 additions & 8 deletions
diff --git a/‎README_zh.md‎
Lines changed: 8 additions & 7 deletions b/‎README_zh.md‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎docs/sphinx_doc/source/tutorial/example_mix_algo.md‎
Lines changed: 2 additions & 1 deletion b/‎docs/sphinx_doc/source/tutorial/example_mix_algo.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/sphinx_doc/source/tutorial/faq.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/sphinx_doc/source/tutorial/faq.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/sphinx_doc/source/tutorial/trinity_configs.md‎
Lines changed: 2 additions & 1 deletion b/‎docs/sphinx_doc/source/tutorial/trinity_configs.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎tests/buffer/queue_test.py‎
Lines changed: 1 addition & 2 deletions b/‎tests/buffer/queue_test.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎tests/buffer/sql_test.py‎
Lines changed: 0 additions & 2 deletions b/‎tests/buffer/sql_test.py‎
Lines changed: 0 additions & 2 deletions
@@ -12,12 +12,13 @@ permissions:
 jobs:
   unittest:
     # only run on pull request
-    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-unittest') && github.event.comment.author_association == 'COLLABORATOR' }}
+    if: ${{ github.event.issue.pull_request && (startsWith(github.event.comment.body, '/unittest')) && github.event.comment.author_association == 'COLLABORATOR' }}
     runs-on: self-hosted
 
     steps:
     - uses: actions/checkout@v4
       with:
+        fetch-depth: 0
         path: trinity-${{ github.run_id }}
         ref: refs/pull/${{ github.event.issue.number }}/head
 
@@ -33,18 +34,70 @@ jobs:
         docker compose exec trinity-node-1 ray status
         docker compose exec trinity-node-2 ray status
 
+    - name: Decide test type
+      id: test_type
+      working-directory: trinity-${{ github.run_id }}
+      env:
+        # The comment body is untrusted user input. It is handed to the script
+        # as an environment variable; expanding it inline in the script text
+        # would let a crafted comment execute arbitrary shell on the runner.
+        COMMENT: ${{ github.event.comment.body }}
+      run: |
+        # Map the trigger comment to a test scope, published as step outputs:
+        #   /unittest-all...          -> type=all
+        #   /unittest-diff...         -> type=diff
+        #   /unittest-module-<name>   -> type=module, module=<name>
+        #   anything else             -> type=all
+        # <name> is restricted to path-safe characters so it cannot carry
+        # newlines or shell metacharacters into GITHUB_OUTPUT or later steps.
+        if [[ "$COMMENT" == "/unittest-all"* ]]; then
+          echo "type=all" >> "$GITHUB_OUTPUT"
+        elif [[ "$COMMENT" == "/unittest-diff"* ]]; then
+          echo "type=diff" >> "$GITHUB_OUTPUT"
+        elif [[ "$COMMENT" =~ ^/unittest-module-([A-Za-z0-9_./-]+)[[:space:]]*$ ]]; then
+          echo "type=module" >> "$GITHUB_OUTPUT"
+          echo "module=${BASH_REMATCH[1]}" >> "$GITHUB_OUTPUT"
+        else
+          echo "type=all" >> "$GITHUB_OUTPUT"
+        fi
+
+    - name: Get changed modules (for diff)
+      if: steps.test_type.outputs.type == 'diff'
+      id: diff
+      working-directory: trinity-${{ github.run_id }}
+      run: |
+        # Files changed on this PR relative to its merge base with origin/main.
+        git fetch origin main
+        git diff --name-only origin/main...HEAD > changed_files.txt
+        # Second path component of every changed file under trinity/.
+        awk -F/ '/^(trinity)\// {print $2}' changed_files.txt | sort -u > changed_modules.txt
+        # Keep only entries that have a counterpart under tests/; passing a
+        # non-existent path to pytest aborts the whole run.
+        : > test_dirs.txt
+        while IFS= read -r module; do
+          if [ -e "tests/$module" ]; then
+            echo "tests/$module" >> test_dirs.txt
+          fi
+        done < changed_modules.txt
+
     - name: Run unittest
       working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
+      env:
+        # Step outputs derived from the PR comment are passed through the
+        # environment instead of being expanded into the script text.
+        TEST_TYPE: ${{ steps.test_type.outputs.type }}
+        TEST_MODULE: ${{ steps.test_type.outputs.module }}
       run: |
-        docker compose exec trinity-node-1 pytest tests -v -s --ignore=tests/data --ctrf report.json
+        # tests_run is exported BEFORE pytest starts so that the report steps
+        # below still run when the test suite itself fails.
+        if [ "$TEST_TYPE" = "all" ]; then
+          echo "tests_run=true" >> "$GITHUB_ENV"
+          docker compose exec trinity-node-1 pytest tests -v -s --ignore=tests/data --ctrf report.json
+        elif [ "$TEST_TYPE" = "diff" ]; then
+          # test_dirs.txt is written at the checkout root, which is three
+          # levels above this step's working directory.
+          TEST_DIRS_FILE=../../../test_dirs.txt
+          if [ -s "$TEST_DIRS_FILE" ]; then
+            TEST_DIRS=$(xargs < "$TEST_DIRS_FILE")
+            echo "tests_run=true" >> "$GITHUB_ENV"
+            # TEST_DIRS is intentionally unquoted: one argument per directory.
+            docker compose exec trinity-node-1 pytest $TEST_DIRS -v -s --ignore=tests/data --ctrf report.json
+          else
+            echo "No changed modules detected, skipping tests."
+            echo "tests_run=false" >> "$GITHUB_ENV"
+          fi
+        elif [ "$TEST_TYPE" = "module" ]; then
+          if [ -n "$TEST_MODULE" ]; then
+            echo "tests_run=true" >> "$GITHUB_ENV"
+            docker compose exec trinity-node-1 pytest "tests/$TEST_MODULE" -v -s --ignore=tests/data --ctrf report.json
+          else
+            echo "No module specified, skipping tests."
+            echo "tests_run=false" >> "$GITHUB_ENV"
+          fi
+        fi
 
     - name: Upload test results
+      # always() lifts the implicit success() check so a failing test run
+      # still uploads its report; tests_run guards the skipped-tests case.
+      if: always() && env.tests_run == 'true'
       uses: actions/upload-artifact@v4
       with:
         name: pytest-results
         path: trinity-${{ github.run_id }}/report.json
 
     - name: Publish Test Report
+      # Publish on failure too, but only when pytest was actually started.
+      if: always() && env.tests_run == 'true'
       uses: ctrf-io/github-test-reporter@v1
       with:
         report-path: trinity-${{ github.run_id }}/report.json
@@ -53,7 +106,6 @@ jobs:
         issue: ${{ github.event.issue.number }}
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      if: always()
 
     - name: Remove docker compose
       working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
 
@@ -152,6 +152,12 @@ It is designed to support diverse application scenarios and serve as a unified p
 ### Step 1: installation
 
 
+Requirements:
+- Python version >= 3.10, <= 3.12
+- CUDA version >= 12.4, <= 12.8
+- At least 2 GPUs
+
+
 Installation from source **(recommended)**:
 
 ```shell
@@ -181,13 +187,15 @@ pip install -e .[flash_attn]
 # for zsh
 pip install -e .\[flash_attn\]
 # Try the following command if you encounter errors during flash-attn installation
-# pip install flash-attn -v --no-build-isolation
+# pip install flash-attn==2.8.0.post2 -v --no-build-isolation
 ```
 
 Installation using pip:
 
 ```shell
 pip install trinity-rft==0.2.0
+# install flash-attn separately
+pip install flash-attn==2.8.0.post2
 ```
 
 Installation from docker:
@@ -206,13 +214,6 @@ docker build -f scripts/docker/Dockerfile -t trinity-rft:latest .
 docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path_of_data_and_checkpoints>:/data trinity-rft:latest
 ```
 
-
-**Requirements:**
-Python version >= 3.10,
-CUDA version >= 12.4,
-and at least 2 GPUs.
-
-
 ### Step 2: prepare dataset and model
 
 
 
@@ -151,6 +151,11 @@ Trinity-RFT是一个通用、灵活且易于使用的大语言模型强化微调
 
 ### 第一步：安装
 
+环境要求：
+- Python >= 3.10, <= 3.12
+- CUDA >= 12.4, <= 12.8
+- 至少 2 块 GPU
+
 
 源码安装 **（推荐）**：
 
@@ -181,13 +186,15 @@ pip install -e .[flash_attn]
 # 适用于 zsh
 pip install -e .\[flash_attn\]
 # 如果安装 flash-attn 时遇到错误，可以尝试以下命令
-# pip install flash-attn -v --no-build-isolation
+# pip install flash-attn==2.8.0.post2 -v --no-build-isolation
 ```
 
 使用 pip 安装：
 
 ```shell
 pip install trinity-rft==0.2.0
+# flash-attn 需要单独安装
+pip install flash-attn==2.8.0.post2
 ```
 
 使用 Docker 安装：
@@ -207,12 +214,6 @@ docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path
 ```
 
 
-**环境要求：**
-Python 版本 >= 3.10，
-CUDA 版本 >= 12.4，
-以及至少 2 块 GPU。
-
-
 ### 第二步：准备数据集和模型
 
 
 
@@ -142,7 +142,8 @@ We also need to add an `is_expert_mask` field when transforming to DataProto to
     cumsum = torch.cumsum(attention_mask, dim=-1)
     position_ids = torch.clip(cumsum - 1, 0, None).long()
     batch_dict = {
-        "uid": np.array(experiences.group_ids),
+        "uid": np.array([eid.tid for eid in experiences.eids]),
+        "unique_ids": np.array([eid.uid for eid in experiences.eids]),
         "position_ids": position_ids,
         "input_ids": experiences.tokens.long(),
         "responses": experiences.tokens[:, experiences.prompt_length :].long(),
 
@@ -65,7 +65,7 @@ File ".../flash_attn/flash_attn_interface.py", line 15, in <module>
 ImportError: ...
 ```
 
-**A:** The `flash-attn` module is not properly installed. Try to fix it by running `pip install flash-attn` or `pip install flash-attn -v --no-build-isolation`.
+**A:** The `flash-attn` module is not properly installed. Try to fix it by running `pip install flash-attn==2.8.0.post2` or `pip install flash-attn==2.8.0.post2 -v --no-build-isolation`.
 
 ---
 
 
@@ -90,6 +90,7 @@ algorithm:
   kl_penalty_fn: "none"
   kl_loss_fn: "k2"
   entropy_loss_fn: "default"
+  add_strategy: null
 ```
 
 - `algorithm_type`: Type of reinforcement learning algorithm. Supported types: `ppo`, `grpo`, `opmd`, `dpo`, `sft`, `mix`.
@@ -99,7 +100,7 @@ algorithm:
 - `kl_penalty_fn`: The KL penalty function used for computing KL penalty applied in reward.
 - `kl_loss_fn`: The KL loss function used for computing KL loss.
 - `entropy_loss_fn`: The entropy loss function used for computing entropy loss.
-
+- `add_strategy`: Strategy for adding new experiences to the experience buffer. If set, the explorer collects experiences from the workflow runners and pre-processes them before adding them to the buffer.
 
 ---
 
 
@@ -30,7 +30,7 @@ class TestQueueBuffer(RayUnittestBaseAysnc):
     )
     async def test_queue_buffer(self, name, use_priority_queue):
         meta = StorageConfig(
-            name="test_buffer",
+            name=name,
             algorithm_type="ppo",
             storage_type=StorageType.QUEUE,
             max_read_timeout=3,
@@ -60,7 +60,6 @@ async def test_queue_buffer(self, name, use_priority_queue):
         exps = [
             Experience(
                 tokens=torch.tensor([float(j) for j in range(i + 1)]),
-                prompt_length=i,
                 reward=float(i),
                 logprobs=torch.tensor([0.1]),
                 action_mask=torch.tensor([j % 2 for j in range(i + 1)]),
 
@@ -38,7 +38,6 @@ async def test_create_sql_buffer(self) -> None:
                 prompt_length=i,
                 reward=float(i),
                 logprobs=torch.tensor([0.1]),
-                action_mask=torch.tensor([j % 2 for j in range(i + 1)]),
             )
             for i in range(1, put_batch_size + 1)
         ]
@@ -54,7 +53,6 @@ async def test_create_sql_buffer(self) -> None:
             [
                 Experience(
                     tokens=torch.tensor([float(j) for j in range(i + 1)]),
-                    prompt_length=i,
                     reward=float(i),
                     logprobs=torch.tensor([0.1]),
                     action_mask=torch.tensor([j % 2 for j in range(i + 1)]),
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,6 @@ async def test_create_sql_buffer(self) -> None:`
`38`	`38`	`prompt_length=i,`
`39`	`39`	`reward=float(i),`
`40`	`40`	`logprobs=torch.tensor([0.1]),`
`41`		`- action_mask=torch.tensor([j % 2 for j in range(i + 1)]),`
`42`	`41`	`)`
`43`	`42`	`for i in range(1, put_batch_size + 1)`
`44`	`43`	`]`
`@@ -54,7 +53,6 @@ async def test_create_sql_buffer(self) -> None:`
`54`	`53`	`[`
`55`	`54`	`Experience(`
`56`	`55`	`tokens=torch.tensor([float(j) for j in range(i + 1)]),`
`57`		`- prompt_length=i,`
`58`	`56`	`reward=float(i),`
`59`	`57`	`logprobs=torch.tensor([0.1]),`
`60`	`58`	`action_mask=torch.tensor([j % 2 for j in range(i + 1)]),`