Skip to content

Commit d5ed908

Browse files
authored
Add unittest on self-host runner (#15)
1 parent e4a356b commit d5ed908

File tree

25 files changed

+195
-264
lines changed

25 files changed

+195
-264
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Two-node Ray cluster used by the self-hosted unittest workflow.
# Settings shared by both nodes are factored into an extension field
# (`x-trinity-common`) and merged into each service via a YAML anchor;
# the resulting configuration is identical to spelling them out twice.
x-trinity-common: &trinity-common
  image: trinity-rft:latest-unittest
  pull_policy: never  # image is built locally on the runner; never pull
  working_dir: /workspace
  networks:
    - trinity-network
  volumes:
    - trinity-volume:/mnt
    - ../../..:/workspace  # repository checkout mounted as the workspace
  shm_size: "64G"

services:
  trinity-node-1:
    <<: *trinity-common
    # Head node: installs dev dependencies, then starts the Ray head
    # with the dashboard exposed on all interfaces.
    command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - RAY_ADDRESS=auto
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['4', '5']
              capabilities: [gpu]

  trinity-node-2:
    <<: *trinity-common
    # Worker node: joins the Ray cluster started by trinity-node-1.
    command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    depends_on:
      - trinity-node-1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['6', '7']
              capabilities: [gpu]

networks:
  trinity-network:
    driver: bridge

volumes:
  trinity-volume:
    external: true  # created out-of-band; not managed by this compose file

.github/workflows/unittest.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
name: unittest

on:
  issue_comment:
    types: [created]

permissions:
  contents: read
  pull-requests: write  # required for the coverage-comment step to post on the PR

jobs:
  unittest:
    # Only run when a collaborator comments `/run-unittest` on a pull request.
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-unittest') && github.event.comment.author_association == 'COLLABORATOR' }}
    runs-on: self-hosted

    steps:
      - uses: actions/checkout@v4
        with:
          # issue_comment events run against the default branch; without an
          # explicit ref the tests would never see the PR's changes. Check out
          # the PR merge ref instead.
          ref: refs/pull/${{ github.event.issue.number }}/merge
          path: trinity-${{ github.run_id }}
          fetch-depth: 0

      - name: Setup docker compose
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose up -d

      - name: Run unittest
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose exec trinity-node-1 pytest tests --ignore=tests/data --junitxml=pytest.xml
        # Keep the job going so results are uploaded and commented even on failure.
        continue-on-error: true

      - name: Upload test results
        if: always()  # publish results even when the pytest step failed
        # v2 of upload-artifact is deprecated and disabled on GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: pytest-results
          path: trinity-${{ github.run_id }}/pytest.xml

      - name: Pytest coverage comment
        uses: MishaKav/pytest-coverage-comment@main
        with:
          junitxml-title: Unittest Result Summary
          junitxml-path: trinity-${{ github.run_id }}/pytest.xml

      - name: Teardown docker compose
        if: always()  # never leak containers on the self-hosted runner
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose down --remove-orphans
# TODO: run data tests after the dependency conflict is resolved

docs/sphinx_doc/source/tutorial/trinity_configs.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ critic:
307307
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
308308
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
309309
ppo_micro_batch_size_per_gpu: 8
310-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
311310
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
312311
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
313312
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/dpo_humanlike/train_dpo.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ critic:
118118
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
119119
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
120120
ppo_micro_batch_size_per_gpu: 1
121-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
122121
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
123122
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
124123
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_alfworld/train_alfworld.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ critic:
117117
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118118
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119119
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121120
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122121
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123122
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_gsm8k/train_gsm8k.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ critic:
122122
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
123123
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
124124
ppo_micro_batch_size_per_gpu: 64
125-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
126125
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
127126
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
128127
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_math/math.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ model:
1818
max_prompt_tokens: 1024
1919
max_response_tokens: 3072
2020
checkpoint_path: /PATH/TO/CHECKPOINT/
21-
load_checkpoint: true
2221
cluster:
2322
node_num: 1
2423
gpu_per_node: 8

examples/grpo_math/train_math.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ actor_rollout_ref:
2525
actor:
2626
strategy: fsdp # This is for backward-compatibility
2727
ppo_mini_batch_size: 128
28-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
2928
ppo_micro_batch_size_per_gpu: 4
3029
use_dynamic_bsz: True # False
3130
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
@@ -62,7 +61,6 @@ actor_rollout_ref:
6261
wrap_policy:
6362
# transformer_layer_cls_to_wrap: None
6463
min_num_params: 0
65-
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
6664
log_prob_micro_batch_size_per_gpu: 16
6765
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
6866
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -86,7 +84,6 @@ actor_rollout_ref:
8684
max_num_batched_tokens: 8192
8785
max_model_len: null
8886
max_num_seqs: 1024
89-
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
9087
log_prob_micro_batch_size_per_gpu: 4
9188
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
9289
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -120,9 +117,7 @@ critic:
120117
min_num_params: 0
121118
fsdp_size: -1
122119
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
123-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
124120
ppo_micro_batch_size_per_gpu: 64
125-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
126121
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
127122
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
128123
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

examples/grpo_sciworld/train_sciworld.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ actor_rollout_ref:
2525
actor:
2626
strategy: fsdp # This is for backward-compatibility
2727
ppo_mini_batch_size: 1536
28-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
2928
ppo_micro_batch_size_per_gpu: 1
3029
use_dynamic_bsz: False
3130
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
@@ -57,7 +56,6 @@ actor_rollout_ref:
5756
wrap_policy:
5857
# transformer_layer_cls_to_wrap: None
5958
min_num_params: 0
60-
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
6159
log_prob_micro_batch_size_per_gpu: 1
6260
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
6361
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -81,7 +79,6 @@ actor_rollout_ref:
8179
max_num_batched_tokens: 8192
8280
max_model_len: null
8381
max_num_seqs: 1024
84-
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
8582
log_prob_micro_batch_size_per_gpu: 1
8683
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
8784
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -115,9 +112,7 @@ critic:
115112
min_num_params: 0
116113
fsdp_size: -1
117114
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118-
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119115
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121116
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122117
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123118
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
@@ -140,7 +135,6 @@ reward_model:
140135
min_num_params: 0
141136
param_offload: False
142137
fsdp_size: -1
143-
# micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
144138
# micro_batch_size_per_gpu: 2 # set a number
145139
# max_length: null
146140
ulysses_sequence_parallel_size: 1 # sp size

examples/grpo_webshop/train_webshop.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ critic:
117117
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
118118
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
119119
ppo_micro_batch_size_per_gpu: 1
120-
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
121120
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
122121
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
123122
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2

0 commit comments

Comments
 (0)