Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Two-node Ray cluster used by the unittest workflow
# (.github/workflows/unittest.yaml). Both nodes share the repository checkout
# (mounted at /workspace) and a pre-provisioned external volume at /mnt.
services:
  trinity-node-1:
    image: trinity-rft:latest-unittest
    pull_policy: never  # image is built locally on the runner; never pull from a registry
    command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - RAY_ADDRESS=auto
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    working_dir: /workspace
    networks:
      - trinity-network
    volumes:
      - trinity-volume:/mnt
      - ../../..:/workspace  # repository root checked out by the workflow
    shm_size: "64G"
    # Report healthy only once the Ray head is accepting connections, so that
    # trinity-node-2 does not try to join before the cluster exists. Without
    # this, node-2 races the head's (slow) `pip install -e .[dev]` step.
    healthcheck:
      test: ["CMD-SHELL", "ray status"]
      interval: 10s
      timeout: 10s
      retries: 30
      start_period: 120s  # pip install can take a while on a cold cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["4", "5"]
              capabilities: [gpu]

  trinity-node-2:
    image: trinity-rft:latest-unittest
    pull_policy: never
    command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
    environment:
      - HF_ENDPOINT=https://hf-mirror.com
      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
      - DATA_ROOT_DIR=/mnt/data
      - MODEL_PATH=/mnt/checkpoints/Qwen2.5-1.5B-Instruct
    working_dir: /workspace
    volumes:
      - trinity-volume:/mnt
      - ../../..:/workspace
    depends_on:
      trinity-node-1:
        # Wait for the head's healthcheck (above), not merely container start,
        # before attempting `ray start --address=...`.
        condition: service_healthy
    networks:
      - trinity-network
    shm_size: "64G"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["6", "7"]
              capabilities: [gpu]

networks:
  trinity-network:
    driver: bridge

volumes:
  trinity-volume:
    external: true  # must be created out-of-band: `docker volume create trinity-volume`
44 changes: 44 additions & 0 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: unittest

# Triggered by a "/run-unittest" comment from a collaborator on a pull request.
on:
  issue_comment:
    types: [created]

permissions:
  contents: read
  # Required by MishaKav/pytest-coverage-comment to post the summary comment
  # on the pull request; with `contents: read` alone the comment step fails.
  pull-requests: write

jobs:
  unittest:
    # only run on pull request comments starting with /run-unittest, from collaborators
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-unittest') && github.event.comment.author_association == 'COLLABORATOR' }}
    runs-on: self-hosted

    steps:
      - uses: actions/checkout@v4
        with:
          # issue_comment events run against the default branch, so without an
          # explicit ref this would test main instead of the PR. Check out the
          # PR's merge ref (issue number == PR number for PR comments).
          ref: refs/pull/${{ github.event.issue.number }}/merge
          path: trinity-${{ github.run_id }}
          fetch-depth: 0

      - name: Setup docker compose
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose up -d --wait

      - name: Run unittest
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose exec trinity-node-1 pytest tests --ignore=tests/data --junitxml=pytest.xml
        # Keep going on failure so results are still uploaded and commented.
        continue-on-error: true

      - name: Upload test results
        if: always()
        # upload-artifact@v2 is deprecated and rejected by GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: pytest-results
          path: trinity-${{ github.run_id }}/pytest.xml

      - name: Pytest coverage comment
        uses: MishaKav/pytest-coverage-comment@main
        with:
          junitxml-title: Unittest Result Summary
          junitxml-path: trinity-${{ github.run_id }}/pytest.xml

      - name: Cleanup docker compose
        # Always tear the cluster down so it does not leak on the self-hosted runner.
        if: always()
        working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
        run: |
          docker compose down
    # TODO: run data tests after the dependency conflict is resolved
1 change: 0 additions & 1 deletion docs/sphinx_doc/source/tutorial/trinity_configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 8
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/dpo_humanlike/train_dpo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_alfworld/train_alfworld.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_gsm8k/train_gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_math/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ model:
max_prompt_tokens: 1024
max_response_tokens: 3072
checkpoint_path: /PATH/TO/CHECKPOINT/
load_checkpoint: true
cluster:
node_num: 1
gpu_per_node: 8
Expand Down
5 changes: 0 additions & 5 deletions examples/grpo_math/train_math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ actor_rollout_ref:
actor:
strategy: fsdp # This is for backward-compatibility
ppo_mini_batch_size: 128
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 4
use_dynamic_bsz: True # False
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
Expand Down Expand Up @@ -62,7 +61,6 @@ actor_rollout_ref:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 16
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand All @@ -86,7 +84,6 @@ actor_rollout_ref:
max_num_batched_tokens: 8192
max_model_len: null
max_num_seqs: 1024
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 4
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand Down Expand Up @@ -120,9 +117,7 @@ critic:
min_num_params: 0
fsdp_size: -1
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
6 changes: 0 additions & 6 deletions examples/grpo_sciworld/train_sciworld.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ actor_rollout_ref:
actor:
strategy: fsdp # This is for backward-compatibility
ppo_mini_batch_size: 1536
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
use_dynamic_bsz: False
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
Expand Down Expand Up @@ -57,7 +56,6 @@ actor_rollout_ref:
wrap_policy:
# transformer_layer_cls_to_wrap: None
min_num_params: 0
# log_prob_micro_batch_size: 4 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 1
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand All @@ -81,7 +79,6 @@ actor_rollout_ref:
max_num_batched_tokens: 8192
max_model_len: null
max_num_seqs: 1024
# log_prob_micro_batch_size: 8 # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 1
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
Expand Down Expand Up @@ -115,9 +112,7 @@ critic:
min_num_params: 0
fsdp_size: -1
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand All @@ -140,7 +135,6 @@ reward_model:
min_num_params: 0
param_offload: False
fsdp_size: -1
# micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
# micro_batch_size_per_gpu: 2 # set a number
# max_length: null
ulysses_sequence_parallel_size: 1 # sp size
Expand Down
1 change: 0 additions & 1 deletion examples/grpo_webshop/train_webshop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 1
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 16384 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/opmd_gsm8k/train_opmd_gsm8k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 64
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
1 change: 0 additions & 1 deletion examples/ppo_countdown/train_countdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ critic:
ppo_mini_batch_size: ${actor_rollout_ref.actor.ppo_mini_batch_size}
# ppo_micro_batch_size: 8 # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: 8
forward_micro_batch_size: ${critic.ppo_micro_batch_size}
forward_micro_batch_size_per_gpu: ${critic.ppo_micro_batch_size_per_gpu}
use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2
Expand Down
Empty file added tests/buffer/__init__.py
Empty file.
Empty file added tests/common/__init__.py
Empty file.
24 changes: 14 additions & 10 deletions tests/common/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
class TestConfig(unittest.TestCase):
def test_load_default_config(self):
config = load_config(config_yaml_path)
print(config.data)
config.check_and_update()
self.assertIsNotNone(config.trainer.trainer_config)
self.assertEqual(config.trainer.trainer_config.trainer.n_gpus_per_node, 4)
self.assertEqual(config.trainer.trainer_config.trainer.nnodes, 1)
Expand All @@ -22,13 +24,15 @@ def test_load_default_config(self):
config.synchronizer.sync_iteration_interval,
)

def test_all_examples_are_valid(self): # TODO: useless
example_dir = os.path.join(os.path.dirname(__file__), "..", "..", "scripts", "config")
for filename in ["countdown", "gsm8k"]:
if filename.endswith(".yaml"):
config_path = os.path.join(example_dir, filename)
try:
load_config(config_path)
except Exception as e:
print(f"Error loading config {config_path}: {e}")
raise e
def test_all_examples_are_valid(self):
    """Every non-training YAML config under examples/ must load without error.

    Files named ``train*.yaml`` are skipped: they are backend trainer
    configs, not trinity configs, and are not expected to pass
    ``load_config``.
    """
    example_dir = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
    for example_name in os.listdir(example_dir):
        example_path = os.path.join(example_dir, example_name)
        # Skip stray top-level files (e.g. a README); os.listdir on a
        # non-directory would raise NotADirectoryError.
        if not os.path.isdir(example_path):
            continue
        for filename in os.listdir(example_path):
            if filename.endswith(".yaml") and not filename.startswith("train"):
                config_path = os.path.join(example_path, filename)
                # Fixed: the f-string previously had no placeholder.
                print(f"Checking config: {config_path}")
                try:
                    load_config(config_path)
                except Exception as e:
                    print(f"Error loading config {config_path}: {e}")
                    raise e
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_experience_model_experience_conversion(self):
tokens = torch.tensor([1, 2, 3], dtype=torch.int32)
reward = 0.6
prompt_length = 2
logprobs = torch.tensor([0.1], dtype=torch.float32)
logprobs = torch.tensor([0, 0, 0.1], dtype=torch.float32)
action_mask = torch.tensor([1, 0, 1], dtype=torch.bool)
experience = Experience(
tokens=tokens,
Expand All @@ -44,28 +44,28 @@ def test_batch_conversion(self):
tokens=torch.tensor([1, 2]),
prompt_length=1,
reward=float(0.1),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0.1]),
action_mask=torch.tensor([1, 0]),
),
Experience(
tokens=torch.tensor([1, 2, 3]),
prompt_length=2,
reward=float(0.2),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0, 0.1]),
action_mask=torch.tensor([1, 0, 1]),
),
Experience(
tokens=torch.tensor([1, 2, 3, 4]),
prompt_length=2,
reward=float(0.3),
logprobs=torch.tensor([0.1, 0.2]),
logprobs=torch.tensor([0, 0, 0.1, 0.2]),
action_mask=torch.tensor([1, 0, 1, 0]),
),
Experience(
tokens=torch.tensor([1, 2, 3, 4]),
prompt_length=3,
reward=float(0.4),
logprobs=torch.tensor([0.1]),
logprobs=torch.tensor([0, 0, 0, 0.1]),
action_mask=torch.tensor([1, 0, 1, 0]),
),
]
Expand All @@ -89,7 +89,8 @@ def test_batch_conversion(self):
self.assertTrue(
torch.all(
batch.logprobs[i][
prompt_length : prompt_length
prompt_length
- exps[i].prompt_length : prompt_length
+ exps[i].tokens.size(0)
- exps[i].prompt_length
]
Expand Down
24 changes: 3 additions & 21 deletions tests/common/tmp/template_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,11 @@ mode: both
data:
dataset_path: ''
total_epoch: 1
batch_size: 1
batch_size: 32
train_split: 'train'
eval_split: ''
default_workflow_type: ''
default_reward_fn_type: ''
dataset_config: {}
format_config:
prompt_key: ''
response_key: ''
chat_template: ''
reward_fn_key: ''
workflow_key: ''
solution_key: ''
reward_key: ''
chosen_key: ''
rejected_key: ''
label_key: ''
dj_config_path: null
dj_process_desc: null
clean_strategy: iterative
min_size_ratio: null
model:
model_path: ''
max_prompt_tokens: 2048
Expand All @@ -32,8 +16,6 @@ cluster:
node_num: 1
gpu_per_node: 8
buffer:
storage_type: sql
db_url: ''
read_batch_size: 32
max_retry_times: 3
max_retry_interval: 1
Expand All @@ -46,8 +28,8 @@ explorer:
enable_prefix_caching: false
enforce_eager: true
dtype: bfloat16
temperature: 0.0
top_p: 1.0
temperature: 0.2
top_p: 0.95
top_k: -1
seed: 42
logprobs: 0
Expand Down
Loading