Skip to content

Commit 10dcadf

Browse files
committed
fix unittest
1 parent 0bc0398 commit 10dcadf

File tree

4 files changed

+12
-13
lines changed

4 files changed

+12
-13
lines changed

tests/buffer/sample_strategy_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ def _init_buffer_writer_and_sample_strategy(self):
5858
async def _verify_model_version(self, step, expected_versions):
5959
batch, metrics, _ = await self.sample_strategy.sample(step=step)
6060
self.assertEqual(
61-
batch.rewards.tolist(), expected_versions, f"Model versions mismatch at step {step}"
61+
[exp.reward for exp in batch],
62+
expected_versions,
63+
f"Model versions mismatch at step {step}",
6264
)
6365
self.assertEqual(
6466
metrics["sample/model_version/min"],

tests/common/vllm_test.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,17 +125,13 @@ def setUp(self):
125125
self.config.algorithm.repeat_times = self.repeat_times
126126
self.config.explorer.rollout_model.enable_history = self.enable_history
127127
self.config.check_and_update()
128-
from pprint import pprint
129128

130-
pprint(self.config)
131129
self.engines, self.auxiliary_engines = create_inference_models(self.config)
132130
self.model_wrapper = ModelWrapper(
133131
self.engines[0], engine_type="vllm", enable_history=self.enable_history
134132
)
135133

136-
async def test_generate(
137-
self,
138-
):
134+
async def test_generate(self):
139135
await prepare_engines(self.engines, self.auxiliary_engines)
140136
await self.model_wrapper.prepare()
141137
self.assertEqual(self.model_wrapper.model_path, self.config.model.model_path)

tests/trainer/trainer_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1325,7 +1325,7 @@ def tearDown(self):
13251325

13261326

13271327
class TestTinkerTrainer(BaseTrainerCase):
1328-
# @unittest.skip("Require tinker API key")
1328+
@unittest.skip("Require tinker API key")
13291329
def test_trainer(self):
13301330
"""Test GSM8K on tinker."""
13311331
# test both mode

trinity/trainer/tinker_trainer.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def _init_algorithm(self):
4545
self.kl_fn = KL_FN.get(algorithm_config.kl_penalty_fn)(
4646
**algorithm_config.kl_penalty_fn_args
4747
)
48+
# TODO
49+
raise NotImplementedError(
50+
"`compute_advantage_in_trainer` is not implemented yet in tinker"
51+
)
4852
self.loss_agg_mode = algorithm_config.loss_agg_mode
4953
self.policy_loss_fn = POLICY_LOSS_FN.get(algorithm_config.policy_loss_fn)(
5054
backend="tinker", **algorithm_config.policy_loss_fn_args
@@ -227,12 +231,9 @@ async def train_step(self, batch_exps: List[Experience]) -> Dict:
227231

228232
if self.algorithm.compute_advantage_in_trainer:
229233
# TODO: following is verl format, which is not compatible with tinker
230-
with marked_timer("adv", timing_raw):
231-
# compute kl penalty
232-
batch, kl_metrics = self.kl_fn.apply_kl_penalty_to_reward(batch)
233-
metrics.update(prefix_metrics(kl_metrics, prefix="critic"))
234-
# compute advantages, executed on the driver process
235-
batch, _ = self.advantage_fn(batch)
234+
raise NotImplementedError(
235+
"`compute_advantage_in_trainer` is not implemented yet in tinker"
236+
)
236237
else:
237238
# skip token_level_scores for sft/dpo
238239
for model_inputs in model_inputs_list:

0 commit comments

Comments (0)