add benchmark mode

hiyuchang · hiyuchang · commit 94c34601e90c · 2025-05-15T20:50:53.000+08:00
diff --git a/trinity/cli/launcher.py b/trinity/cli/launcher.py
@@ -19,11 +19,11 @@ def bench(config: Config) -> None:
     try:
         ray.get(explorer.prepare.remote())
         ray.get(explorer.sync_weight.remote())
-        _, step = ray.get(explorer.eval.remote())
-        logger.info("Evaluation finished.")
+        _, step = ray.get(explorer.benchmark.remote())
+        logger.info("Benchmark finished.")
         ray.get(explorer.flush_log.remote(step=step))
     except Exception as e:
-        logger.error(f"Evaluation failed: {e}")
+        logger.error(f"Benchmark failed: {e}")
         raise e
 
 
diff --git a/trinity/common/config.py b/trinity/common/config.py
@@ -107,6 +107,7 @@ class GlobalConfig:
     total_epochs: int = 1
     batch_size: int = 1
     eval_interval: int = 100
+    eval_on_latest_ckp: bool = True
 
 
 @dataclass
diff --git a/trinity/explorer/explorer.py b/trinity/explorer/explorer.py
@@ -261,6 +261,25 @@ def wait():
         self.monitor.log(log_metrics, step=self.step_num)  # type: ignore
         return True, self.step_num
 
+    def benchmark(self) -> Tuple[bool, int]:
+        """Benchmark the model checkpoints."""
+        latest_step = self.step_num
+
+        # benchmark on the latest checkpoint
+        if self.config.global_config.eval_on_latest_ckp:
+            self.eval()
+            return True, self.step_num
+
+        # benchmark on all checkpoints
+        for step_num in range(latest_step + 1):
+            path = os.path.join(self.config.model.checkpoint_path, f"global_step_{step_num}")
+            if os.path.isdir(path) and os.listdir(path):
+                self.logger.info(f"{path} exists.")
+                self.step_num = step_num
+                self._checkpoint_weights_update(step_num=step_num)
+                self.eval()
+        return True, self.step_num
+
     def sync_weight(self) -> None:
         """Synchronize model weights."""
         # call this method before training start to load the latest model weights