@@ -72,12 +72,8 @@ def __init__(self, config: Config):
7272 # For checkpoint weights update
7373 # Use explorer to periodically load the latest model weights and
7474 # broadcast to all rollout models
75- self .model_version = 0
76- if self .use_state_dict_weights_update :
77- self .old_checkpoint = None
78- self .state_dict = {}
79- else : # nccl mode
80- self .state_dict_meta = []
75+ self .model_version = - 1
76+ self .last_sync_successful = True
8177 self .logger .info ("Finished initializing Explorer." )
8278 self .collect_experiences = self .config .explorer .collect_experiences
8379 self .generated_experience_cnt = 0
@@ -102,7 +98,6 @@ async def setup_weight_sync_group(
10298 f"master_address={ master_address } , master_port={ master_port } , "
10399 f"world_size={ world_size } , rank_offset={ base_offset } "
104100 )
105- self .state_dict_meta = state_dict_meta
106101 # TODO: save state_dict in models
107102 refs = [
108103 model .init_process_group .remote (
@@ -130,21 +125,6 @@ def _init_scheduler(self) -> Scheduler:
130125 )
131126 return Scheduler (self .config , self .models , self .auxiliary_models )
132127
133- async def _update_model_weight (self , step_num : int , state_dict : dict ) -> None :
134- # TODO: update model weight
135- self .state_dict = state_dict
136- if self .state_dict_meta is None :
137- update_weight_args_list = []
138- for name , param in state_dict .items ():
139- update_weight_args_list .append ((name , str (param .dtype ), tuple (param .shape )))
140- self .state_dict_meta = update_weight_args_list
141- else :
142- update_weight_args_list = None
143- await asyncio .gather (
144- * [model .sync_model .remote (step_num , update_weight_args_list ) for model in self .models ]
145- )
146- self .state_dict .clear ()
147-
148128 async def _checkpoint_weights_update (self , step_num : Optional [int ] = None ) -> int :
149129 step_num = ray .get (self .synchronizer .set_model_state_dict_with_step_num .remote (step_num ))
150130 await asyncio .gather (* [model .sync_model .remote (step_num ) for model in self .models ])
@@ -156,41 +136,59 @@ async def _state_dict_update(self):
156136 self .synchronizer .wait_new_model_state_dict .remote (self .model_version )
157137 )
158138 if new_version > self .model_version :
159- self .logger .info (f"New model state dict version: { new_version } " )
160- await asyncio .gather (* [model .sync_model .remote (new_version ) for model in self .models ])
139+ if self .model_version != - 1 :
140+ self .logger .info (f"New model state dict version: { new_version } " )
141+ await asyncio .gather (
142+ * [model .sync_model .remote (new_version ) for model in self .models ]
143+ )
161144 self .model_version = new_version
145+ self .last_sync_step = self .explore_step_num
146+ ray .get (
147+ self .synchronizer .set_explorer_status .remote (
148+ RunningStatus .RUNNING , old_status = RunningStatus .WAITING_SYNC
149+ )
150+ )
151+ self .last_sync_successful = True
162152 else :
163153 self .logger .warning (
164154 f"No new model state dict found, current version: { self .model_version } "
165155 )
156+ self .last_sync_successful = False
166157
167158 async def _nccl_weights_update (self ):
168- assert self .state_dict_meta is not None
169159 new_version = ray .get (
170160 self .synchronizer .ready_to_nccl_sync .remote ("explorer" , self .model_version )
171161 )
172162 if new_version is None :
173163 self .logger .info ("Trainer is not ready to sync weight. Skipping sync weight." )
164+ self .last_sync_successful = False
174165 return
175166 self .model_version = new_version
176167 await asyncio .gather (
177- * [model .sync_model .remote (self .explore_step_num ) for model in self .models ]
168+ * [model .sync_model .remote (self .model_version ) for model in self .models ]
178169 )
170+ self .last_sync_step = self .explore_step_num
171+ ray .get (
172+ self .synchronizer .set_explorer_status .remote (
173+ RunningStatus .RUNNING , old_status = RunningStatus .WAITING_SYNC
174+ )
175+ )
176+ self .last_sync_successful = True
179177
180178 async def prepare (self ) -> None :
181179 """Preparation before running."""
180+ if self .experience_buffer :
181+ await self .experience_buffer .acquire ()
182182 futures = [asyncio .create_task (self .scheduler .start ())]
183183 if self .use_state_dict_weights_update :
184184 master_address , master_port = await self .models [0 ].get_available_address .remote ()
185185 futures .append (
186186 asyncio .create_task (self .setup_weight_sync_group (master_address , master_port ))
187187 )
188188 asyncio .gather (* futures , return_exceptions = True )
189- await self .synchronizer .set_explorer_status .remote (RunningStatus .REQUIRE_SYNC )
190- if self .experience_buffer :
191- await self .experience_buffer .acquire ()
192189 if self .config .explorer .eval_on_startup and self .explore_step_num == 0 :
193190 self .eval ()
191+ await self .synchronizer .set_explorer_status .remote (RunningStatus .REQUIRE_SYNC )
194192
195193 async def get_weight (self , name : str ) -> torch .Tensor :
196194 """Get the weight of the loaded model (For checkpoint weights update)."""
@@ -237,7 +235,10 @@ async def explore_step(self) -> bool:
237235 self .logger .warning ("No more tasks to explore. Stop exploring." )
238236 await self .save_checkpoint (sync_weight = False )
239237 await self .synchronizer .set_explorer_status .remote (
240- RunningStatus .STOPPED , old_status = RunningStatus .RUNNING
238+ RunningStatus .STOPPED ,
239+ old_status = RunningStatus .RUNNING
240+ if self .last_sync_successful
241+ else RunningStatus .REQUIRE_SYNC ,
241242 )
242243 await self .experience_buffer .release ()
243244 return False
@@ -249,7 +250,7 @@ def need_sync(self) -> bool:
249250 if self .config .synchronizer .sync_style == SyncStyle .FIXED :
250251 if self .explore_step_num <= self .config .synchronizer .sync_offset :
251252 return False
252- return (
253+ require_sync = (
253254 self .explore_step_num - self .config .synchronizer .sync_offset
254255 ) % self .config .synchronizer .sync_interval == 0
255256 else :
@@ -263,13 +264,13 @@ def need_sync(self) -> bool:
263264 ray .get (self .synchronizer .get_trainer_status .remote ())
264265 == RunningStatus .REQUIRE_SYNC
265266 )
266- if require_sync :
267- ray .get (
268- self .synchronizer .set_explorer_status .remote (
269- RunningStatus .REQUIRE_SYNC , old_status = RunningStatus .RUNNING
270- )
267+ if require_sync and self .last_sync_successful :
268+ ray .get (
269+ self .synchronizer .set_explorer_status .remote (
270+ RunningStatus .REQUIRE_SYNC , old_status = RunningStatus .RUNNING
271271 )
272- return require_sync
272+ )
273+ return require_sync
273274
274275 def need_eval (self ) -> bool :
275276 return self .explore_step_num % self .config .explorer .eval_interval == 0
@@ -338,8 +339,9 @@ async def save_checkpoint(self, sync_weight: bool = False) -> None:
338339 await self ._state_dict_update ()
339340 else : # nccl weights update
340341 await self ._nccl_weights_update ()
341- self .last_sync_step = self .explore_step_num
342- self .logger .info (f"Explorer sync_weights at step { self .explore_step_num } finished" )
342+ self .logger .info (
343+ f"Explorer sync_weights at step { self .explore_step_num } finished, model version = { self .model_version } ."
344+ )
343345
344346 # overlay log and weight sync
345347 await log_task
@@ -354,11 +356,6 @@ async def sync_weight(self) -> None:
354356 """Synchronize model weights."""
355357 # call this method before training start to load the latest model weights
356358 await self .save_checkpoint (sync_weight = True )
357- ray .get (
358- self .synchronizer .set_explorer_status .remote (
359- RunningStatus .RUNNING , old_status = RunningStatus .WAITING_SYNC
360- )
361- )
362359
363360 async def _finish_steps (self , start_step : int , end_step : int , model_version : int ) -> None :
364361 for step in range (start_step , end_step + 1 ):
0 commit comments