modelscope
diff --git a/‎ajet/context_tracker/base_tracker.py‎
Lines changed: 15 additions & 0 deletions b/‎ajet/context_tracker/base_tracker.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎ajet/task_runner/tinkerscript_runner.py‎
Lines changed: 24 additions & 8 deletions b/‎ajet/task_runner/tinkerscript_runner.py‎
Lines changed: 24 additions & 8 deletions
diff --git a/‎ajet/tuner_lib/weight_tuner/experimental/as_tinkerscript_client.py‎
Lines changed: 36 additions & 9 deletions b/‎ajet/tuner_lib/weight_tuner/experimental/as_tinkerscript_client.py‎
Lines changed: 36 additions & 9 deletions
@@ -148,6 +148,21 @@ def __init__(self, config, tokenizer, workflow_task: WorkflowTask, **kwargs):
             <= max_model_len
         )
 
+    def reset(self):
+        """Restore the tracker's mutable episode state to its default values.
+
+        Every attribute assigned below is overwritten unconditionally, so any
+        timelines, rewards, flags, and metrics recorded so far are dropped.
+        """
+        # Recorded conversation data and free-form labels.
+        self.saved_timelines: List[List[ExtendedMessage]] = []
+        self.current_context_status = ""
+        self.tag = ""
+
+        # Reward bookkeeping.
+        # NOTE(review): -inf presumably marks "not computed yet" - confirm.
+        self.reward_structure: Union[Reward, None] = None
+        self.terminal_rewards_dict = {}
+        self.current_batch_reward: float = float("-inf")
+        self.current_batch_success_rate: float = float("-inf")
+
+        # Lifecycle flags.
+        self.is_terminated = False
+        self.discarded = False
+        self.already_mad_flag: bool = False
+
+        # Counters and collected metrics.
+        self.round_cnt = 0
+        self.context_time_cost = 0
+        self.log_metrics: Optional[Dict[str, Union[float, List[float], Dict[str, Any]]]] = None
+
     def group_tokenize(self):
         raise NotImplementedError
 
 
@@ -23,7 +23,7 @@
 
 class TinkerScriptRunner(BaseAgentRunner):
 
-    def register_episode_and_wait_output(self, episode_uuid: str, openai_base_url: str, openai_api_key: str) -> WorkflowOutput:
+    def register_episode_and_wait_output(self, episode_uuid: str, openai_base_url: str, openai_api_key: str, context_tracker: BaseContextTracker) -> WorkflowOutput:
         """Register the episode as ready in the TinkerScript data interchange center."""
         # parse episode_uuid, openai_base_url, openai_api_key
         zmq_listen_result_addr, ipc_path = get_zmq_socket(self.config, episode_uuid, tag="workflow")
@@ -39,15 +39,30 @@ def register_episode_and_wait_output(self, episode_uuid: str, openai_base_url: s
         # begin wait for result
         zmq_socket = zmq.Context().socket(zmq.REP)
         zmq_socket.bind(zmq_listen_result_addr)
-
-        # <wait for>:
-        #   <from_sourcefile>: ajet/tuner_lib/weight_tuner/experimental/as_tinkerscript_server.py
-        #   <from_code>: socket.send_string(workflow_output.model_dump_json())
-        #   <expect>: workflow_output: WorkflowOutput
-        message = zmq_socket.recv_string()
+        # Control messages the server may send on this socket instead of a
+        # serialized WorkflowOutput. The reset command is held in one constant
+        # so the membership test and the handler branch below cannot drift
+        # apart (previously the list held "RUNNER.RESET_CONTEXT_TRACKER", so a
+        # real reset command was mistaken for the final workflow output).
+        reset_command = "RUNNER.SPECIAL.RESET_CONTEXT_TRACKER"
+        special_messages = (reset_command,)
+        while True:
+            # <wait for 1/2>:
+            #   <from_sourcefile>: ajet/tuner_lib/weight_tuner/experimental/as_tinkerscript_server.py
+            #   <from_code>: socket.send_string(workflow_output.model_dump_json())
+            #   <expect>: workflow_output: WorkflowOutput
+            # <wait for 2/2>:
+            #   <from_sourcefile>: ajet/tuner_lib/weight_tuner/experimental/as_tinkerscript_server.py
+            #   <from_code>: socket.send_string("RUNNER.SPECIAL.RESET_CONTEXT_TRACKER")
+            #   <expect>: "RUNNER.SPECIAL.RESET_CONTEXT_TRACKER"
+            message = zmq_socket.recv_string()
+            if message not in special_messages:
+                # Not a control message: treat it as the workflow output,
+                # acknowledge it, and stop waiting.
+                zmq_socket.send_string("ack")
+                break
+            elif message == reset_command:
+                logger.warning(f"Received reset command for episode {episode_uuid}.")
+                context_tracker.reset()
+                # A REP socket must reply before it can receive again.
+                zmq_socket.send_string("ack")
+            else:
+                # Guard for control messages added to special_messages
+                # without a matching handler branch.
+                raise RuntimeError(f"Unknown special message received: {message}")
 
         logger.success(f"Received workflow output for episode {episode_uuid}")
-        zmq_socket.send_string("ack")
         zmq_socket.close()
         if ipc_path and os.path.exists(ipc_path): os.remove(ipc_path)
 
@@ -85,6 +100,7 @@ def execute(self, workflow_task: WorkflowTask) -> BaseContextTracker:
             episode_uuid=context_tracker.episode_uuid,
             openai_base_url=base_url,
             openai_api_key=api_key,
+            context_tracker=context_tracker,
         )
 
         if workflow_output.reward is not None:
 
@@ -28,7 +28,7 @@ def __init__(self, server_url: str):
         self.previous_warning_time = 0
 
 
-    def begin_episode(self, allow_discard_timeout=60) -> Tuple[str, OpenaiBaseUrlAndApiKey]:
+    def begin_episode(self, allow_discard_timeout=60, episode_type="train") -> Tuple[str, OpenaiBaseUrlAndApiKey]:
         """
         Block until an episode is claimed.
         Return (episode_uuid, openai_base_url, openai_api_key)
@@ -37,7 +37,7 @@ def begin_episode(self, allow_discard_timeout=60) -> Tuple[str, OpenaiBaseUrlAnd
             try:
                 req_obj = ClaimEpisodeRequest(
                     client_uuid=self.client_uuid,
-                    episode_type="default",
+                    episode_type=episode_type,
                     allow_discard_timeout=allow_discard_timeout,
                 )
                 resp = httpx.post(
@@ -161,15 +161,15 @@ def start_engine(self):
             raise
 
         # Poll until engine status is "ENGINE.ROLLING"
-        self._wait_until_avail()
+        self._wait_until_status_change_to(desired_status="ENGINE.ROLLING")
         logger.success("Training engine is now ROLLING and ready.")
 
-    def _wait_until_avail(self):
+    def _wait_until_status_change_to(self, desired_status="ENGINE.ROLLING"):
         """
-        Poll engine status until it reaches ENGINE.ROLLING state.
+        Poll engine status until it reaches desired_status.
         Reports status every 5 seconds while waiting.
         """
-        logger.info("Polling engine status until ENGINE.ROLLING...")
+        logger.info(f"Polling engine status until {desired_status}...")
         last_report_time = time.time()
         init_poll_time = last_report_time
 
@@ -184,8 +184,8 @@ def _wait_until_avail(self):
                     last_report_time = current_time
 
                 # Check if engine has reached the desired status
-                if current_status == "ENGINE.ROLLING":
-                    logger.info("Engine status is ENGINE.ROLLING - engine is ready")
+                if current_status == desired_status:
+                    logger.info(f"Engine status is {desired_status}.")
                     break
 
                 # Wait a bit before next poll
@@ -256,7 +256,34 @@ def auto_sync_train_config_and_start_engine(self, agent_jet_job: AgentJetJob):
             logger.info("Engine is already ROLLING. No action needed.")
         elif current_status == "ENGINE.BOOTING":
             logger.info("Engine is BOOTING. Waiting until it becomes ROLLING...")
-            self._wait_until_avail()
+            self._wait_until_status_change_to(desired_status="ENGINE.ROLLING")
             logger.success("Training engine is now ROLLING and ready.")
         else:
             raise RuntimeError(f"Cannot sync train config or start engine when engine is in status: {current_status}")
+
+    def stop_engine(self):
+        """
+        Ask the TinkerScript server to stop the training engine.
+
+        Returns immediately when the engine already reports ENGINE.OFFLINE.
+        Otherwise posts to the server's /stop_engine endpoint and then polls
+        until the engine status becomes ENGINE.OFFLINE. Any exception raised
+        by the request or by the polling is logged and not re-raised.
+        """
+        if self.get_engine_status() == "ENGINE.OFFLINE":
+            logger.info("Engine is already OFFLINE. No action needed.")
+            return
+
+        try:
+            response = httpx.post(
+                f"{self.server_url}/stop_engine",
+                json={},
+                timeout=600
+            )
+            response.raise_for_status()
+            stopped = response.json().get("success")
+            if not stopped:
+                logger.error("Failed to stop training engine")
+            else:
+                logger.info("Successfully stopped training engine on TinkerScript server")
+            # The wait runs even when the server did not report success.
+            self._wait_until_status_change_to(desired_status="ENGINE.OFFLINE")
+        except Exception as e:
+            logger.error(f"Error stopping engine: {e}")
+