Make agent thought and action editable; reload task instead of full reset when restoring the environment

patricebechard · patricebechard · commit 4052cb00a74b · 2025-07-17T09:54:16.000-04:00
diff --git a/src/agentlab/analyze/agent_controller.py b/src/agentlab/analyze/agent_controller.py
@@ -174,6 +174,7 @@ def set_task_selector():
 
                 prepare_agent()
                 set_environment_info()
+                prepare_benchmark()
                 reset_environment()
 
 
@@ -216,6 +217,16 @@ def set_environment_info():
     logger.info(f"Done in {end - start}")
 
 
+def prepare_benchmark():
+    """Ask the server to build the benchmark environment, reporting failures in the Streamlit UI."""
+    logger.info("Preparing benchmark...")
+    start = datetime.now()
+    resp = requests.post(f"{SERVER_URL}/prepare_benchmark")
+    if resp.status_code != 200 or resp.json().get("status") != "success":
+        st.error(resp.text)  # raw body: a non-200 reply may not be JSON, so resp.json() could raise here
+    logger.info(f"Done in {datetime.now() - start}")
+
+
 def reset_environment():
     logger.info("Restarting environment...")
     start = datetime.now()
@@ -242,6 +253,29 @@ def reset_environment():
     logger.info(f"Done postproc in {end - start}")
 
 
+def reload_task():
+    """Reload the current task on the server and cache the returned observation.
+
+    On failure the server response is shown with ``st.error`` and ``last_obs`` is left unchanged.
+    """
+    logger.info("Reloading task...")
+    start = datetime.now()
+    resp = requests.post(f"{SERVER_URL}/reload_task")
+    if resp.status_code != 200 or resp.json().get("status") != "success":
+        st.error(resp.text)  # error payloads carry no "obs", so stop before reading it
+        return
+    obs = resp.json()["obs"]
+    if "screenshot" in obs:
+        # the screenshot arrives as base64 data plus dtype and shape; rebuild the numpy array
+        shot = obs["screenshot"]
+        pixels = np.frombuffer(base64.b64decode(shot["data"]), dtype=np.dtype(shot["dtype"]))
+        obs["screenshot"] = pixels.reshape(shot["shape"])
+    if st.session_state.agent.obs_preprocessor:
+        obs = st.session_state.agent.obs_preprocessor(obs)
+    st.session_state.last_obs = obs
+    logger.info(f"Done in {datetime.now() - start}")
+
+
 def step_environment(action):
     logger.info("Stepping environment...")
     start = datetime.now()
@@ -269,7 +303,7 @@ def step_environment(action):
 
 
 def restore_environment():
-    reset_environment()
+    reload_task()
     for action in st.session_state.actions_history:
         step_environment(action)
 
@@ -285,21 +319,46 @@ def get_action():
 
 
 def set_agent_state_box():
+    """Render the Goal, Think and Action boxes; Think and Action are editable text areas."""
+    # Custom CSS so text areas match the look of st.code blocks (monospace font, light background)
+    st.markdown(
+        """
+        <style>
+        @import url('https://fonts.googleapis.com/css2?family=Handlee&family=IBM+Plex+Mono:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&family=Sedgwick+Ave&display=swap');
+        textarea, .stTextArea textarea {
+            font-family: "IBM Plex Mono", monospace !important;
+            font-size: 14px !important;
+            font-weight: 400;
+            font-style: normal;
+            line-height: 1.6 !important;
+            padding-top: 18px !important;
+            background-color: #F8F9FB !important;
+
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
     # set agent state and goal box
     with st.container():
         col1, col2, col3 = st.columns([1, 1, 1])
         with col1:
             with st.container(border=True, height=250):
                 st.markdown("**Goal**")
+                # The goal is display-only, so it stays in st.code; Think and Action below are editable.
                 st.code(st.session_state.agent.obs_history[-1]["goal"], wrap_lines=True, language=None, height=175)
         with col2:
             with st.container(border=True, height=250):
                 st.markdown("**Think**")
-                st.code(st.session_state.action_info.think, wrap_lines=True, language=None, height=175)
+                st.session_state.action_info.think = st.text_area(
+                    "Think", st.session_state.action_info.think, height=172, label_visibility="collapsed"
+                )
         with col3:
             with st.container(border=True, height=250):
                 st.markdown("**Action**")
-                st.code(st.session_state.action, wrap_lines=True, language="python", height=175)
+                st.session_state.action = st.text_area("Action", st.session_state.action, height=172, label_visibility="collapsed")
+                # The text_area return value is stored back so manual edits replace st.session_state.action.
 
 
 def set_prompt_modifier():
diff --git a/src/agentlab/analyze/server.py b/src/agentlab/analyze/server.py
@@ -106,6 +106,10 @@ def __init__(self):
         self.last_obs = None
         self.last_info = None
 
+        # used to reload task
+        self.start_info = None
+        self.start_url = None
+
     def set_info(
         self,
         benchmark_name: str,
@@ -223,12 +227,7 @@ def status(self) -> dict:
             }
         )
 
-    def reset(self) -> dict:
-        """Reset the environment
-
-        :return: Dictionary with obs and info
-        :rtype: dict
-        """
+    def prepare_benchmark(self) -> dict:
         start = time.time()
         if not self.info_set:
             return make_json_safe(
@@ -237,54 +236,124 @@ def reset(self) -> dict:
                     "message": "Environment info not set. Please set the environment info first.",
                 }
             )
+
         if self.env is not None:
             # close the current environment first
             self.env.close()
             self.env = None
-
         # then create the new environment
         benchmark = DEFAULT_BENCHMARKS[self.benchmark_name]()
         benchmark.env_args_list = [
             elem for elem in benchmark.env_args_list if elem.task_name == self.task_name and str(elem.task_seed) == str(self.seed)
         ]
+        start = time.time()
         benchmark.prepare_backends()
+        end = time.time()
+        logger.info(f"prepare_backends done in {end - start}")
 
         env_args = benchmark.env_args_list[0]
-        # env_args.headless = False
-
         self.action_mapping = import_from_path(self.action_mapping_fn)
-        end = time.time()
-        logger.info(f"init reset done in {end - start}")
+
+        # create environment
         start = time.time()
         self.env = env_args.make_env(self.action_mapping, self.exp_dir)
+        print(self.env)
         end = time.time()
         logger.info(f"make_env done in {end - start}")
+        return make_json_safe(
+            {
+                "status": "success",
+                "message": "Environment prepared successfully.",
+            }
+        )
+
+    def reload_task(self) -> dict:
+        """Reload the task by navigating back to its start URL (requires a prior ``reset``).
+
+        :return: Dictionary with status and message, plus obs and info on success
+        :rtype: dict
+        """
+        start = time.time()
+        if not self.info_set:
+            return make_json_safe(
+                {
+                    "status": "error",
+                    "message": "Environment info not set. Please set the environment info first.",
+                }
+            )
+        if not self.env:
+            return make_json_safe(
+                {
+                    "status": "error",
+                    "message": "Environment not created. Please create an environment first.",
+                }
+            )
+        if self.start_url is None:
+            # start_url is only recorded by reset(); navigating to None cannot succeed
+            return make_json_safe(
+                {"status": "error", "message": "Task not started. Please reset the environment first."}
+            )
+
+        page = self.env.unwrapped.page
+        tmp_start = time.time()
+        page.goto(self.start_url, wait_until="load")
+        logger.info(f"goto done in {time.time() - tmp_start}")
+        tmp_start = time.time()
+        page.evaluate("window.localStorage.clear(); window.sessionStorage.clear();")
+        logger.info(f"clear storage done in {time.time() - tmp_start}")
+        obs = self.env.unwrapped._get_obs()
+        logger.info(f"reload_task done in {time.time() - start}")
+        self.last_obs = copy.deepcopy(obs)
+        self.last_info = copy.deepcopy(self.start_info)
+        return make_json_safe(
+            {
+                "status": "success",
+                "message": "Task reloaded successfully.",
+                "obs": self.last_obs,
+                "info": self.last_info,
+            }
+        )
+
+    def reset(self) -> dict:
+        """Reset the environment and record the start URL and info that ``reload_task`` reuses
+
+        :return: Dictionary with status and message, plus obs and info on success
+        :rtype: dict
+        """
         start = time.time()
+        if not self.info_set:
+            return make_json_safe(
+                {
+                    "status": "error",
+                    "message": "Environment info not set. Please set the environment info first.",
+                }
+            )
+        elif not self.env:
+            return make_json_safe(
+                {
+                    "status": "error",
+                    "message": "Environment not created. Please create an environment first.",
+                }
+            )
+
         # finally, reset the environment
+        start = time.time()
         obs, info = self.env.reset(seed=self.seed)
-        self.last_obs = copy.deepcopy(obs)
-        self.last_info = copy.deepcopy(info)
         end = time.time()
         logger.info(f"env reset done in {end - start}")
-        start = time.time()
-        # out = make_json_safe(
-        out = make_json_safe(
+
+        self.last_obs = copy.deepcopy(obs)
+        self.last_info = copy.deepcopy(info)
+        self.start_info = copy.deepcopy(info)  # returned as "info" by reload_task
+        self.start_url = copy.deepcopy(self.env.unwrapped.page.url)  # reload_task navigates back here
+        return make_json_safe(
             {
                 "status": "success",
                 "message": "Environment reset successfully",
                 "obs": self.last_obs,
                 "info": self.last_info,
             }
         )
-        end = time.time()
-        logger.info(f"payload cleaned in {end - start}")
-        # log payload size
-        from pympler import asizeof
-
-        logger.info(f"Payload size: {asizeof.asizeof(out)} bytes")
-        # print(out)
-        # return {"status": "success", "message": "Environment reset successfully"}
-        return out
 
     def step(self, action: str) -> dict:
         """Step the environment
@@ -398,6 +467,16 @@ def status():
     return env.status()
 
 
+@app.post("/prepare_benchmark")
+def prepare_benchmark():
+    return env.prepare_benchmark()  # closes any existing env, then builds one for the selected task
+
+
+@app.post("/reload_task")
+def reload_task():
+    return env.reload_task()  # navigates back to the task's start URL instead of a full env reset
+
+
 @app.post("/reset")
 def reset():
     return env.reset()