Skip to content

Commit 15b6eb0

Browse files
authored
Merge pull request #265 from ServiceNow/deep_debug
Deep debug
2 parents f090b5c + 74a0095 commit 15b6eb0

File tree

6 files changed

+77
-10
lines changed

6 files changed

+77
-10
lines changed

.vscode/settings.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
"editor.formatOnSave": true,
44
"editor.defaultFormatter": "ms-python.black-formatter",
55
"editor.codeActionsOnSave": {
6-
"source.organizeImports": "always",
7-
"source.fixAll": "always",
6+
"source.organizeImports": "explicit",
7+
"source.fixAll": "never",
88
},
99
},
1010
// "python.analysis.languageServerMode": "full",

src/agentlab/agents/tool_use_agent/hint_db.csv

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@ June 11,miniwob.drag-items,30,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7
1616
June 18,miniwob.count-shape,23,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7-sonnet-20250219,miniwob,miniwob,allac,Shape and letters size comparison in miniwob,"Shapes or items have different colors and different size. Size is relative to the other objects in the white area and is either ""large"" or ""small"". Shapes that are larger than the average shape or letter are considered ""large"". Others are ""small""."
1717
June 18,miniwob.count-shape,23,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7-sonnet-20250219,miniwob,miniwob,allac,communicate answer in miniwob,Answer by clicking one of the buttons describing multiple choices.
1818
June 18,miniwob.count-shape,23,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7-sonnet-20250219,miniwob,miniwob,allac,Symbols of colors in miniwob,"Colors are distinct in this task, e.g., cyan is not a type of blue. "
19-
June 18,miniwob.form-sequence-2,23,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7-sonnet-20250219,miniwob,miniwob,allac,Reporting results in miniwob,Make sure to click submit to finish the task.
19+
June 18,miniwob.form-sequence-2,23,claude-3-7-sonnet-20250219,MultiToolUse-claude-3-7-sonnet-20250219,miniwob,miniwob,allac,Reporting results in miniwob,Make sure to click submit to finish the task.
20+
July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,"If you enter the value in the wrong field, the task may be terminated immediately. The field you are looking for may be in another tab. You have to look around."
21+
July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,"Before clicking submit, make sure that all fields are filled properly. Then click submit."
22+
July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,Avoid going back and forth between tabs to reduce the number of actions.
23+
July 14,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,"When you see an auto-complete list, make sure to select an element from it."

src/agentlab/analyze/agent_xray.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,9 @@ def get_episode_info(info: Info):
926926
927927
{code(step_info.task_info)}
928928
929+
**Terminated or Truncated:**
930+
{code(f"Terminated: {step_info.terminated}, Truncated: {step_info.truncated}")}
931+
929932
**exp_dir:**
930933
931934
<small style="line-height: 1; margin: 0; padding: 0;">{code(exp_dir_str)}</small>"""
@@ -1247,8 +1250,17 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
12471250
warning("No step info to plot")
12481251
return None
12491252

1250-
# this allows to pop labels to make sure we don't use more than 1 for the legend
1251-
labels = ["reset", "env", "agent", "exec action", "action error"]
1253+
# One label per profiling stage; each is popped on first use so the legend shows it only once
1254+
labels = [
1255+
"reset",
1256+
"env",
1257+
"agent",
1258+
"exec action",
1259+
"action error",
1260+
"wait for page",
1261+
"validation",
1262+
"get observation",
1263+
]
12521264
labels = {e: e for e in labels}
12531265

12541266
colors = plt.get_cmap("tab20c").colors
@@ -1257,6 +1269,7 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
12571269
all_times = []
12581270
step_times = []
12591271
for i, step_info in progress_fn(list(enumerate(step_info_list)), desc="Building plot."):
1272+
assert isinstance(step_info, StepInfo), f"Expected StepInfo, got {type(step_info)}"
12601273
step = step_info.step
12611274

12621275
prof = deepcopy(step_info.profiling)
@@ -1278,6 +1291,39 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
12781291
label = labels.pop("exec action", None)
12791292
add_patch(ax, prof.action_exec_start, prof.action_exec_stop, colors[3], label)
12801293

1294+
# NEW: Add wait for page loading visualization
1295+
if (
1296+
hasattr(prof, "wait_for_page_loading_start")
1297+
and prof.wait_for_page_loading_start > 0
1298+
):
1299+
add_patch(
1300+
ax,
1301+
prof.wait_for_page_loading_start,
1302+
prof.wait_for_page_loading_stop,
1303+
colors[19],
1304+
labels.pop("wait for page", None),
1305+
)
1306+
1307+
# NEW: Add validation visualization
1308+
if hasattr(prof, "validation_start") and prof.validation_start > 0:
1309+
add_patch(
1310+
ax,
1311+
prof.validation_start,
1312+
prof.validation_stop,
1313+
colors[8],
1314+
labels.pop("validation", None),
1315+
)
1316+
1317+
# NEW: Add get observation visualization
1318+
if hasattr(prof, "get_observation_start") and prof.get_observation_start > 0:
1319+
add_patch(
1320+
ax,
1321+
prof.get_observation_start,
1322+
prof.get_observation_stop,
1323+
colors[12],
1324+
labels.pop("get observation", None),
1325+
)
1326+
12811327
try:
12821328
next_step_error = step_info_list[i + 1].obs["last_action_error"]
12831329
except (IndexError, KeyError, TypeError):
@@ -1344,7 +1390,6 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
13441390

13451391
ax.set_ylim(0, 1)
13461392
ax.set_xlim(0, max(all_times) + 1)
1347-
# plt.gca().autoscale()
13481393

13491394
ax.set_xlabel("Time")
13501395
ax.set_yticks([])
@@ -1353,7 +1398,7 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
13531398
ax.legend(
13541399
loc="upper center",
13551400
bbox_to_anchor=(0.5, 1.2),
1356-
ncol=5,
1401+
ncol=8, # Updated to accommodate new labels
13571402
frameon=True,
13581403
)
13591404

src/agentlab/analyze/overlay_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ def overlay_rectangle(
299299

300300
if dashed:
301301
# Draw dashed rectangle
302-
print("Drawing dashed rectangle")
303302
linedashed(draw, x, y, x + w, y, color, width)
304303
linedashed(draw, x + w, y, x + w, y + h, color, width)
305304
linedashed(draw, x + w, y + h, x, y + h, color, width)

src/agentlab/experiments/loop.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import numpy as np
2121
from browsergym.core.chat import Chat
2222
from browsergym.experiments.agent import Agent
23-
from browsergym.experiments.utils import count_messages_token, count_tokens
23+
from browsergym.experiments.utils import count_tokens
2424
from dataclasses_json import DataClassJsonMixin
2525
from PIL import Image
2626
from tqdm import tqdm
@@ -48,6 +48,7 @@ class EnvArgs(DataClassJsonMixin):
4848
slow_mo: Optional[int] = None # use default value from BrowserGym
4949
storage_state: Optional[str | Path | dict] = None
5050
task_kwargs: Optional[dict] = None # use default value from BrowserGym
51+
pre_observation_delay: float = None # seconds, wait for JS events to be fired
5152

5253
def make_env(
5354
self, action_mapping, exp_dir, exp_task_kwargs: dict = {}, use_raw_page_output=True
@@ -71,6 +72,8 @@ def make_env(
7172
extra_kwargs["viewport"] = self.viewport
7273
if self.slow_mo is not None:
7374
extra_kwargs["slow_mo"] = self.slow_mo
75+
if self.pre_observation_delay is not None:
76+
extra_kwargs["pre_observation_delay"] = self.pre_observation_delay
7477
if self.storage_state:
7578
extra_kwargs["pw_context_kwargs"] = {"storage_state": self.storage_state}
7679
if self.task_kwargs is not None:
@@ -142,6 +145,12 @@ class StepTimestamps:
142145
env_stop: float = 0
143146
agent_start: float = 0
144147
agent_stop: float = 0
148+
wait_for_page_loading_start: float = 0
149+
wait_for_page_loading_stop: float = 0
150+
validation_start: float = 0
151+
validation_stop: float = 0
152+
get_observation_start: float = 0
153+
get_observation_stop: float = 0
145154

146155

147156
@dataclass
@@ -199,6 +208,12 @@ def from_step(self, env: gym.Env, action: str, obs_preprocessor: callable):
199208
t.action_exec_start = env_info["action_exec_start"] # start
200209
t.action_exect_after_timeout = env_info["action_exec_stop"]
201210
t.action_exec_stop = env_info["action_exec_stop"] - env_info["action_exec_timeout"]
211+
t.wait_for_page_loading_start = env_info.get("wait_for_page_loading_start", None)
212+
t.wait_for_page_loading_stop = env_info.get("wait_for_page_loading_stop", None)
213+
t.validation_start = env_info.get("validation_start", None)
214+
t.validation_stop = env_info.get("validation_stop", None)
215+
t.get_observation_start = env_info.get("get_observation_start", None)
216+
t.get_observation_stop = env_info.get("get_observation_stop", None)
202217

203218
if obs_preprocessor:
204219
self.obs = obs_preprocessor(self.obs)
@@ -447,6 +462,10 @@ def run(self):
447462
logger.debug("Sending action to environment.")
448463
step_info.from_step(env, action, obs_preprocessor=agent.obs_preprocessor)
449464
logger.debug("Environment stepped.")
465+
if step_info.is_done:
466+
logger.debug(
467+
f"Episode done: terminated: {step_info.terminated}, truncated: {step_info.truncated}."
468+
)
450469

451470
except Exception as e:
452471
err_msg = f"Exception uncaught by agent or environment in task {self.env_args.task_name}.\n{type(e).__name__}:\n{e}"

src/agentlab/experiments/study.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,7 @@ def set_demo_mode(env_args_list: list[EnvArgs]):
726726
env_args.slow_mo = 1000
727727

728728

729-
def _convert_env_args(env_args_list):
729+
def _convert_env_args(env_args_list) -> list[EnvArgs]:
730730
"""Return a list where every element is the *new* EnvArgs.
731731
732732
For backward compatibility, we need to convert the old EnvArgs to the new one.

0 commit comments

Comments
 (0)