Skip to content

Commit 958430c

Browse files
Add docstrings for functions and apply black formatting
1 parent 51cacdb commit 958430c

File tree

4 files changed

+281
-13
lines changed

4 files changed

+281
-13
lines changed

src/agentlab/agents/agent_utils.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import copy
2+
23
from PIL import Image, ImageDraw
34
from playwright.sync_api import Page
4-
from agentlab.llm.llm_utils import img_to_base_64
5+
56
from agentlab.analyze import overlay_utils
7+
from agentlab.llm.llm_utils import img_to_base_64
68

79

810
def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
@@ -139,6 +141,3 @@ def overlay_action(obs, action):
139141
act_img = Image.fromarray(act_img)
140142
overlay_utils.annotate_action(act_img, action, properties=obs["extra_element_properties"])
141143
return img_to_base_64(act_img)
142-
143-
144-

src/agentlab/agents/hitl_agent/generic_human_guided_agent.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from PIL import Image
1212

1313
from agentlab.agents import dynamic_prompting as dp
14+
from agentlab.agents.agent_utils import overlay_action
1415
from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericAgentArgs
1516
from agentlab.agents.generic_agent.generic_agent_prompt import MainPrompt
1617
from agentlab.agents.hitl_agent.hint_labelling import (
@@ -22,10 +23,10 @@
2223
Discussion,
2324
HumanMessage,
2425
SystemMessage,
26+
img_to_base_64,
2527
)
2628
from agentlab.llm.tracking import cost_tracker_decorator
27-
from agentlab.agents.agent_utils import overlay_action
28-
from agentlab.llm.llm_utils import img_to_base_64
29+
2930

3031
class CandidatesGeneration(dp.PromptElement):
3132
# Ask for multiple alternatives; each candidate must contain <think> and <action>.
@@ -323,6 +324,16 @@ def get_action(self, obs):
323324

324325

325326
def get_base_agent(llm_config):
327+
"""Creates and returns a MultipleProposalGenericAgentArgs instance with
328+
the LLM configuration selected from CHAT_MODEL_ARGS_DICT.
329+
330+
Args:
331+
llm_config: The LLM configuration key to use from CHAT_MODEL_ARGS_DICT.
332+
333+
Returns:
334+
MultipleProposalGenericAgentArgs: Configured agent arguments instance.
335+
"""
336+
326337
from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS
327338
from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
328339

src/agentlab/agents/hitl_agent/hitl_agent.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def get_action(self, obs):
9090
candidates = self.subagent.get_candidate_generations(
9191
obs, hint=step_hint if step_hint else None, n_candidates=3
9292
)
93+
step_n_human_intervention_rounds += 1
9394
suggestions = [
9495
{"action": c["action"], "think": c["agent_info"].think} for c in candidates
9596
]
@@ -156,6 +157,19 @@ def set_reproducibility_mode(self):
156157

157158

158159
def get_base_human_in_the_loop_genericagent(llm_config):
160+
"""
161+
Create a base human-in-the-loop generic agent configuration from the given CHAT_MODEL_ARGS_DICT key.
162+
163+
This function creates a HumanInTheLoopAgentArgs instance with a MultiCandidateGenericAgent
164+
as the subagent, configured with the specified LLM configuration and base flags.
165+
166+
Args:
167+
llm_config (str): The LLM configuration key to use from CHAT_MODEL_ARGS_DICT.
168+
169+
Returns:
170+
HumanInTheLoopAgentArgs: Configured human-in-the-loop agent arguments with
171+
a multi-candidate generic agent as the subagent.
172+
"""
159173
from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS
160174
from agentlab.agents.hitl_agent.hitl_agent import HumanInTheLoopAgentArgs
161175
from agentlab.agents.hitl_agent.multi_candidate_generic_agent import (

tutorials/2_eval_on_miniwob/inspect_results.ipynb

Lines changed: 251 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"id": "58086537",
77
"metadata": {},
88
"outputs": [],
@@ -25,10 +25,47 @@
2525
},
2626
{
2727
"cell_type": "code",
28-
"execution_count": null,
28+
"execution_count": 5,
29+
"id": "7901cccc",
30+
"metadata": {},
31+
"outputs": [
32+
{
33+
"data": {
34+
"text/plain": [
35+
"PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results')"
36+
]
37+
},
38+
"execution_count": 5,
39+
"metadata": {},
40+
"output_type": "execute_result"
41+
}
42+
],
43+
"source": [
44+
"RESULTS_DIR"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 6,
2950
"id": "50be19a9",
3051
"metadata": {},
31-
"outputs": [],
52+
"outputs": [
53+
{
54+
"name": "stdout",
55+
"output_type": "stream",
56+
"text": [
57+
"/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create\n"
58+
]
59+
},
60+
{
61+
"name": "stderr",
62+
"output_type": "stream",
63+
"text": [
64+
"Searching experiments directories.: 100%|██████████| 1/1 [00:00<00:00, 5433.04it/s]\n",
65+
"Loading results: 100%|██████████| 1/1 [00:00<00:00, 373.26it/s]\n"
66+
]
67+
}
68+
],
3269
"source": [
3370
"# replace this by your desired directory if needed.\n",
3471
"result_dir = get_most_recent_study(RESULTS_DIR, contains=None)\n",
@@ -39,15 +76,222 @@
3976
},
4077
{
4178
"cell_type": "code",
42-
"execution_count": null,
79+
"execution_count": 7,
80+
"id": "82cc1557",
81+
"metadata": {},
82+
"outputs": [
83+
{
84+
"data": {
85+
"text/plain": [
86+
"PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create')"
87+
]
88+
},
89+
"execution_count": 7,
90+
"metadata": {},
91+
"output_type": "execute_result"
92+
}
93+
],
94+
"source": [
95+
"result_dir"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": 3,
43101
"id": "a424c470",
44102
"metadata": {},
45-
"outputs": [],
103+
"outputs": [
104+
{
105+
"name": "stdout",
106+
"output_type": "stream",
107+
"text": [
108+
"Found multiple configuration, averaging across tasks and returning a per-agent report.\n"
109+
]
110+
},
111+
{
112+
"data": {
113+
"text/html": [
114+
"<style type=\"text/css\">\n",
115+
"#T_1d2fe th {\n",
116+
" white-space: pre-wrap;\n",
117+
"}\n",
118+
"</style>\n",
119+
"<table id=\"T_1d2fe\">\n",
120+
" <thead>\n",
121+
" <tr>\n",
122+
" <th class=\"blank level0\" >&nbsp;</th>\n",
123+
" <th id=\"T_1d2fe_level0_col0\" class=\"col_heading level0 col0\" >agent.agent\n",
124+
"name</th>\n",
125+
" <th id=\"T_1d2fe_level0_col1\" class=\"col_heading level0 col1\" >env.benchmark</th>\n",
126+
" <th id=\"T_1d2fe_level0_col2\" class=\"col_heading level0 col2\" >avg\n",
127+
"reward</th>\n",
128+
" <th id=\"T_1d2fe_level0_col3\" class=\"col_heading level0 col3\" >std\n",
129+
"err</th>\n",
130+
" <th id=\"T_1d2fe_level0_col4\" class=\"col_heading level0 col4\" >avg\n",
131+
"steps</th>\n",
132+
" <th id=\"T_1d2fe_level0_col5\" class=\"col_heading level0 col5\" >n\n",
133+
"completed</th>\n",
134+
" <th id=\"T_1d2fe_level0_col6\" class=\"col_heading level0 col6\" >n\n",
135+
"err</th>\n",
136+
" </tr>\n",
137+
" </thead>\n",
138+
" <tbody>\n",
139+
" <tr>\n",
140+
" <th id=\"T_1d2fe_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
141+
" <td id=\"T_1d2fe_row0_col0\" class=\"data row0 col0\" >HITL-GenericAgent-gpt-5-mini-2025-08-07</td>\n",
142+
" <td id=\"T_1d2fe_row0_col1\" class=\"data row0 col1\" >workarena</td>\n",
143+
" <td id=\"T_1d2fe_row0_col2\" class=\"data row0 col2\" >nan</td>\n",
144+
" <td id=\"T_1d2fe_row0_col3\" class=\"data row0 col3\" >nan</td>\n",
145+
" <td id=\"T_1d2fe_row0_col4\" class=\"data row0 col4\" >nan</td>\n",
146+
" <td id=\"T_1d2fe_row0_col5\" class=\"data row0 col5\" >0/1</td>\n",
147+
" <td id=\"T_1d2fe_row0_col6\" class=\"data row0 col6\" >0</td>\n",
148+
" </tr>\n",
149+
" </tbody>\n",
150+
"</table>\n"
151+
],
152+
"text/plain": [
153+
"<pandas.io.formats.style.Styler at 0x125c55850>"
154+
]
155+
},
156+
"metadata": {},
157+
"output_type": "display_data"
158+
}
159+
],
46160
"source": [
47161
"report = inspect_results.global_report(result_df)\n",
48162
"inspect_results.display_report(report)"
49163
]
50164
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"id": "f86e44fd",
169+
"metadata": {},
170+
"outputs": [
171+
{
172+
"data": {
173+
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
174+
"columns": [
175+
{
176+
"name": "('agent.agent_name', 'env.benchmark')",
177+
"rawType": "object",
178+
"type": "unknown"
179+
},
180+
{
181+
"name": "avg_reward",
182+
"rawType": "float64",
183+
"type": "float"
184+
},
185+
{
186+
"name": "std_err",
187+
"rawType": "float64",
188+
"type": "float"
189+
},
190+
{
191+
"name": "avg_steps",
192+
"rawType": "float64",
193+
"type": "float"
194+
},
195+
{
196+
"name": "n_completed",
197+
"rawType": "object",
198+
"type": "string"
199+
},
200+
{
201+
"name": "n_err",
202+
"rawType": "int64",
203+
"type": "integer"
204+
}
205+
],
206+
"ref": "ea68795e-a1d8-404e-9e36-1061d8fa9e87",
207+
"rows": [
208+
[
209+
"('HITL-GenericAgent-gpt-5-mini-2025-08-07', 'workarena')",
210+
null,
211+
null,
212+
null,
213+
"0/1",
214+
"0"
215+
]
216+
],
217+
"shape": {
218+
"columns": 5,
219+
"rows": 1
220+
}
221+
},
222+
"text/html": [
223+
"<div>\n",
224+
"<style scoped>\n",
225+
" .dataframe tbody tr th:only-of-type {\n",
226+
" vertical-align: middle;\n",
227+
" }\n",
228+
"\n",
229+
" .dataframe tbody tr th {\n",
230+
" vertical-align: top;\n",
231+
" }\n",
232+
"\n",
233+
" .dataframe thead th {\n",
234+
" text-align: right;\n",
235+
" }\n",
236+
"</style>\n",
237+
"<table border=\"1\" class=\"dataframe\">\n",
238+
" <thead>\n",
239+
" <tr style=\"text-align: right;\">\n",
240+
" <th></th>\n",
241+
" <th></th>\n",
242+
" <th>avg_reward</th>\n",
243+
" <th>std_err</th>\n",
244+
" <th>avg_steps</th>\n",
245+
" <th>n_completed</th>\n",
246+
" <th>n_err</th>\n",
247+
" </tr>\n",
248+
" <tr>\n",
249+
" <th>agent.agent_name</th>\n",
250+
" <th>env.benchmark</th>\n",
251+
" <th></th>\n",
252+
" <th></th>\n",
253+
" <th></th>\n",
254+
" <th></th>\n",
255+
" <th></th>\n",
256+
" </tr>\n",
257+
" </thead>\n",
258+
" <tbody>\n",
259+
" <tr>\n",
260+
" <th>HITL-GenericAgent-gpt-5-mini-2025-08-07</th>\n",
261+
" <th>workarena</th>\n",
262+
" <td>NaN</td>\n",
263+
" <td>NaN</td>\n",
264+
" <td>NaN</td>\n",
265+
" <td>0/1</td>\n",
266+
" <td>0</td>\n",
267+
" </tr>\n",
268+
" </tbody>\n",
269+
"</table>\n",
270+
"</div>"
271+
],
272+
"text/plain": [
273+
" avg_reward std_err \\\n",
274+
"agent.agent_name env.benchmark \n",
275+
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena NaN NaN \n",
276+
"\n",
277+
" avg_steps n_completed \\\n",
278+
"agent.agent_name env.benchmark \n",
279+
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena NaN 0/1 \n",
280+
"\n",
281+
" n_err \n",
282+
"agent.agent_name env.benchmark \n",
283+
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena 0 "
284+
]
285+
},
286+
"execution_count": 4,
287+
"metadata": {},
288+
"output_type": "execute_result"
289+
}
290+
],
291+
"source": [
292+
"\n"
293+
]
294+
},
51295
{
52296
"cell_type": "markdown",
53297
"id": "385559d7",
@@ -149,7 +393,7 @@
149393
],
150394
"metadata": {
151395
"kernelspec": {
152-
"display_name": "AgentLab",
396+
"display_name": "agentlab",
153397
"language": "python",
154398
"name": "python3"
155399
},
@@ -163,7 +407,7 @@
163407
"name": "python",
164408
"nbconvert_exporter": "python",
165409
"pygments_lexer": "ipython3",
166-
"version": "3.12.7"
410+
"version": "3.12.9"
167411
}
168412
},
169413
"nbformat": 4,

0 commit comments

Comments
 (0)