Skip to content

Commit 6b78e8e

Browse files
revert change to ipynb
1 parent 88d1d8d commit 6b78e8e

File tree

1 file changed

+7
-251
lines changed

1 file changed

+7
-251
lines changed

tutorials/2_eval_on_miniwob/inspect_results.ipynb

Lines changed: 7 additions & 251 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": null,
66
"id": "58086537",
77
"metadata": {},
88
"outputs": [],
@@ -25,47 +25,10 @@
2525
},
2626
{
2727
"cell_type": "code",
28-
"execution_count": 5,
29-
"id": "7901cccc",
30-
"metadata": {},
31-
"outputs": [
32-
{
33-
"data": {
34-
"text/plain": [
35-
"PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results')"
36-
]
37-
},
38-
"execution_count": 5,
39-
"metadata": {},
40-
"output_type": "execute_result"
41-
}
42-
],
43-
"source": [
44-
"RESULTS_DIR"
45-
]
46-
},
47-
{
48-
"cell_type": "code",
49-
"execution_count": 6,
28+
"execution_count": null,
5029
"id": "50be19a9",
5130
"metadata": {},
52-
"outputs": [
53-
{
54-
"name": "stdout",
55-
"output_type": "stream",
56-
"text": [
57-
"/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create\n"
58-
]
59-
},
60-
{
61-
"name": "stderr",
62-
"output_type": "stream",
63-
"text": [
64-
"Searching experiments directories.: 100%|██████████| 1/1 [00:00<00:00, 5433.04it/s]\n",
65-
"Loading results: 100%|██████████| 1/1 [00:00<00:00, 373.26it/s]\n"
66-
]
67-
}
68-
],
31+
"outputs": [],
6932
"source": [
7033
"# replace this by your desired directory if needed.\n",
7134
"result_dir = get_most_recent_study(RESULTS_DIR, contains=None)\n",
@@ -76,222 +39,15 @@
7639
},
7740
{
7841
"cell_type": "code",
79-
"execution_count": 7,
80-
"id": "82cc1557",
81-
"metadata": {},
82-
"outputs": [
83-
{
84-
"data": {
85-
"text/plain": [
86-
"PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create')"
87-
]
88-
},
89-
"execution_count": 7,
90-
"metadata": {},
91-
"output_type": "execute_result"
92-
}
93-
],
94-
"source": [
95-
"result_dir"
96-
]
97-
},
98-
{
99-
"cell_type": "code",
100-
"execution_count": 3,
42+
"execution_count": null,
10143
"id": "a424c470",
10244
"metadata": {},
103-
"outputs": [
104-
{
105-
"name": "stdout",
106-
"output_type": "stream",
107-
"text": [
108-
"Found multiple configuration, averaging across tasks and returning a per-agent report.\n"
109-
]
110-
},
111-
{
112-
"data": {
113-
"text/html": [
114-
"<style type=\"text/css\">\n",
115-
"#T_1d2fe th {\n",
116-
" white-space: pre-wrap;\n",
117-
"}\n",
118-
"</style>\n",
119-
"<table id=\"T_1d2fe\">\n",
120-
" <thead>\n",
121-
" <tr>\n",
122-
" <th class=\"blank level0\" >&nbsp;</th>\n",
123-
" <th id=\"T_1d2fe_level0_col0\" class=\"col_heading level0 col0\" >agent.agent\n",
124-
"name</th>\n",
125-
" <th id=\"T_1d2fe_level0_col1\" class=\"col_heading level0 col1\" >env.benchmark</th>\n",
126-
" <th id=\"T_1d2fe_level0_col2\" class=\"col_heading level0 col2\" >avg\n",
127-
"reward</th>\n",
128-
" <th id=\"T_1d2fe_level0_col3\" class=\"col_heading level0 col3\" >std\n",
129-
"err</th>\n",
130-
" <th id=\"T_1d2fe_level0_col4\" class=\"col_heading level0 col4\" >avg\n",
131-
"steps</th>\n",
132-
" <th id=\"T_1d2fe_level0_col5\" class=\"col_heading level0 col5\" >n\n",
133-
"completed</th>\n",
134-
" <th id=\"T_1d2fe_level0_col6\" class=\"col_heading level0 col6\" >n\n",
135-
"err</th>\n",
136-
" </tr>\n",
137-
" </thead>\n",
138-
" <tbody>\n",
139-
" <tr>\n",
140-
" <th id=\"T_1d2fe_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
141-
" <td id=\"T_1d2fe_row0_col0\" class=\"data row0 col0\" >HITL-GenericAgent-gpt-5-mini-2025-08-07</td>\n",
142-
" <td id=\"T_1d2fe_row0_col1\" class=\"data row0 col1\" >workarena</td>\n",
143-
" <td id=\"T_1d2fe_row0_col2\" class=\"data row0 col2\" >nan</td>\n",
144-
" <td id=\"T_1d2fe_row0_col3\" class=\"data row0 col3\" >nan</td>\n",
145-
" <td id=\"T_1d2fe_row0_col4\" class=\"data row0 col4\" >nan</td>\n",
146-
" <td id=\"T_1d2fe_row0_col5\" class=\"data row0 col5\" >0/1</td>\n",
147-
" <td id=\"T_1d2fe_row0_col6\" class=\"data row0 col6\" >0</td>\n",
148-
" </tr>\n",
149-
" </tbody>\n",
150-
"</table>\n"
151-
],
152-
"text/plain": [
153-
"<pandas.io.formats.style.Styler at 0x125c55850>"
154-
]
155-
},
156-
"metadata": {},
157-
"output_type": "display_data"
158-
}
159-
],
45+
"outputs": [],
16046
"source": [
16147
"report = inspect_results.global_report(result_df)\n",
16248
"inspect_results.display_report(report)"
16349
]
16450
},
165-
{
166-
"cell_type": "code",
167-
"execution_count": null,
168-
"id": "f86e44fd",
169-
"metadata": {},
170-
"outputs": [
171-
{
172-
"data": {
173-
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
174-
"columns": [
175-
{
176-
"name": "('agent.agent_name', 'env.benchmark')",
177-
"rawType": "object",
178-
"type": "unknown"
179-
},
180-
{
181-
"name": "avg_reward",
182-
"rawType": "float64",
183-
"type": "float"
184-
},
185-
{
186-
"name": "std_err",
187-
"rawType": "float64",
188-
"type": "float"
189-
},
190-
{
191-
"name": "avg_steps",
192-
"rawType": "float64",
193-
"type": "float"
194-
},
195-
{
196-
"name": "n_completed",
197-
"rawType": "object",
198-
"type": "string"
199-
},
200-
{
201-
"name": "n_err",
202-
"rawType": "int64",
203-
"type": "integer"
204-
}
205-
],
206-
"ref": "ea68795e-a1d8-404e-9e36-1061d8fa9e87",
207-
"rows": [
208-
[
209-
"('HITL-GenericAgent-gpt-5-mini-2025-08-07', 'workarena')",
210-
null,
211-
null,
212-
null,
213-
"0/1",
214-
"0"
215-
]
216-
],
217-
"shape": {
218-
"columns": 5,
219-
"rows": 1
220-
}
221-
},
222-
"text/html": [
223-
"<div>\n",
224-
"<style scoped>\n",
225-
" .dataframe tbody tr th:only-of-type {\n",
226-
" vertical-align: middle;\n",
227-
" }\n",
228-
"\n",
229-
" .dataframe tbody tr th {\n",
230-
" vertical-align: top;\n",
231-
" }\n",
232-
"\n",
233-
" .dataframe thead th {\n",
234-
" text-align: right;\n",
235-
" }\n",
236-
"</style>\n",
237-
"<table border=\"1\" class=\"dataframe\">\n",
238-
" <thead>\n",
239-
" <tr style=\"text-align: right;\">\n",
240-
" <th></th>\n",
241-
" <th></th>\n",
242-
" <th>avg_reward</th>\n",
243-
" <th>std_err</th>\n",
244-
" <th>avg_steps</th>\n",
245-
" <th>n_completed</th>\n",
246-
" <th>n_err</th>\n",
247-
" </tr>\n",
248-
" <tr>\n",
249-
" <th>agent.agent_name</th>\n",
250-
" <th>env.benchmark</th>\n",
251-
" <th></th>\n",
252-
" <th></th>\n",
253-
" <th></th>\n",
254-
" <th></th>\n",
255-
" <th></th>\n",
256-
" </tr>\n",
257-
" </thead>\n",
258-
" <tbody>\n",
259-
" <tr>\n",
260-
" <th>HITL-GenericAgent-gpt-5-mini-2025-08-07</th>\n",
261-
" <th>workarena</th>\n",
262-
" <td>NaN</td>\n",
263-
" <td>NaN</td>\n",
264-
" <td>NaN</td>\n",
265-
" <td>0/1</td>\n",
266-
" <td>0</td>\n",
267-
" </tr>\n",
268-
" </tbody>\n",
269-
"</table>\n",
270-
"</div>"
271-
],
272-
"text/plain": [
273-
" avg_reward std_err \\\n",
274-
"agent.agent_name env.benchmark \n",
275-
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena NaN NaN \n",
276-
"\n",
277-
" avg_steps n_completed \\\n",
278-
"agent.agent_name env.benchmark \n",
279-
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena NaN 0/1 \n",
280-
"\n",
281-
" n_err \n",
282-
"agent.agent_name env.benchmark \n",
283-
"HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena 0 "
284-
]
285-
},
286-
"execution_count": 4,
287-
"metadata": {},
288-
"output_type": "execute_result"
289-
}
290-
],
291-
"source": [
292-
"\n"
293-
]
294-
},
29551
{
29652
"cell_type": "markdown",
29753
"id": "385559d7",
@@ -393,7 +149,7 @@
393149
],
394150
"metadata": {
395151
"kernelspec": {
396-
"display_name": "agentlab",
152+
"display_name": "AgentLab",
397153
"language": "python",
398154
"name": "python3"
399155
},
@@ -407,7 +163,7 @@
407163
"name": "python",
408164
"nbconvert_exporter": "python",
409165
"pygments_lexer": "ipython3",
410-
"version": "3.12.9"
166+
"version": "3.12.7"
411167
}
412168
},
413169
"nbformat": 4,

0 commit comments

Comments
 (0)