Skip to content

Commit 4ccbf41

Browse files
Update repro o1 o3 mini (#216)
* add o1-mini + o3-mini configs * Add o3-mini + o1-mini to repro journal * remove duplicate o3-mini
1 parent 68fd4fd commit 4ccbf41

File tree

5 files changed

+47
-3
lines changed

5 files changed

+47
-3
lines changed

add_study_to_repro_journal.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
from pathlib import Path
3+
from agentlab.experiments.study import Study
4+
5+
6+
# Root directory that holds the experiment result folders.
base_dir = "/home/toolkit/ui_copilot_results"

# Study folders (relative to base_dir) to add to the reproducibility journal.
exp_paths = [
    "2025-01-31_22-08-34_genericagent-o3-mini-2025-01-31-on-workarena-l1",
    # Earlier o1-mini run, intentionally left disabled:
    # '2025-02-02_01-53-45_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1',
    "2025-02-02_01-55-04_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1",
]

# Absolute location of every selected study folder.
full_paths = [os.path.join(base_dir, folder_name) for folder_name in exp_paths]

# Load each study from disk and append it to the journal, passing
# strict_reproducibility=False.
for full_path in full_paths:
    study_dir = Path(full_path)
    study = Study.load(study_dir)
    study.append_to_journal(strict_reproducibility=False)

reproducibility_journal.csv

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,12 @@ ThibaultLSDC,GenericAgent-gpt-4o-mini_vision,visualwebarena,0.13.3,2024-12-02_02
6464
ThibaultLSDC,GenericAgent-gpt-4o_vision,visualwebarena,0.13.3,2024-12-02_07-17-28,7fb7eac8-4bbd-4ebe-be32-15901a7678f2,0.267,0.015,65,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,
6565
ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta_vision,visualwebarena,0.13.3,2024-12-02_09-11-35,22f0611d-aeea-4ee9-a533-b45442b5e080,0.21,0.013,178,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,
6666
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,webarena,0.13.3,2024-12-02_23-18-38,fc5747bc-d998-4942-a0eb-e55a3ccc1cb3,0.184,0.014,213,811/812,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,
67-
67+
Leo Boisvert,GenericAgent-o3-mini-2025-01-31,workarena_l1,0.4.1,2025-01-31_22-08-33,a74cc00f-f743-43a1-9cab-59af8bffa3a2,0.482,0.028,3,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py
68+
M: src/agentlab/agents/generic_agent/agent_configs.py
69+
M: src/agentlab/analyze/agent_xray.py
70+
M: src/agentlab/llm/chat_api.py
71+
M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29,"
72+
Leo Boisvert,GenericAgent-openai_o1-mini-2024-09-12,workarena_l1,0.4.1,2025-02-02_01-55-04,f3e1fcb8-5fc5-4115-9e00-27251508e2c7,0.518,0.028,5,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py
73+
M: src/agentlab/agents/generic_agent/agent_configs.py
74+
M: src/agentlab/analyze/agent_xray.py
75+
M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29,"

src/agentlab/agents/generic_agent/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,17 @@
1717
AGENT_4o_MINI,
1818
AGENT_CLAUDE_SONNET_35,
1919
AGENT_4o_VISION,
20-
AGENT_4o_MINI_VISION,
21-
AGENT_CLAUDE_SONNET_35_VISION,
20+
AGENT_o3_MINI,
21+
AGENT_o1_MINI,
2222
)
2323

2424
__all__ = [
2525
"AGENT_3_5",
2626
"AGENT_4o",
2727
"AGENT_4o_MINI",
2828
"AGENT_4o_VISION",
29+
"AGENT_o3_MINI",
30+
"AGENT_o1_MINI",
2931
"AGENT_LLAMA3_70B",
3032
"AGENT_LLAMA31_70B",
3133
"AGENT_8B",

src/agentlab/agents/generic_agent/agent_configs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,15 @@
265265
flags=FLAGS_GPT_4o,
266266
)
267267

268+
# Generic agent configuration built on the "openai/o3-mini-2025-01-31" entry of
# CHAT_MODEL_ARGS_DICT; it reuses the GPT-4o flag set (FLAGS_GPT_4o) unchanged.
AGENT_o3_MINI = GenericAgentArgs(
269+
chat_model_args=CHAT_MODEL_ARGS_DICT["openai/o3-mini-2025-01-31"],
270+
flags=FLAGS_GPT_4o,
271+
)
272+
273+
# Generic agent configuration built on the "openrouter/openai/o1-mini-2024-09-12"
# entry of CHAT_MODEL_ARGS_DICT; it reuses the GPT-4o flag set (FLAGS_GPT_4o).
# NOTE(review): this looks up an "openrouter/..." key, not an "openai/o1-mini"
# key -- confirm the key is defined in CHAT_MODEL_ARGS_DICT.
AGENT_o1_MINI = GenericAgentArgs(
274+
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/openai/o1-mini-2024-09-12"],
275+
flags=FLAGS_GPT_4o,
276+
)
268277
# GPT-4o vision default config
269278
FLAGS_GPT_4o_VISION = FLAGS_GPT_4o.copy()
270279
FLAGS_GPT_4o_VISION.obs.use_screenshot = True

src/agentlab/llm/llm_configs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@
6363
max_input_tokens=16_384,
6464
max_new_tokens=4096,
6565
),
66+
# Model arguments registered under the "openai/o1-mini" key.
# NOTE(review): model_name carries an "openai/" prefix -- confirm this is the
# identifier the OpenAI client expects (it may need to be plain "o1-mini").
# NOTE(review): max_input_tokens equals max_total_tokens while max_new_tokens is
# 64_000 -- confirm the intended input/output token budget for this model.
# NOTE(review): temperature is set to 0.1 -- confirm the model accepts a
# non-default temperature.
"openai/o1-mini": OpenAIModelArgs(
67+
model_name="openai/o1-mini",
68+
max_total_tokens=128_000,
69+
max_input_tokens=128_000,
70+
max_new_tokens=64_000,
71+
temperature=1e-1,
72+
),
6673
"azure/gpt-35-turbo/gpt-35-turbo": AzureModelArgs(
6774
model_name="gpt-35-turbo",
6875
deployment_name="gpt-35-turbo",

0 commit comments

Comments
 (0)