Skip to content

Commit 3608dd6

Browse files
committed
exp for new models
1 parent 53c16f1 commit 3608dd6

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed

main_exp_new_models.py

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
"""
Note: This script is a convenience script to launch experiments instead of using
the command line.

Copy this script and modify at will, but don't push your changes to the
repository.
"""

import logging

from agentlab.agents.generic_agent import (
    CHAT_MODEL_ARGS_DICT,
    FLAGS_GPT_4o,
    GenericAgentArgs,
)
from agentlab.experiments.study import Study

logging.getLogger().setLevel(logging.INFO)

# Agents to evaluate. Here: the generic agent with the GPT-4.1 mini chat model
# entry and the FLAGS_GPT_4o flag set.
agent_args = [
    GenericAgentArgs(
        chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4.1-mini-2025-04-14"],
        flags=FLAGS_GPT_4o,
    )
]


# Select the benchmark to run on (uncomment exactly one).
benchmark = "miniwob_tiny_test"
# benchmark = "miniwob"
# benchmark = "workarena_l1"
# benchmark = "workarena_l2"
# benchmark = "workarena_l3"
# benchmark = "webarena"

# Set reproducibility_mode = True for reproducibility.
# This will "ask" agents to be deterministic. Also, it will prevent you from
# launching if you have local changes. For your custom agents you need to
# implement set_reproducibility_mode.
reproducibility_mode = False

# Set relaunch = True to relaunch an existing study. This will continue
# incomplete experiments and relaunch errored experiments.
relaunch = False

# Number of parallel jobs
n_jobs = 4  # Make sure to use 1 job when debugging in VSCode
# n_jobs = -1  # to use all available cores


# Guard needed when a parallel backend starts worker processes that re-import
# this module: without it each worker would try to launch the study again.
if __name__ == "__main__":

    if reproducibility_mode:
        # Plain loop: the method is called for its side effect only, so there
        # is no reason to build a list of its return values.
        for agent in agent_args:
            agent.set_reproducibility_mode()

    if relaunch:
        # Relaunch an existing study: load the most recent one and queue its
        # incomplete and errored experiments.
        study = Study.load_most_recent(contains=None)
        study.find_incomplete(include_errors=True)

    else:
        study = Study(agent_args, benchmark, logging_level_stdout=logging.WARNING)

    study.run(
        n_jobs=n_jobs,
        parallel_backend="ray",  # "ray", "joblib" or "sequential"
        strict_reproducibility=reproducibility_mode,
        n_relaunch=3,
    )

    if reproducibility_mode:
        study.append_to_journal(strict_reproducibility=True)

src/agentlab/agents/generic_agent/__init__.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,23 @@
99
from .agent_configs import (
1010
AGENT_3_5,
1111
AGENT_8B,
12+
AGENT_37_SONNET,
13+
AGENT_CLAUDE_SONNET_35,
14+
AGENT_CLAUDE_SONNET_35_VISION,
1215
AGENT_CUSTOM,
13-
AGENT_LLAMA4_17B_INSTRUCT,
1416
AGENT_LLAMA3_70B,
17+
AGENT_LLAMA4_17B_INSTRUCT,
1518
AGENT_LLAMA31_70B,
19+
CHAT_MODEL_ARGS_DICT,
1620
RANDOM_SEARCH_AGENT,
1721
AGENT_4o,
1822
AGENT_4o_MINI,
19-
AGENT_CLAUDE_SONNET_35,
20-
AGENT_37_SONNET,
21-
AGENT_CLAUDE_SONNET_35_VISION,
22-
AGENT_4o_VISION,
2323
AGENT_4o_MINI_VISION,
24-
AGENT_o3_MINI,
24+
AGENT_4o_VISION,
2525
AGENT_o1_MINI,
26+
AGENT_o3_MINI,
27+
FLAGS_GPT_4o,
28+
GenericAgentArgs,
2629
)
2730

2831
__all__ = [

src/agentlab/llm/llm_configs.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,20 @@
1717
]
1818

1919
CHAT_MODEL_ARGS_DICT = {
20+
"openai/gpt-4.1-mini-2025-04-14": OpenAIModelArgs(
21+
model_name="gpt-4.1-mini-2025-04-14",
22+
max_total_tokens=128_000,
23+
max_input_tokens=128_000,
24+
max_new_tokens=16_384,
25+
vision_support=True,
26+
),
27+
"openai/gpt-4.1-2025-04-14": OpenAIModelArgs(
28+
model_name="gpt-4.1-2025-04-14",
29+
max_total_tokens=128_000,
30+
max_input_tokens=128_000,
31+
max_new_tokens=16_384,
32+
vision_support=True,
33+
),
2034
"openai/o3-mini-2025-01-31": OpenAIModelArgs(
2135
model_name="o3-mini-2025-01-31",
2236
max_total_tokens=200_000,

0 commit comments

Comments
 (0)