Skip to content

Commit 2b4775b

Browse files
committed
Merge branch 'dev' into clean-pipeline
2 parents 3a96d56 + e695e11 commit 2b4775b

File tree

6 files changed

+45
-25
lines changed

6 files changed

+45
-25
lines changed

reproducibility_journal.csv

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@ ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,miniwob,0.10.2,2024-1
3939
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.576,0.02,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca,
4040
ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.678,0.019,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca,
4141
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,workarena_l1,0.4.1,2024-10-25_20-32-26,2024-10-25_17-34-45,0.433,0.027,1,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,177ba72a7469e5610e6b615adf1bdcde58cb0298,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca,
42+
Maxime Gasse,GenericAgent-gpt-4o-2024-05-13,weblinx_test,0.0.1.dev13,2024-11-04_16-01-14,2024-11-04_15-59-12,0.123,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.0,1.39.0,0.2.3,6e18fb818a64ec1e3f379c1a6480411d2fd0628b,,0.11.3,3ab1843edb14bfce7d39485f0106d0dc0c2d7486,
43+
ThibaultLSDC,GenericAgent-gpt-4o-mini,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.116,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
44+
ThibaultLSDC,GenericAgent-gpt-4o,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.125,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
45+
ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.137,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
46+
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.089,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
47+
ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.125,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
48+
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.079,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ gitpython # for the reproducibility script
2121
requests
2222
matplotlib
2323
ray[default]
24+
python-slugify

src/agentlab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.3"
1+
__version__ = "0.3.0"

src/agentlab/experiments/study.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
11
import gzip
22
import logging
33
import pickle
4+
import re
5+
import uuid
46
from dataclasses import dataclass
57
from datetime import datetime
68
from pathlib import Path
7-
import uuid
89

910
import bgym
1011
from bgym import Benchmark, EnvArgs, ExpArgs
12+
from slugify import slugify
1113

1214
from agentlab.agents.agent_args import AgentArgs
1315
from agentlab.analyze import inspect_results
1416
from agentlab.experiments import args
1517
from agentlab.experiments import reproducibility_util as repro
1618
from agentlab.experiments.exp_utils import RESULTS_DIR, add_dependencies
17-
from agentlab.experiments.launch_exp import find_incomplete, run_experiments, non_dummy_count
19+
from agentlab.experiments.launch_exp import (
20+
find_incomplete,
21+
non_dummy_count,
22+
run_experiments,
23+
)
1824

1925
logger = logging.getLogger(__name__)
2026

@@ -220,6 +226,9 @@ def name(self):
220226
study_name = f"{agent_names[0]}_on_{self.benchmark.name}"
221227
else:
222228
study_name = f"{len(agent_names)}_agents_on_{self.benchmark.name}"
229+
230+
study_name = slugify(study_name, max_length=100, allow_unicode=True)
231+
223232
if self.suffix:
224233
study_name += f"_{self.suffix}"
225234
return study_name

src/agentlab/llm/llm_configs.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,41 @@
2020
"openai/gpt-4o-mini-2024-07-18": OpenAIModelArgs(
2121
model_name="gpt-4o-mini-2024-07-18",
2222
max_total_tokens=128_000,
23-
max_input_tokens=100_000,
24-
max_new_tokens=16384,
23+
max_input_tokens=128_000,
24+
max_new_tokens=16_384,
2525
vision_support=True,
2626
),
2727
"openai/gpt-4-1106-preview": OpenAIModelArgs(
2828
model_name="gpt-4-1106-preview",
2929
max_total_tokens=128_000,
30-
max_input_tokens=100_000,
31-
max_new_tokens=28_000,
30+
max_input_tokens=128_000,
31+
max_new_tokens=4_096,
3232
),
3333
"openai/gpt-4-vision-preview": OpenAIModelArgs(
3434
model_name="gpt-4-vision-preview",
3535
max_total_tokens=128_000,
36-
max_input_tokens=100_000,
37-
max_new_tokens=28_000, # I think this model has very small default value if we don't set max_new_tokens
36+
max_input_tokens=128_000,
37+
max_new_tokens=16_384, # I think this model has a very small default value if we don't set max_new_tokens
3838
vision_support=True,
3939
),
4040
"openai/gpt-4o-2024-05-13": OpenAIModelArgs(
4141
model_name="gpt-4o-2024-05-13",
4242
max_total_tokens=128_000,
43-
max_input_tokens=100_000,
44-
max_new_tokens=28_000, # I think this model has very small default value if we don't set max_new_tokens
43+
max_input_tokens=128_000,
44+
max_new_tokens=4_096, # I think this model has a very small default value if we don't set max_new_tokens
4545
vision_support=True,
4646
),
4747
"openai/gpt-3.5-turbo-0125": OpenAIModelArgs(
4848
model_name="gpt-3.5-turbo-0125",
4949
max_total_tokens=16_384,
50-
max_input_tokens=15_000,
51-
max_new_tokens=1_000,
50+
max_input_tokens=16_384,
51+
max_new_tokens=4096,
5252
),
5353
"openai/gpt-3.5-turbo-1106": OpenAIModelArgs(
5454
model_name="gpt-3.5-turbo-1106",
5555
max_total_tokens=16_384,
56-
max_input_tokens=15_000,
57-
max_new_tokens=1_000,
56+
max_input_tokens=16_384,
57+
max_new_tokens=4096,
5858
),
5959
"azure/gpt-35-turbo/gpt-35-turbo": AzureModelArgs(
6060
model_name="gpt-35-turbo",
@@ -68,23 +68,23 @@
6868
deployment_name="gpt-4o-2024-05-13",
6969
max_total_tokens=128_000,
7070
max_input_tokens=100_000,
71-
max_new_tokens=28_000,
71+
max_new_tokens=16_384,
7272
vision_support=True,
7373
),
7474
"azure/gpt-4o-2024-08-06": AzureModelArgs(
7575
model_name="gpt-4o",
7676
deployment_name="gpt-4o-2024-08-06",
7777
max_total_tokens=128_000,
78-
max_input_tokens=100_000,
79-
max_new_tokens=28_000,
78+
max_input_tokens=128_000,
79+
max_new_tokens=16_384,
8080
vision_support=True,
8181
),
8282
"azure/gpt-4o-mini-2024-07-18": AzureModelArgs(
8383
model_name="gpt-4o-mini",
8484
deployment_name="gpt-4o-mini-2024-07-18",
8585
max_total_tokens=128_000,
86-
max_input_tokens=100_000,
87-
max_new_tokens=16384,
86+
max_input_tokens=128_000,
87+
max_new_tokens=16_384,
8888
vision_support=True,
8989
),
9090
# ---------------- OSS LLMs ----------------#
@@ -151,8 +151,8 @@
151151
"openrouter/anthropic/claude-3.5-sonnet:beta": OpenRouterModelArgs(
152152
model_name="anthropic/claude-3.5-sonnet:beta",
153153
max_total_tokens=200_000,
154-
max_input_tokens=160_000,
155-
max_new_tokens=40_000,
154+
max_input_tokens=200_000,
155+
max_new_tokens=8_192,
156156
temperature=1e-1,
157157
vision_support=True,
158158
),
@@ -166,8 +166,8 @@
166166
"openrouter/openai/o1-mini-2024-09-12": OpenRouterModelArgs(
167167
model_name="openai/o1-mini-2024-09-12",
168168
max_total_tokens=128_000,
169-
max_input_tokens=100_000,
170-
max_new_tokens=16384,
169+
max_input_tokens=128_000,
170+
max_new_tokens=64_000,
171171
temperature=1e-1,
172172
),
173173
}

tests/agents/test_generic_prompt.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55

66
from agentlab.agents import dynamic_prompting as dp
77
from agentlab.agents.generic_agent.agent_configs import FLAGS_GPT_3_5
8-
from agentlab.agents.generic_agent.generic_agent_prompt import GenericPromptFlags, MainPrompt
8+
from agentlab.agents.generic_agent.generic_agent_prompt import (
9+
GenericPromptFlags,
10+
MainPrompt,
11+
)
912
from agentlab.llm.llm_utils import count_tokens
1013

1114
html_template = """

0 commit comments

Comments
 (0)