Skip to content

Commit 9e9b800

Browse files
authored
Merge pull request #161 from ServiceNow/AB_res
Ab res
2 parents 7dd91a7 + 4756d94 commit 9e9b800

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

reproducibility_journal.csv

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,10 @@ ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,weblinx_test,0.0.1.dev13,202
4848
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.079,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
4949
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,workarena_l2_agent_curriculum_eval,0.4.1,2024-11-29_14-28-47,528da1f2-1949-41dc-b988-85f19f435af2,0.072,0.017,2,235/235,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,b115b2716d8a6328824684a692ed642297f0b1dc,,0.13.3,70dac253628c476aff1af6a975f27f8563453ad2,
5050
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,miniwob,0.13.3,2024-11-29_16-14-00,4d748972-6d35-4489-a197-138b656a7db3,0.646,0.019,0,625/625,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,becb4856fb1612f44010fe74ef8155d367ca17fc,,0.13.3,70dac253628c476aff1af6a975f27f8563453ad2,
51+
ThibaultLSDC,GenericAgent-gpt-4o,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.005,0.003,2,213/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
52+
ThibaultLSDC,GenericAgent-gpt-4o-mini,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.002,0.002,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
53+
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.008,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
54+
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.005,8,206/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
55+
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-8b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.001,0.001,15,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
56+
ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
57+
ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.009,0.005,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,

src/agentlab/experiments/study.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from abc import ABC, abstractmethod
12
import gzip
23
import logging
34
import pickle
@@ -405,7 +406,6 @@ def load_most_recent(root_dir: Path = None, contains=None) -> "Study":
405406

406407
def _make_study_name(agent_names, benchmark_names, suffix=None):
407408
"""Make a study name from the agent and benchmark names."""
408-
409409
# extract unique agent and benchmark names
410410
agent_names = list(set(agent_names))
411411
benchmark_names = list(set(benchmark_names))

0 commit comments

Comments
 (0)