Skip to content

Commit a46a2ed

Browse files
authored
Merge pull request #146 from ServiceNow:adding_darglint
Adding darglint as workflow test
2 parents 1c92433 + c229e20 commit a46a2ed

File tree

13 files changed

+118
-38
lines changed

13 files changed

+118
-38
lines changed

.github/workflows/darglint.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: Darglint checks
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
11+
build:
12+
runs-on: ubuntu-latest
13+
defaults:
14+
run:
15+
shell: bash -l {0}
16+
steps:
17+
18+
- name: Checkout Repository
19+
uses: actions/checkout@v4
20+
21+
- name: Set up Python
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.10'
25+
cache: 'pip' # caching pip dependencies
26+
27+
- name: Pip install
28+
run: pip install darglint
29+
30+
- name: Pip list
31+
run: pip list
32+
33+
- name: Darglint checks
34+
run: darglint -v 2 -z short .

src/agentlab/agents/agent_args.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from bgym import AbstractAgentArgs
21
import bgym
2+
from bgym import AbstractAgentArgs
33

44

55
class AgentArgs(AbstractAgentArgs):
@@ -28,6 +28,9 @@ def set_reproducibility_mode(self):
2828
as possible e.g. setting the temperature of the model to 0.
2929
3030
This is only called when reproducibility is requested.
31+
32+
Raises:
33+
NotImplementedError: If the agent does not support reproducibility.
3134
"""
3235
raise NotImplementedError(
3336
f"set_reproducibility_mode is not implemented for agent_args {self.__class__.__name__}"

src/agentlab/agents/generic_agent/agent_configs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,19 @@
193193
add_missparsed_messages=True,
194194
)
195195

196+
196197
AGENT_8B = GenericAgentArgs(
197198
chat_model_args=CHAT_MODEL_ARGS_DICT["meta-llama/Meta-Llama-3-8B-Instruct"],
198199
flags=FLAGS_8B,
199200
)
200201

201202

203+
AGENT_LLAMA31_8B = GenericAgentArgs(
204+
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/meta-llama/llama-3.1-8b-instruct"],
205+
flags=FLAGS_8B,
206+
)
207+
208+
202209
# GPT-4o default config
203210
FLAGS_GPT_4o = GenericPromptFlags(
204211
obs=dp.ObsFlags(

src/agentlab/agents/generic_agent/reproducibility_agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ def make_repro_agent(agent_args: AgentArgs, exp_dir: Path | str):
199199
agent_args (AgentArgs): The original agent args.
200200
exp_dir (Path | str): The directory where the experiment was saved.
201201
202+
Returns:
203+
ReproAgentArgs: The new agent args.
202204
"""
203205
exp_dir = Path(exp_dir)
204206
assert isinstance(agent_args, GenericAgentArgs)

src/agentlab/agents/visualwebarena/agent.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ def get_action(self, obs: Any) -> tuple[str, dict]:
188188
Replica of VisualWebArena agent
189189
https://github.com/web-arena-x/visualwebarena/blob/89f5af29305c3d1e9f97ce4421462060a70c9a03/agent/prompts/prompt_constructor.py#L211
190190
https://github.com/web-arena-x/visualwebarena/blob/89f5af29305c3d1e9f97ce4421462060a70c9a03/agent/prompts/prompt_constructor.py#L272
191+
192+
Args:
193+
obs (Any): Observation from the environment
194+
195+
Returns:
196+
tuple[str, dict]: Action and AgentInfo
191197
"""
192198
user_messages = []
193199

src/agentlab/analyze/inspect_results.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,7 @@ def set_index_from_variables(
6969
index_black_list: List of wildcard patterns to match variables that
7070
should be excluded from the index.
7171
task_key: The key to use as the first level of the index.
72-
force_at_leaste_one_variable: If True, force at least one variable in the
73-
index. If no variable is found, the index will be set to
74-
task_key + "agent.agent_name".
72+
add_agent_and_benchmark: If True, add agent.agent_name and env.benchmark to the index.
7573
"""
7674
df.reset_index(inplace=True)
7775
constants, variables, _ = get_constants_and_variables(df)
@@ -127,6 +125,7 @@ def load_result_df(
127125
should be included in the index.
128126
index_black_list: List of wildcard patterns to match variables that
129127
should be excluded from the index.
128+
remove_args_suffix: If True, remove the _args suffix from the columns
130129
131130
Returns:
132131
pd.DataFrame: The result dataframe
@@ -733,17 +732,13 @@ def _categorize_error(row):
733732

734733

735734
def _benchmark_from_task_name(task_name: str):
736-
"""Extract the benchmark from the task name.
737-
TODO should be more robost, e.g. handle workarna.L1, workarena.L2, etc.
738-
"""
735+
"""Extract the benchmark from the task name."""
736+
# TODO should be more robust, e.g. handle workarena.L1, workarena.L2, etc.
739737
return task_name.split(".")[0]
740738

741739

742740
def summarize_study(result_df: pd.DataFrame) -> pd.DataFrame:
743-
"""Create a summary of the study.
744-
745-
Similar to global report, but handles single agent differently.
746-
"""
741+
"""Create a summary of the study. Similar to global report, but handles single agent differently."""
747742

748743
levels = list(range(result_df.index.nlevels))
749744
return result_df.groupby(level=levels[1:]).apply(summarize)

src/agentlab/experiments/exp_utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ def add_dependencies(exp_args_list: list[ExpArgs], task_dependencies: dict[str,
9595
Returns:
9696
list[ExpArgs]
9797
The modified exp_args_list with dependencies added.
98+
99+
Raises:
100+
ValueError: If the task_dependencies are not valid.
98101
"""
99102

100103
if task_dependencies is None or all([len(dep) == 0 for dep in task_dependencies.values()]):

src/agentlab/experiments/graph_execution_ray.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ def poll_for_timeout(tasks: dict[str, ray.ObjectRef], timeout: float, poll_inter
4747
4848
I tried various different methods for killing a job that hangs. So far it's
4949
the only one that seems to work reliably (hopefully)
50+
51+
Args:
52+
tasks: dict[str, ray.ObjectRef]
53+
Dictionary of task_id: task_ref
54+
timeout: float
55+
Timeout in seconds
56+
poll_interval: float
57+
Polling interval in seconds
58+
59+
Returns:
60+
dict[str, Any]: Dictionary of task_id: result
5061
"""
5162
task_list = list(tasks.values())
5263
task_ids = list(tasks.keys())

src/agentlab/experiments/launch_exp.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import bgym
66
from browsergym.experiments.loop import ExpArgs, yield_all_exp_results
7+
78
from agentlab.experiments.exp_utils import run_exp
89

910

@@ -24,13 +25,16 @@ def run_experiments(
2425
Number of parallel jobs.
2526
exp_args_list: list[ExpArgs]
2627
List of ExpArgs objects.
27-
exp_dir: Path
28+
study_dir: Path
2829
Directory where the experiments will be saved.
2930
parallel_backend: str
3031
Parallel backend to use. Either "joblib", "ray" or "sequential".
3132
The only backends that support webarena graph dependencies correctly are ray and sequential.
3233
avg_step_timeout: int
3334
Will raise a TimeoutError if the episode is not finished after env_args.max_steps * avg_step_timeout seconds.
35+
36+
Raises:
37+
ValueError: If the parallel_backend is not recognized.
3438
"""
3539

3640
if len(exp_args_list) == 0:
@@ -110,6 +114,13 @@ def find_incomplete(study_dir: str | Path, include_errors=True):
110114
Find all incomplete experiments and relaunch them.
111115
- "incomplete_only": relaunch only the incomplete experiments.
112116
- "incomplete_or_error": relaunch incomplete or errors.
117+
118+
Returns:
119+
list[ExpArgs]
120+
List of ExpArgs objects to relaunch.
121+
122+
Raises:
123+
ValueError: If the study_dir does not exist.
113124
"""
114125
study_dir = Path(study_dir)
115126

@@ -152,6 +163,16 @@ def _hide_completed(exp_result: bgym.ExpResult, include_errors: bool = True):
152163
153164
This little hack allows an elegant way to keep the task dependencies for e.g. webarena
154165
while skipping the tasks that are completed when relaunching.
166+
167+
Args:
168+
exp_result: bgym.ExpResult
169+
The experiment result to hide.
170+
include_errors: bool
171+
If True, include experiments that errored.
172+
173+
Returns:
174+
ExpArgs
175+
The ExpArgs object hidden if the experiment is completed.
155176
"""
156177

157178
hide = False

src/agentlab/experiments/reproducibility_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _get_git_username(repo: Repo) -> str:
5959
5. Environment variables (GIT_AUTHOR_NAME and GIT_COMMITTER_NAME)
6060
6161
Args:
62-
repo (git.Repo): A GitPython Repo object representing the Git repository.
62+
repo (Repo): A GitPython Repo object representing the Git repository.
6363
6464
Returns:
6565
str: The first non-None username found, or None if no username is found.

0 commit comments

Comments
 (0)