Skip to content

Commit 79cde90

Browse files
add agent-mentor launcher
1 parent 6b78e8e commit 79cde90

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ hint = [
109109
[project.scripts]
110110
agentlab-assistant = "agentlab.ui_assistant:main"
111111
agentlab-xray = "agentlab.analyze.agent_xray:main"
112+
agentlab-mentor = "agentlab.agents.hitl_agent.launch_hint_ui:main"
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
Console launcher for the Human-in-the-Loop Generic Agent UI.
3+
4+
Usage (installed entry point):
5+
agentlab-mentor --benchmark miniwob --task-name miniwob.book-flight --seed 123
6+
7+
This will run a Study with the MultipleProposalGenericAgent and the selected task.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import argparse
13+
import logging
14+
from typing import Optional
15+
16+
import bgym
17+
18+
from agentlab.agents.hitl_agent.generic_human_guided_agent import (
19+
HUMAN_GUIDED_GENERIC_AGENT,
20+
)
21+
from agentlab.experiments.study import Study
22+
23+
24+
def build_benchmark(benchmark_name: str, task_name: Optional[str], seed: Optional[int]):
    """Build the benchmark to run in the UI, optionally narrowed to specific tasks.

    Args:
        benchmark_name: Key into ``bgym.DEFAULT_BENCHMARKS``; lower-cased before lookup.
        task_name: Task name or glob pattern used to filter the benchmark's tasks.
            A falsy value keeps every task.
        seed: Task seed written to every selected task. ``None`` leaves the
            existing seeds untouched.

    Returns:
        The (possibly filtered) benchmark, with each entry of ``env_args_list``
        updated in place.

    Raises:
        SystemExit: If ``benchmark_name`` is not a registered benchmark, or if
            ``task_name`` does not match any task of the benchmark.
    """
    # Only the registry lookup sits in the ``try``: a KeyError raised while the
    # benchmark itself is being constructed must not be reported as an unknown name.
    try:
        benchmark_factory = bgym.DEFAULT_BENCHMARKS[benchmark_name.lower()]
    except KeyError as e:
        choices = ", ".join(sorted(bgym.DEFAULT_BENCHMARKS))
        raise SystemExit(f"Unknown benchmark '{benchmark_name}'. Choose one of: {choices}") from e
    benchmark = benchmark_factory()

    if task_name:
        # Plain names and glob patterns go through the same call.
        # NOTE(review): assumes subset_from_glob treats a pattern without
        # wildcards as an exact match -- confirm against BrowserGym.
        benchmark = benchmark.subset_from_glob("task_name", task_name)
        if not benchmark.env_args_list:
            # Fail early instead of launching a study with nothing to run.
            raise SystemExit(
                f"No task matching '{task_name}' found in benchmark '{benchmark_name}'."
            )

    for env_args in benchmark.env_args_list:
        # If a specific seed is provided, set it on all env args
        if seed is not None:
            env_args.task_seed = seed
        # Reasonable default for interactive UI: fall back to 100 steps when unset/zero
        env_args.max_steps = env_args.max_steps or 100
        # Force headless mode; UI is external Gradio, not browser GUI
        env_args.headless = True

    return benchmark
51+
52+
53+
def parse_args(argv: Optional[list[str]] = None):
    """Parse the command-line options of the HITL Generic Agent UI launcher.

    Args:
        argv: Argument strings to parse (without the program name). ``None``
            makes argparse read ``sys.argv[1:]``, which is the behaviour of the
            console entry point.

    Returns:
        The ``argparse.Namespace`` with attributes ``benchmark``, ``task_name``,
        ``seed``, ``jobs``, ``parallel_backend``, ``retries`` and ``log_level``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description="Run HITL Generic Agent UI on a benchmark task")
    parser.add_argument(
        "--benchmark",
        required=True,
        help="Benchmark name as registered in BrowserGym, e.g., miniwob, workarena_l1, webarena, visualwebarena",
    )
    parser.add_argument(
        "--task-name",
        dest="task_name",
        default=None,
        help="Task name or glob to filter tasks within the benchmark (e.g., 'miniwob.*book*')",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Task seed to use for all selected tasks. If omitted, tasks keep their configured/random seed.",
    )
    parser.add_argument(
        "--jobs",
        type=int,
        default=1,
        help="Number of parallel jobs (UI agent typically runs sequentially)",
    )
    parser.add_argument(
        "--parallel-backend",
        default="sequential",
        choices=["sequential", "ray", "joblib"],
        help="Parallel backend to use",
    )
    parser.add_argument(
        "--retries",
        type=int,
        default=1,
        help="Number of relaunch attempts for incomplete experiments",
    )
    parser.add_argument(
        "--log-level",
        default="WARNING",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging level",
    )
    return parser.parse_args(argv)
97+
98+
99+
def main():
    """Console entry point: parse the CLI options, build the study, and run it.

    The selected log level is applied both to the study's logging level and to
    its stdout logging level. The study is run with the single
    ``HUMAN_GUIDED_GENERIC_AGENT`` configuration on the benchmark returned by
    ``build_benchmark``.
    """
    cli = parse_args()

    # ``--log-level`` is restricted to valid names, so this lookup always resolves.
    level = getattr(logging, cli.log_level)

    study = Study(
        [HUMAN_GUIDED_GENERIC_AGENT],
        build_benchmark(cli.benchmark, cli.task_name, cli.seed),
        logging_level=level,
        logging_level_stdout=level,
    )
    study.run(
        n_jobs=cli.jobs,
        parallel_backend=cli.parallel_backend,
        n_relaunch=cli.retries,
    )
119+
120+
121+
# Allow running this module directly as a script, in addition to the
# console entry point declared in pyproject.toml.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)