Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
install:
@echo "--- 🚀 Installing project dependencies ---"
pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/
pip install -e ./browsergym/core -e ./browsergym/miniwob -e ./browsergym/webarena -e ./browsergym/webarenalite -e ./browsergym/visualwebarena/ -e ./browsergym/experiments -e ./browsergym/assistantbench -e ./browsergym/
playwright install chromium

install-demo:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,20 @@
),
task_metadata=task_metadata("webarena"),
),
# Benchmark factory for the "webarena_lite" benchmark: calling it with
# n_repeats (default 1) builds a fresh Benchmark object.
"webarena_lite": lambda n_repeats=1: Benchmark(
name="webarena_lite",
# Reuses the high-level action set arguments declared for "webarena".
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
is_multi_tab=True,
supports_parallel_seeds=False,
# NOTE(review): the backend is listed as "webarena" while the task metadata
# below is loaded under the "webarenalite" key — confirm this is intended.
backends=["webarena"],
env_args_list=make_env_args_list_from_repeat_tasks(
# Task list is taken from the "webarenalite" task metadata.
task_list=task_list_from_metadata(metadata=task_metadata("webarenalite")),
max_steps=30,
n_repeats=n_repeats,
# RandomState seeded with a constant (42) is passed as the seed generator.
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("webarenalite"),
),
"webarena_tiny": lambda n_repeats=1: Benchmark(
name="webarena_tiny",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
task_name,requires_reset,sites,eval_types,task_id,browsergym_split,depends_on
webarenalite.4,False,shopping_admin,string_match,4,train,
webarenalite.7,False,map,string_match,7,train,
webarenalite.15,False,shopping_admin,string_match,15,test,
webarenalite.20,False,map,string_match,20,test,
webarenalite.23,False,shopping,string_match,23,test,
webarenalite.27,False,reddit,string_match,27,test,
webarenalite.33,False,map,string_match,33,test,
webarenalite.37,False,map,string_match,37,train,
webarenalite.43,False,shopping_admin,string_match,43,test,
webarenalite.44,False,gitlab,url_match,44,train,
webarenalite.48,False,shopping,string_match,48,test,
webarenalite.56,False,map,string_match,56,train,
webarenalite.58,False,map,string_match,58,train,
webarenalite.65,False,shopping_admin,string_match,65,train,
webarenalite.69,False,reddit,string_match,69,test,
webarenalite.71,False,map,string_match,71,test,
webarenalite.75,False,map,string_match,75,train,
webarenalite.77,False,shopping_admin,string_match,77,test,webarenalite.65
webarenalite.82,False,map,string_match,82,train,
webarenalite.88,False,map,string_match,88,train,
webarenalite.93,False,map,string_match,93,train,
webarenalite.95,False,shopping_admin,string_match,95,train,
webarenalite.96,False,shopping,string_match,96,test,
webarenalite.97,False,map wikipedia,string_match,97,test,webarenalite.93
webarenalite.98,False,map,string_match,98,test,webarenalite.97
webarenalite.103,False,gitlab,url_match,103,train,
webarenalite.109,False,shopping_admin,string_match,109,test,
webarenalite.115,False,shopping_admin,string_match,115,test,
webarenalite.117,False,shopping,string_match,117,test,webarenalite.96
webarenalite.118,False,shopping,program_html,118,train,webarenalite.117
webarenalite.123,False,shopping_admin,string_match,123,train,
webarenalite.125,False,shopping,string_match,125,train,
webarenalite.127,False,shopping_admin,string_match,127,train,webarenalite.123
webarenalite.131,False,shopping_admin,string_match,131,test,
webarenalite.135,False,gitlab,string_match,135,train,
webarenalite.139,False,map,string_match,139,test,
webarenalite.144,False,shopping,string_match,144,test,
webarenalite.149,False,shopping,string_match,149,test,
webarenalite.155,False,map,string_match,155,test,
webarenalite.156,False,gitlab,url_match,156,test,
webarenalite.157,False,shopping_admin,url_match,157,train,webarenalite.131
webarenalite.162,False,shopping,url_match,162,test,
webarenalite.167,False,shopping,string_match,167,test,
webarenalite.169,False,gitlab,string_match,169,train,
webarenalite.173,False,gitlab,string_match url_match,173,train,
webarenalite.182,False,gitlab,string_match url_match,182,train,
webarenalite.190,False,shopping,string_match,190,train,
webarenalite.196,False,shopping_admin,string_match,196,train,
webarenalite.202,False,shopping_admin,string_match,202,train,
webarenalite.205,False,gitlab,string_match,205,train,webarenalite.182
webarenalite.211,False,shopping_admin,string_match,211,train,
webarenalite.215,False,shopping_admin,string_match,215,test,
webarenalite.220,False,map,string_match,220,train,
webarenalite.221,False,map,string_match,221,test,webarenalite.220
webarenalite.225,False,shopping,string_match,225,test,
webarenalite.227,False,shopping,string_match,227,train,
webarenalite.235,False,shopping,string_match,235,train,
webarenalite.236,False,map,string_match,236,train,
webarenalite.240,False,shopping,url_match,240,test,
webarenalite.247,False,shopping_admin,string_match,247,train,
webarenalite.250,False,map,string_match,250,test,
webarenalite.254,False,map,string_match,254,train,
webarenalite.258,False,gitlab,url_match,258,train,
webarenalite.259,False,gitlab,string_match,259,train,webarenalite.258
webarenalite.268,False,wikipedia map,string_match,268,test,
webarenalite.270,False,shopping,url_match,270,train,
webarenalite.276,False,shopping,url_match,276,train,
webarenalite.283,False,shopping,url_match,283,test,
webarenalite.285,False,shopping,url_match,285,train,
webarenalite.287,False,map,string_match,287,test,webarenalite.268
webarenalite.288,False,shopping_admin,string_match,288,train,webarenalite.247
webarenalite.296,False,gitlab,string_match,296,train,
webarenalite.300,False,shopping,url_match,300,test,
webarenalite.311,False,gitlab,string_match,311,test,
webarenalite.313,False,shopping,string_match,313,train,
webarenalite.318,False,gitlab,string_match,318,train,
webarenalite.321,False,shopping,string_match,321,train,
webarenalite.324,False,shopping,url_match,324,train,
webarenalite.333,False,shopping,string_match,333,train,
webarenalite.335,False,shopping,string_match,335,train,
webarenalite.348,False,shopping_admin,string_match,348,test,
webarenalite.349,False,gitlab,string_match,349,test,
webarenalite.354,False,shopping,url_match,354,train,
webarenalite.357,False,gitlab,url_match,357,test,
webarenalite.361,False,shopping,string_match,361,train,
webarenalite.367,False,map,string_match,367,train,
webarenalite.368,False,shopping,string_match,368,test,
webarenalite.369,False,map,program_html,369,train,webarenalite.367
webarenalite.374,False,shopping_admin,url_match,374,train,webarenalite.348
webarenalite.376,False,shopping,string_match,376,test,webarenalite.368
webarenalite.381,False,map,url_match,381,train,
webarenalite.382,False,map,string_match,382,test,webarenalite.381
webarenalite.383,False,map,string_match,383,test,webarenalite.382
webarenalite.384,False,shopping,string_match,384,test,webarenalite.376
webarenalite.386,False,shopping,string_match,386,test,
webarenalite.387,False,shopping,string_match,387,train,webarenalite.386
webarenalite.392,False,gitlab,program_html,392,test,
webarenalite.401,False,reddit,program_html,401,train,
webarenalite.404,False,reddit,program_html,404,train,
webarenalite.415,False,gitlab,program_html,415,test,
webarenalite.419,False,gitlab,program_html,419,test,
webarenalite.423,False,shopping_admin,program_html,423,train,
webarenalite.426,False,wikipedia map,program_html,426,test,
webarenalite.431,False,shopping,program_html,431,train,
webarenalite.440,False,shopping,program_html,440,test,
webarenalite.448,False,gitlab,program_html,448,test,
webarenalite.454,False,shopping_admin,program_html,454,test,
webarenalite.458,False,shopping_admin,program_html,458,test,
webarenalite.464,False,shopping_admin,program_html,464,train,
webarenalite.466,False,shopping,program_html,466,train,
webarenalite.470,False,shopping_admin,program_html,470,test,webarenalite.464
webarenalite.476,False,gitlab,program_html,476,train,
webarenalite.485,False,gitlab,program_html,485,test,
webarenalite.488,False,shopping_admin,program_html,488,test,
webarenalite.491,False,shopping_admin,string_match,491,test,
webarenalite.497,False,shopping_admin,program_html,497,test,
webarenalite.505,False,shopping_admin,program_html,505,train,
webarenalite.506,False,shopping,program_html,506,train,
webarenalite.509,False,shopping,program_html,509,test,
webarenalite.514,False,shopping,program_html,514,test,
webarenalite.516,False,shopping,program_html,516,train,
webarenalite.521,False,shopping,program_html,521,test,
webarenalite.524,False,gitlab,program_html,524,test,
webarenalite.528,False,shopping,program_html,528,train,webarenalite.521
webarenalite.534,False,gitlab,program_html,534,train,
webarenalite.538,False,shopping_admin,program_html,538,train,webarenalite.505
webarenalite.548,False,shopping_admin,program_html,548,train,
webarenalite.566,False,gitlab reddit,program_html,566,test,
webarenalite.567,False,gitlab,program_html,567,test,webarenalite.566
webarenalite.574,False,shopping,program_html,574,test,
webarenalite.577,False,gitlab,program_html,577,train,
webarenalite.582,False,reddit,program_html,582,test,
webarenalite.599,False,reddit,url_match program_html,599,test,
webarenalite.601,False,reddit,url_match program_html,601,train,
webarenalite.605,False,reddit,url_match program_html,605,train,
webarenalite.612,False,reddit,url_match program_html,612,test,
webarenalite.619,False,reddit,url_match program_html,619,train,
webarenalite.626,False,reddit,url_match program_html,626,train,
webarenalite.631,False,reddit,url_match program_html,631,train,
webarenalite.641,False,reddit,url_match program_html,641,test,
webarenalite.645,False,reddit,url_match program_html,645,train,
webarenalite.652,False,reddit,url_match program_html,652,train,
webarenalite.657,False,shopping,url_match program_html,657,train,
webarenalite.668,False,gitlab,url_match program_html,668,test,
webarenalite.673,False,shopping reddit,url_match program_html,673,test,
webarenalite.678,False,shopping_admin,url_match program_html,678,train,
webarenalite.682,False,reddit gitlab,url_match program_html,682,train,
webarenalite.686,False,reddit gitlab,url_match program_html,686,train,
webarenalite.693,False,shopping,url_match program_html,693,train,
webarenalite.704,False,shopping_admin,url_match program_html,704,test,
webarenalite.710,False,shopping_admin,url_match program_html,710,test,
webarenalite.714,False,reddit,program_html,714,train,
webarenalite.720,False,reddit,program_html,720,test,
webarenalite.729,False,reddit,program_html,729,train,
webarenalite.733,False,reddit,program_html,733,train,
webarenalite.741,False,wikipedia map,program_html,741,train,
webarenalite.745,False,gitlab,program_html,745,test,
webarenalite.748,False,gitlab,program_html,748,train,
webarenalite.760,False,map shopping_admin,program_html,760,test,
webarenalite.762,False,map,program_html,762,train,
webarenalite.768,False,shopping_admin,program_html,768,test,webarenalite.760
webarenalite.791,False,gitlab reddit,string_match,791,train,
webarenalite.798,False,shopping,string_match,798,train,
webarenalite.809,False,gitlab,url_match program_html,809,train,
webarenalite.811,False,gitlab,program_html,811,test,
1 change: 1 addition & 0 deletions browsergym/experiments/src/browsergym/experiments/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,7 @@ def _get_env_name(task_name: str):
import browsergym.workarena
elif task_name.startswith("webarena"):
import browsergym.webarena
import browsergym.webarenalite
elif task_name.startswith("visualwebarena"):
import browsergym.visualwebarena
elif task_name.startswith("assistantbench"):
Expand Down
38 changes: 38 additions & 0 deletions browsergym/webarenalite/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[build-system]
requires = ["hatchling", "hatch-requirements-txt"]
build-backend = "hatchling.build"

[project]
name = "browsergym-webarenalite"
description = "WebArena Lite benchmark for BrowserGym"
authors = [
{name = "Aman Jaiswal"},
]
requires-python = ">3.7"
license = {text = "Apache-2.0"}
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
]
dynamic = ["dependencies", "version"]

[project.urls]
homepage = "https://github.com/ServiceNow/BrowserGym"

[tool.hatch.version]
path = "../core/src/browsergym/core/__init__.py"

[tool.hatch.metadata.hooks.requirements_txt]
files = ["requirements.txt"]

[tool.hatch.build]
include = [
"src/browsergym/webarenalite/test_webarena_lite.raw.json"
]

[tool.hatch.build.targets.wheel]
packages = ["src/browsergym"]
2 changes: 2 additions & 0 deletions browsergym/webarenalite/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
browsergym-core==0.14.2
libwebarena==0.0.4
24 changes: 24 additions & 0 deletions browsergym/webarenalite/src/browsergym/webarenalite/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import nltk

from browsergym.core.registration import register_task

from . import config, task

# Make sure the NLTK "punkt_tab" tokenizer resource is available, downloading
# it when it cannot be found locally.
# note: deprecated punkt -> punkt_tab https://github.com/nltk/nltk/issues/3293
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    # nltk.data.find signals a missing resource with LookupError. Catching only
    # that (instead of a bare ``except:``) keeps KeyboardInterrupt, SystemExit
    # and unrelated errors from being silently turned into a download attempt.
    nltk.download("punkt_tab", quiet=True, raise_on_error=True)

# Gym ids ("webarenalite.<task_id>") of every task registered below, in the
# order of config.TASK_IDS.
ALL_WEBARENA_TASK_IDS = []

# register every WebArena Lite task
for task_id in config.TASK_IDS:
    gym_id = f"webarenalite.{task_id}"
    register_task(
        gym_id,
        task.WebArenaLiteTask,
        task_kwargs={"task_id": task_id},
    )
    ALL_WEBARENA_TASK_IDS.append(gym_id)
Loading
Loading