Skip to content

Commit eae4152

Browse files
committed
black formatter
1 parent 76ab14e commit eae4152

File tree

7 files changed

+41
-19
lines changed

7 files changed

+41
-19
lines changed

browsergym/experiments/src/browsergym/experiments/benchmark/base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,13 @@ def make_action_set(self):
5353

5454

5555
BenchmarkBackend = Literal[
56-
"miniwob", "webarena", "webarena_verified", "visualwebarena", "workarena", "assistantbench", "weblinx"
56+
"miniwob",
57+
"webarena",
58+
"webarena_verified",
59+
"visualwebarena",
60+
"workarena",
61+
"assistantbench",
62+
"weblinx",
5763
]
5864

5965

browsergym/experiments/src/browsergym/experiments/benchmark/utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,7 @@ def prepare_backend(backend: str):
158158
massage_tasks(
159159
[
160160
f"webarena_verified.{intent_template_id}.{task_id}"
161-
for intent_template_id, task_id in
162-
[
161+
for intent_template_id, task_id in [
163162
# gitlab, shopping_admin, and map are not ready yet
164163
(23, 410), # reddit
165164
# (330, 533), # gitlab

browsergym/webarena/src/browsergym/webarena/task.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
111111
extra_headers = json.load(f)
112112
page.context.set_extra_http_headers(extra_headers)
113113
except Exception as e:
114-
logger.warning(f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers.")
114+
logger.warning(
115+
f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers."
116+
)
115117

116118
# authenticate
117119
for site in self.config["sites"]:

browsergym/webarena_verified/src/browsergym/webarena_verified/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@
1010
for task in data:
1111
INTENT_TEMPLATE_IDS.append(task["intent_template_id"])
1212

13-
assert len(INTENT_TEMPLATE_IDS) == len(TASK_IDS), "Number of intent template IDs must match number of task IDs"
13+
assert len(INTENT_TEMPLATE_IDS) == len(
14+
TASK_IDS
15+
), "Number of intent template IDs must match number of task IDs"

browsergym/webarena_verified/src/browsergym/webarena_verified/evaluators.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
class WebArenaVerifiedEvaluator:
3434
"""
3535
Evaluator that integrates the webarena_verified evaluation system.
36-
36+
3737
This evaluator handles the new evaluation format with:
3838
- expected_retrieve_value: Validates data retrieval
3939
- expected_backend_state: Validates backend/database changes
@@ -57,8 +57,8 @@ def __init__(self, webarena_instance: WebArenaInstance):
5757
},
5858
WebArenaSite.HOMEPAGE: EnvironmentConfig(
5959
urls=[webarena_instance.home_url],
60-
)
61-
}
60+
),
61+
},
6262
)
6363
# Instantiate data reader and evaluator
6464
reader = WebArenaVerifiedDataReader(config)
@@ -84,6 +84,7 @@ def __call__(
8484
"""
8585
# import webarena dynamically
8686
from webarena.browser_env.actions import ActionTypes
87+
8788
# if last action is not a STOP action, return 0.0 as the task is not completed yet
8889
if trajectory[-1].get("action_type") != ActionTypes.STOP:
8990
return 0.0
@@ -110,9 +111,14 @@ def __call__(
110111
logger.info(f"Running webarena_verified evaluation for task {task.task_id}")
111112
results: TaskEvalResult = self.evaluator.evaluate_task(context=context)
112113
logger.info(f"Webarena_verified evaluation result for task {task.task_id}:")
113-
logger.info(f"status: {results.status}, score: {results.score}, error_msg: {results.error_msg}")
114+
logger.info(
115+
f"status: {results.status}, score: {results.score}, error_msg: {results.error_msg}"
116+
)
114117
for result in results.evaluators_results:
115-
logger.info(f"- {result.evaluator_name}: status: {result.status}, score: {result.score}, error_msg: {result.error_msg}")
118+
logger.info(
119+
f"- {result.evaluator_name}: status: {result.status}, score: {result.score}, error_msg: {result.error_msg}"
120+
)
116121
# return average score
117-
return sum(result.score for result in results.evaluators_results) / len(results.evaluators_results)
118-
122+
return sum(result.score for result in results.evaluators_results) / len(
123+
results.evaluators_results
124+
)

browsergym/webarena_verified/src/browsergym/webarena_verified/task.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ class WebArenaVerifiedTask(GenericWebArenaTask):
2121
"""
2222
WebArena Verified task class that integrates the full evaluation system
2323
from platform-labs-agent-eval-harness.
24-
24+
2525
This task class handles the new evaluation format with:
2626
- expected_retrieve_value
27-
- expected_backend_state
27+
- expected_backend_state
2828
- expected_ui_state
2929
"""
3030

@@ -50,7 +50,7 @@ def __init__(
5050
.joinpath("webarena_verified.json")
5151
.read_text()
5252
)
53-
53+
5454
# substitute URLs
5555
for pattern, url_key in {
5656
"__GITLAB__": "gitlab",
@@ -90,9 +90,12 @@ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
9090
# pick a task at random
9191
self.config = self.random.choice(self.task_configs)
9292

93-
# hack: dynamically build a config file to read from
93+
# dynamically build a config file to read from
9494
with tempfile.NamedTemporaryFile(
95-
mode="w+", delete=False, prefix=f"wav-{self.config['intent_template_id']}-{self.config['task_id']}_", suffix=".json"
95+
mode="w+",
96+
delete=False,
97+
prefix=f"wav-{self.config['intent_template_id']}-{self.config['task_id']}_",
98+
suffix=".json",
9699
) as f:
97100
json.dump(self.config, f, indent=4)
98101
f.flush()
@@ -109,7 +112,9 @@ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
109112
extra_headers = json.load(f)
110113
page.context.set_extra_http_headers(extra_headers)
111114
except Exception as e:
112-
logger.warning(f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers.")
115+
logger.warning(
116+
f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers."
117+
)
113118

114119
# authenticate
115120
for site in self.config["sites"]:

browsergym/webarenalite/src/browsergym/webarenalite/task.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
9696
extra_headers = json.load(f)
9797
page.context.set_extra_http_headers(extra_headers)
9898
except Exception as e:
99-
logger.warning(f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers.")
99+
logger.warning(
100+
f"Failed to load extra headers from {extra_headers_file_path}: {e}. Make sure to set the PW_EXTRA_HEADERS environment variable to the path of an existing json file containing the extra headers. Continuing without extra headers."
101+
)
100102

101103
# authenticate
102104
for site in self.config["sites"]:

0 commit comments

Comments
 (0)