From 100238ffcb1d20f45ab6de71e169c3daaa82506d Mon Sep 17 00:00:00 2001 From: Orlando Marquez Date: Wed, 11 Jun 2025 12:26:28 -0400 Subject: [PATCH 1/2] Do not assume = is the only operator --- src/browsergym/workarena/tasks/list.py | 44 ++++++++++++--- tests/test_filter_list_task.py | 78 ++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 tests/test_filter_list_task.py diff --git a/src/browsergym/workarena/tasks/list.py b/src/browsergym/workarena/tasks/list.py index 6153f7f..d628eaf 100644 --- a/src/browsergym/workarena/tasks/list.py +++ b/src/browsergym/workarena/tasks/list.py @@ -101,6 +101,11 @@ class ServiceNowListTask(AbstractServiceNowTask): + OPERATOR_EQUALS = "=" + OPERATOR_NOT_EQUALS = "!=" + OPERATOR_STARTSWITH = "STARTSWITH" + OPERATOR_ISEMPTY = "ISEMPTY" + OPERATOR_EMPTYSTRING = "EMPTYSTRING" @classmethod def all_configs(cls) -> List[dict]: @@ -777,6 +782,9 @@ def validate( list_info = self._extract_list_info(page) current_query = list_info["query"] + if not current_query: + return 0, False, "", {"message": "There are no filters yet."} + # Replace "new query" statements with the standard OR separator current_query = current_query.replace("^NQ", "^OR") @@ -789,24 +797,44 @@ def validate( current_sep = "^" if current_kind != self.filter_kind: - return 0, False, "", {"message": "The kind of filter used is incorrect."} + return 0, False, "", {"message": f"The kind of filter used is incorrect: {current_query}."} # Extract the query pieces for validation current_query = current_query.split(current_sep) # Validate query length is ok if len(current_query) != self.filter_len: - return 0, False, "", {"message": "Incorrect number of filter conditions."} + return 0, False, "", {"message": f"Incorrect number of filter conditions: {current_query}."} + + # Parse column names, operators, and values + current_columns, current_operators, current_values = [], [], [] + + # Note that this is not exhaustive. If/when other operators are added, this will have to be updated. + for predicate in current_query: + if self.OPERATOR_EMPTYSTRING in predicate: + current_columns.append(predicate.replace(self.OPERATOR_EMPTYSTRING, "").strip()) + current_operators.append("=") + current_values.append("") + elif self.OPERATOR_ISEMPTY in predicate: + current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip()) + current_operators.append("=") + current_values.append("") + elif any(unsupported_operator in predicate for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]): + return 0, False, "", {"message": f"Unexpected operator in filter condition: {current_query}."} + elif self.OPERATOR_EQUALS in predicate: + col, val = predicate.split(self.OPERATOR_EQUALS, 1) + current_columns.append(col.strip()) + current_operators.append("=") + current_values.append(val.strip()) + else: + return 0, False, "", {"message": f"Unexpected operator in filter condition: {current_query}."} - # Validate query columns are ok - current_columns = [x.split("=")[0] for x in current_query] if set(current_columns) != set(self.filter_columns): - return 0, False, "", {"message": "Incorrect filter columns."} + return 0, False, "", {"message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."} # Validate query values are ok # This is the tricky part because we need to expand the values to their display values # We also need to handle the case where the value is a reference - current_values = [x.split("=")[1] for x in current_query] # Handle filtering across multiple rows if len(set(current_columns)) < len(current_columns): @@ -856,9 +884,9 @@ def validate( # Validate the values if set(current_values) != set(self.filter_values): - return 0, False, "", {"message": "Incorrect filter values."} + return 0, False, "", {"message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."} - return 1, True, "Nice work, thank you!", {"message": "Correct filter."} + return 1, True, "Nice work, thank you!", {"message": f"Correct filter: {list_info["query"]}."} class ExtractListInfoTask(ServiceNowListTask): diff --git a/tests/test_filter_list_task.py b/tests/test_filter_list_task.py new file mode 100644 index 0000000..18b7dd7 --- /dev/null +++ b/tests/test_filter_list_task.py @@ -0,0 +1,78 @@ +import pytest +from playwright.sync_api import Page +from browsergym.workarena.tasks.list import FilterIncidentListTask + + +# These are all the same ways to express the same filter query (empty string) in ServiceNow. +@pytest.mark.parametrize( + "query", + [ + "assigned_toEMPTYSTRING^short_descriptionISEMPTY^description=This is a beautiful incident", + "assigned_toISEMPTY^short_descriptionEMPTYSTRING^description=This is a beautiful incident^", + "assigned_toISEMPTY^short_descriptionISEMPTY^description=This is a beautiful incident", + "assigned_toEMPTYSTRING^short_descriptionEMPTYSTRING^description=This is a beautiful incident^", + "assigned_to=^short_description=^description=This is a beautiful incident", + ], +) +@pytest.mark.slow +def test_validate_filter_list_task(page: Page, query): + fixed_config = { + "filter_columns": [ + "short_description", + "assigned_to", + "description", + ], + "filter_kind": "AND", + "filter_values": [ + "", + "", + "This is a beautiful incident", + ], + } + task = FilterIncidentListTask(seed=1, fixed_config=fixed_config) + _, _ = task.setup(page=page) + query = query.replace("^", r"%5E").replace("=", r"%3D") + task.page.goto( + task.instance.snow_url + + rf"/now/nav/ui/classic/params/target/incident_list.do?sysparm_query={query}" + ) + reward, done, _, info = task.validate(page, []) + task.teardown() + assert done is True and reward == 1.0 and "Correct filter" in info["message"] + + +# Different ways in which the filter is wrong +@pytest.mark.parametrize( + "query, expected_message", + [ + ("", "There are no filters yet"), + ("assignment_groupEMPTYSTRING", "Incorrect number of filter conditions"), + ("assigned_toEMPTYSTRING^short_description!=Description", "Unexpected operator in filter condition"), + ("assigned_toEMPTYSTRING^short_description=Description", "Incorrect filter columns"), + ("assigned_toISEMPTY^description=My Description", "Incorrect filter values"), + ], +) +@pytest.mark.slow +def test_invalid_filter_list_task(page: Page, query, expected_message): + fixed_config = { + "filter_columns": [ + "assigned_to", + "description", + ], + "filter_kind": "AND", + "filter_values": [ + "", + "Description", + ], + } + task = FilterIncidentListTask(seed=1, fixed_config=fixed_config) + _, _ = task.setup(page=page) + query = query.replace("^", r"%5E").replace("=", r"%3D") + task.page.goto( + task.instance.snow_url + + f"/now/nav/ui/classic/params/target/incident_list.do?sysparm_query={query}" + ) + reward, done, _, info = task.validate(page, []) + task.teardown() + print(info["message"]) + assert done is False and reward == 0.0 and expected_message in info["message"] From 1df827fda3eac693ca1ed77731c50b7407a881aa Mon Sep 17 00:00:00 2001 From: Orlando Marquez Date: Wed, 11 Jun 2025 13:43:11 -0400 Subject: [PATCH 2/2] Apply black formatting --- src/browsergym/workarena/tasks/list.py | 58 ++++++++++++++++++++++---- tests/test_filter_list_task.py | 7 +++- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/browsergym/workarena/tasks/list.py b/src/browsergym/workarena/tasks/list.py index d628eaf..1da22ad 100644 --- a/src/browsergym/workarena/tasks/list.py +++ b/src/browsergym/workarena/tasks/list.py @@ -797,14 +797,24 @@ def validate( current_sep = "^" if current_kind != self.filter_kind: - return 0, False, "", {"message": f"The kind of filter used is incorrect: {current_query}."} + return ( + 0, + False, + "", + {"message": f"The kind of filter used is incorrect: {current_query}."}, + ) # Extract the query pieces for validation current_query = current_query.split(current_sep) # Validate query length is ok if len(current_query) != self.filter_len: - return 0, False, "", {"message": f"Incorrect number of filter conditions: {current_query}."} + return ( + 0, + False, + "", + {"message": f"Incorrect number of filter conditions: {current_query}."}, + ) # Parse column names, operators, and values current_columns, current_operators, current_values = [], [], [] @@ -819,18 +829,38 @@ def validate( current_columns.append(predicate.replace(self.OPERATOR_ISEMPTY, "").strip()) current_operators.append("=") current_values.append("") - elif any(unsupported_operator in predicate for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH]): - return 0, False, "", {"message": f"Unexpected operator in filter condition: {current_query}."} + elif any( + unsupported_operator in predicate + for unsupported_operator in [self.OPERATOR_NOT_EQUALS, self.OPERATOR_STARTSWITH] + ): + return ( + 0, + False, + "", + {"message": f"Unexpected operator in filter condition: {current_query}."}, + ) elif self.OPERATOR_EQUALS in predicate: col, val = predicate.split(self.OPERATOR_EQUALS, 1) current_columns.append(col.strip()) current_operators.append("=") current_values.append(val.strip()) else: - return 0, False, "", {"message": f"Unexpected operator in filter condition: {current_query}."} + return ( + 0, + False, + "", + {"message": f"Unexpected operator in filter condition: {current_query}."}, + ) if set(current_columns) != set(self.filter_columns): - return 0, False, "", {"message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}."} + return ( + 0, + False, + "", + { + "message": f"Incorrect filter columns: {set(current_columns)}. Expected: {set(self.filter_columns)}." + }, + ) # Validate query values are ok # This is the tricky part because we need to expand the values to their display values @@ -884,9 +914,21 @@ def validate( # Validate the values if set(current_values) != set(self.filter_values): - return 0, False, "", {"message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}."} + return ( + 0, + False, + "", + { + "message": f"Incorrect filter values {set(current_values)}. Expected: {set(self.filter_values)}." + }, + ) - return 1, True, "Nice work, thank you!", {"message": f"Correct filter: {list_info["query"]}."} + return ( + 1, + True, + "Nice work, thank you!", + {"message": f"Correct filter: {list_info['query']}."}, + ) class ExtractListInfoTask(ServiceNowListTask): diff --git a/tests/test_filter_list_task.py b/tests/test_filter_list_task.py index 18b7dd7..f528e74 100644 --- a/tests/test_filter_list_task.py +++ b/tests/test_filter_list_task.py @@ -16,7 +16,7 @@ ) @pytest.mark.slow def test_validate_filter_list_task(page: Page, query): - fixed_config = { + fixed_config = { "filter_columns": [ "short_description", "assigned_to", @@ -47,7 +47,10 @@ def test_validate_filter_list_task(page: Page, query): [ ("", "There are no filters yet"), ("assignment_groupEMPTYSTRING", "Incorrect number of filter conditions"), - ("assigned_toEMPTYSTRING^short_description!=Description", "Unexpected operator in filter condition"), + ( + "assigned_toEMPTYSTRING^short_description!=Description", + "Unexpected operator in filter condition", + ), ("assigned_toEMPTYSTRING^short_description=Description", "Incorrect filter columns"), ("assigned_toISEMPTY^description=My Description", "Incorrect filter values"), ],