Commit 9255afa
feat(issue-search): Check max_candidates to push Postgres fields to post-filtering (#74531)
When applying query filters from Postgres, pre-filtering the results before passing the query to Snuba can break if it hits the maximum size that ClickHouse is able to process ([sentry issue](https://sentry.sentry.io/issues/5419972487/events/e14ff2501904443eb1c38f7600509b4f/?project=1&referrer=slack)). This also manifests as a possible [bug](https://sentry.sentry.io/issues/5419971969/events/9f65f1ff493a47858d91c2f9951e59a7/?referrer=issue_details.related_trace_issue) in the serializer, though that's more of a red herring, since ClickHouse won't be able to handle the query either way.

To work around this, I'm applying the pattern used in the `PostgresSnubaQueryExecutor` to determine whether to pre- or post-filter the results based on the number of group IDs in the filter. If the number of groups in the pre-filter exceeds `max_candidates`, we apply the filter after the Snuba query, in a post-filtering step right before pagination happens.

The tests confirm that the `is:linked` and `is:unlinked` queries work as a pre- or post-filter depending on the value configured in `snuba.search.max-pre-snuba-candidates`.

---------

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 65eafed commit 9255afa
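For context, the heart of the change is a cheap overflow check: fetch at most one more candidate ID than the configured maximum, and bail out of pre-filtering if the slice overflows. A minimal standalone sketch of that pattern (the helper name and shape are illustrative, not the actual executor code):

```python
def candidate_ids_or_none(queryset, max_candidates: int) -> list[int] | None:
    # Slice to max_candidates + 1 so overflow is detectable without
    # materializing the full candidate set.
    ids = list(queryset.values_list("id", flat=True)[: max_candidates + 1])
    if len(ids) > max_candidates:
        # Too many candidates: the caller should skip pre-filtering and
        # apply the Postgres filter after the Snuba query instead.
        return None
    return ids
```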

File tree

2 files changed: +67 −1

src/sentry/search/snuba/executors.py

Lines changed: 18 additions & 1 deletion
@@ -1659,8 +1659,15 @@ def query(
         # Check if any search filters are in POSTGRES_ONLY_SEARCH_FIELDS
         search_filters = search_filters or ()
         group_ids_to_pass_to_snuba = None
+        too_many_candidates = False
         if any(sf.key.name in POSTGRES_ONLY_SEARCH_FIELDS for sf in search_filters):
-            group_ids_to_pass_to_snuba = list(group_queryset.values_list("id", flat=True))
+            max_candidates = options.get("snuba.search.max-pre-snuba-candidates")
+            group_ids_to_pass_to_snuba = list(
+                group_queryset.using_replica().values_list("id", flat=True)[: max_candidates + 1]
+            )
+            if too_many_candidates := (len(group_ids_to_pass_to_snuba) > max_candidates):
+                metrics.incr("snuba.search.too_many_candidates", skip_internal=False)
+                group_ids_to_pass_to_snuba = None

         # remove the search filters that are only for postgres
         search_filters = [
@@ -1692,6 +1699,7 @@ def query(
             Condition(Column("timestamp", joined_entity), Op.GTE, start),
             Condition(Column("timestamp", joined_entity), Op.LT, end),
         ]
+
         having = []
         # if we need to prefetch from postgres, we add filter by the group ids
         if group_ids_to_pass_to_snuba is not None:
@@ -1838,6 +1846,15 @@ def query(
                 count += bulk_result[k]["data"][0]["count"]
                 k += 1

+        if too_many_candidates:
+            # If we had too many candidates to reasonably pass down to snuba,
+            # we need to apply the Postgres filter as a post-filtering step.
+            filtered_group_ids = group_queryset.filter(
+                id__in=[group["g.group_id"] for group in data]
+            ).values_list("id", flat=True)
+
+            data = [group for group in data if group["g.group_id"] in filtered_group_ids]
+
         paginator_results = SequencePaginator(
             [(row[self.sort_strategies[sort_by]], row["g.group_id"]) for row in data],
             reverse=True,
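A toy illustration of the post-filtering step above, with fabricated rows (the `g.group_id` key comes from the diff; the values are made up):

```python
# Fabricated Snuba-style result rows; real rows carry more columns.
data = [{"g.group_id": 1}, {"g.group_id": 2}, {"g.group_id": 3}]

# Stand-in for the Postgres queryset result: ids that also match the
# postgres-only filters (e.g. is:linked / is:unlinked).
filtered_group_ids = {1, 3}

data = [group for group in data if group["g.group_id"] in filtered_group_ids]
assert [group["g.group_id"] for group in data] == [1, 3]
```

One design note: in the diff itself, `filtered_group_ids` is a Django values-list queryset rather than a set; Django evaluates it on the first membership test and caches the result, so the repeated `in` checks in the comprehension should not re-query the database.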

tests/sentry/issues/endpoints/test_organization_group_index.py

Lines changed: 49 additions & 0 deletions
@@ -3070,6 +3070,55 @@ def test_snuba_query_first_release_with_environments(self, mock_query: MagicMock
         assert len(response.data) == len(expected_groups)
         assert {int(r["id"]) for r in response.data} == set(expected_groups)

+    @patch(
+        "sentry.search.snuba.executors.GroupAttributesPostgresSnubaQueryExecutor.query",
+        side_effect=GroupAttributesPostgresSnubaQueryExecutor.query,
+        autospec=True,
+    )
+    @override_options({"issues.group_attributes.send_kafka": True})
+    def test_snuba_query_unlinked(self, mock_query: MagicMock) -> None:
+        self.project = self.create_project(organization=self.organization)
+        event1 = self.store_event(
+            data={"fingerprint": ["group-1"], "message": "MyMessage"},
+            project_id=self.project.id,
+        )
+        event2 = self.store_event(
+            data={"fingerprint": ["group-2"], "message": "AnotherMessage"},
+            project_id=self.project.id,
+        )
+        PlatformExternalIssue.objects.create(project_id=self.project.id, group_id=event1.group.id)
+        self.external_issue = ExternalIssue.objects.create(
+            organization_id=self.organization.id, integration_id=self.integration.id, key="123"
+        )
+        GroupLink.objects.create(
+            project_id=self.project.id,
+            group_id=event1.group.id,
+            linked_type=GroupLink.LinkedType.issue,
+            linked_id=self.external_issue.id,
+        )
+
+        self.login_as(user=self.user)
+        # give time for consumers to run and propagate changes to clickhouse
+        sleep(1)
+
+        for value in [0, 5]:
+            with override_options({"snuba.search.max-pre-snuba-candidates": value}):
+                response = self.get_success_response(
+                    sort="new",
+                    useGroupSnubaDataset=1,
+                    query="is:linked",
+                )
+                assert len(response.data) == 1
+                assert int(response.data[0]["id"]) == event1.group.id
+
+                response = self.get_success_response(
+                    sort="new",
+                    useGroupSnubaDataset=1,
+                    query="is:unlinked",
+                )
+                assert len(response.data) == 1
+                assert int(response.data[0]["id"]) == event2.group.id
+
     @patch(
         "sentry.search.snuba.executors.GroupAttributesPostgresSnubaQueryExecutor.query",
         side_effect=GroupAttributesPostgresSnubaQueryExecutor.query,
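The loop over `[0, 5]` exercises both code paths: with two candidate groups, a maximum of 0 always overflows and forces post-filtering, while 5 leaves pre-filtering in effect. A tiny standalone check of that reasoning (the function is illustrative, not part of the codebase):

```python
def uses_post_filter(candidate_count: int, max_candidates: int) -> bool:
    # Mirrors the executor's overflow check: post-filter only when the
    # candidate list exceeds the configured maximum.
    return candidate_count > max_candidates

# The test fixture creates two groups.
assert uses_post_filter(2, 0)      # max-pre-snuba-candidates = 0 -> post-filter
assert not uses_post_filter(2, 5)  # max-pre-snuba-candidates = 5 -> pre-filter
```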
