Skip to content

Commit 79eccfc

Browse files
authored
fix(recommended-event): Add subquery to limit ordering to last 1000 events (#97792)
When there is a very large number of recent events, the recommended event query will time out. The current logic is: - Query for events in the past 7 days - Sort by a number of properties - Return the top result This PR adds an `inner_limit` option to `get_events_snql()` that will create a initial subquery for the most recent n events before applying the sort order on top of it. The recommended event query supplies `inner_limit=1000` which should retain similar behavior and be much more performant when there are a large number of events.
1 parent 2f6e95b commit 79eccfc

File tree

5 files changed

+144
-21
lines changed

5 files changed

+144
-21
lines changed

src/sentry/models/group.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ def get_recommended_event(
314314
referrer="Group.get_helpful",
315315
dataset=dataset,
316316
tenant_ids={"organization_id": group.project.organization_id},
317+
inner_limit=1000,
317318
)
318319

319320
if events:

src/sentry/services/eventstore/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ def get_events_snql(
191191
conditions: Sequence[Condition],
192192
orderby: Sequence[str],
193193
limit: int = 100,
194+
inner_limit: int | None = None,
194195
offset: int = 0,
195196
referrer: str = "eventstore.get_events_snql",
196197
dataset: Dataset = Dataset.Events,

src/sentry/services/eventstore/snuba/backend.py

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def get_events_snql(
7575
conditions: Sequence[Condition],
7676
orderby: Sequence[str],
7777
limit: int = DEFAULT_LIMIT,
78+
inner_limit: int | None = None,
7879
offset: int = DEFAULT_OFFSET,
7980
referrer: str = "eventstore.get_events_snql",
8081
dataset: Dataset = Dataset.Events,
@@ -83,6 +84,7 @@ def get_events_snql(
8384
cols = self.__get_columns(dataset)
8485

8586
resolved_order_by = []
87+
order_by_col_names: set[str] = set()
8688
for order_field_alias in orderby:
8789
if order_field_alias.startswith("-"):
8890
direction = Direction.DESC
@@ -91,6 +93,7 @@ def get_events_snql(
9193
direction = Direction.ASC
9294
resolved_column_or_none = DATASETS[dataset].get(order_field_alias)
9395
if resolved_column_or_none:
96+
order_by_col_names.add(resolved_column_or_none)
9497
# special-case handling for nullable column values and proper ordering based on direction
9598
# null values are always last in the sort order regardless of Desc or Asc ordering
9699
if order_field_alias == Columns.NUM_PROCESSING_ERRORS.value.alias:
@@ -131,25 +134,61 @@ def get_events_snql(
131134
[group_id],
132135
)
133136

134-
snql_request = Request(
135-
dataset=dataset.value,
136-
app_id="eventstore",
137-
query=Query(
138-
match=Entity(dataset.value),
139-
select=[Column(col) for col in cols],
140-
where=[
141-
Condition(
142-
Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.GTE, start
137+
match_entity = Entity(dataset.value)
138+
event_filters = [
139+
Condition(Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.GTE, start),
140+
Condition(Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.LT, end),
141+
] + list(conditions)
142+
143+
common_request_kwargs = {
144+
"app_id": "eventstore",
145+
"dataset": dataset.value,
146+
"tenant_ids": tenant_ids or dict(),
147+
}
148+
149+
common_query_kwargs = {
150+
"select": [Column(col) for col in cols],
151+
"orderby": resolved_order_by,
152+
"limit": Limit(limit),
153+
"offset": Offset(offset),
154+
}
155+
156+
# If inner_limit provided, first limit to the most recent N rows, then apply final ordering
157+
# and pagination on top of that subquery.
158+
if inner_limit and inner_limit > 0:
159+
select_and_orderby_cols = set(cols) | order_by_col_names
160+
inner_query = Query(
161+
match=match_entity,
162+
select=[Column(col) for col in select_and_orderby_cols],
163+
where=event_filters,
164+
orderby=[
165+
OrderBy(
166+
Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]),
167+
direction=Direction.DESC,
143168
),
144-
Condition(Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.LT, end),
145-
]
146-
+ list(conditions),
147-
orderby=resolved_order_by,
148-
limit=Limit(limit),
149-
offset=Offset(offset),
150-
),
151-
tenant_ids=tenant_ids or dict(),
152-
)
169+
OrderBy(
170+
Column(DATASETS[dataset][Columns.EVENT_ID.value.alias]),
171+
direction=Direction.DESC,
172+
),
173+
],
174+
limit=Limit(inner_limit),
175+
)
176+
177+
outer_query = Query(
178+
**common_query_kwargs,
179+
match=inner_query,
180+
)
181+
182+
snql_request = Request(**common_request_kwargs, query=outer_query)
183+
else:
184+
snql_request = Request(
185+
**common_request_kwargs,
186+
query=Query(
187+
**common_query_kwargs,
188+
match=match_entity,
189+
where=event_filters,
190+
),
191+
)
153192

154193
result = raw_snql_query(snql_request, referrer, use_cache=False)
155194

src/sentry/testutils/pytest/metrics.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from unittest import mock
55

66
import pytest
7-
from snuba_sdk import Entity, Join
7+
from snuba_sdk import Entity, Join, Query
88

99
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
1010

@@ -70,7 +70,11 @@ def new_build_results(*args, **kwargs):
7070
query = args[0][0].request.query
7171
is_performance_metrics = False
7272
is_metrics = False
73-
if not isinstance(query, MetricsQuery) and not isinstance(query.match, Join):
73+
if (
74+
not isinstance(query, MetricsQuery)
75+
and not isinstance(query.match, Join)
76+
and not isinstance(query.match, Query)
77+
):
7478
is_performance_metrics = query.match.name.startswith("generic")
7579
is_metrics = "metrics" in query.match.name
7680

@@ -93,7 +97,7 @@ def new_create_snql_in_snuba(
9397
query = snql_query.query
9498
is_performance_metrics = False
9599
is_metrics = False
96-
if isinstance(query.match, Entity):
100+
if isinstance(query.match, Entity) and not isinstance(query.match, Query):
97101
is_performance_metrics = query.match.name.startswith("generic")
98102
is_metrics = "metrics" in query.match.name
99103

tests/sentry/services/eventstore/snuba/test_backend.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from sentry.services.eventstore.base import Filter
88
from sentry.services.eventstore.models import Event
99
from sentry.services.eventstore.snuba.backend import SnubaEventStorage
10+
from sentry.snuba.dataset import Dataset
1011
from sentry.testutils.cases import PerformanceIssueTestCase, SnubaTestCase, TestCase
1112
from sentry.testutils.helpers.datetime import before_now
1213
from sentry.utils import snuba
@@ -198,6 +199,83 @@ def test_get_event_by_id_cached(self) -> None:
198199
assert event.project_id == self.project2.id
199200
assert event.group_id == event.group.id
200201

202+
def test_get_events_snql_with_inner_limit(self) -> None:
203+
project = self.create_project()
204+
ts_old = before_now(minutes=6).isoformat()
205+
ts_mid = before_now(minutes=5).isoformat()
206+
ts_new = before_now(minutes=4).isoformat()
207+
208+
event_oldest = self.store_event(
209+
data={
210+
"event_id": "1" * 32,
211+
"type": "default",
212+
"platform": "python",
213+
"fingerprint": ["inner-limit-group"],
214+
"timestamp": ts_old,
215+
},
216+
project_id=project.id,
217+
)
218+
event_middle = self.store_event(
219+
data={
220+
"event_id": "2" * 32,
221+
"type": "default",
222+
"platform": "python",
223+
"fingerprint": ["inner-limit-group"],
224+
"timestamp": ts_mid,
225+
},
226+
project_id=project.id,
227+
)
228+
event_newest = self.store_event(
229+
data={
230+
"event_id": "3" * 32,
231+
"type": "default",
232+
"platform": "python",
233+
"fingerprint": ["inner-limit-group"],
234+
"timestamp": ts_new,
235+
},
236+
project_id=project.id,
237+
)
238+
239+
group_id = event_oldest.group_id
240+
assert group_id is not None
241+
eventstore = SnubaEventStorage()
242+
243+
# Ascending order should return oldest first when no inner limit is used
244+
no_inner = eventstore.get_events_snql(
245+
organization_id=project.organization.id,
246+
group_id=group_id,
247+
start=before_now(minutes=10),
248+
end=before_now(minutes=0),
249+
conditions=[
250+
Condition(Column("project_id"), Op.IN, [project.id]),
251+
Condition(Column("group_id"), Op.IN, [group_id]),
252+
],
253+
orderby=["timestamp", "event_id"],
254+
dataset=Dataset.Events,
255+
tenant_ids={"organization_id": project.organization.id},
256+
limit=2,
257+
)
258+
assert [e.event_id for e in no_inner] == [event_oldest.event_id, event_middle.event_id]
259+
260+
# With inner_limit=2 we first query for the two most recent events, THEN apply sorting
261+
# The result of which should be [event_middle, event_newest]
262+
with_inner = eventstore.get_events_snql(
263+
organization_id=project.organization.id,
264+
group_id=group_id,
265+
start=before_now(minutes=10),
266+
end=before_now(minutes=0),
267+
conditions=[
268+
Condition(Column("project_id"), Op.IN, [project.id]),
269+
Condition(Column("group_id"), Op.IN, [group_id]),
270+
],
271+
orderby=["timestamp", "event_id"],
272+
dataset=Dataset.Events,
273+
tenant_ids={"organization_id": project.organization.id},
274+
limit=2,
275+
inner_limit=2,
276+
)
277+
assert [e.event_id for e in with_inner] == [event_middle.event_id, event_newest.event_id]
278+
201279
def test_get_event_beyond_retention(self) -> None:
202280
event = self.store_event(
203281
data={

0 commit comments

Comments
 (0)