Skip to content

Commit 4b1562c

Browse files
Revert "chore(similarity): Do not send over 30 system-only frames to seer (#80946)"
This reverts commit 5b7dad4. Co-authored-by: jangjodi <[email protected]>
1 parent 65fcca9 commit 4b1562c

File tree

7 files changed

+16
-215
lines changed

7 files changed

+16
-215
lines changed

src/sentry/grouping/ingest/seer.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@
1616
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
1717
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
1818
from sentry.seer.similarity.utils import (
19-
ReferrerOptions,
2019
event_content_is_seer_eligible,
2120
filter_null_from_string,
22-
get_stacktrace_string_with_metrics,
21+
get_stacktrace_string,
2322
killswitch_enabled,
2423
)
2524
from sentry.utils import metrics
@@ -188,17 +187,14 @@ def get_seer_similar_issues(
188187
should go in (if any), or None if no neighbor was near enough.
189188
"""
190189
event_hash = event.get_primary_hash()
191-
stacktrace_string = get_stacktrace_string_with_metrics(
192-
get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST
193-
)
190+
stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
194191
exception_type = get_path(event.data, "exception", "values", -1, "type")
195192

196193
request_data: SimilarIssuesEmbeddingsRequest = {
197194
"event_id": event.event_id,
198195
"hash": event_hash,
199196
"project_id": event.project.id,
200-
# TODO: remove this once we do the stacktrace null check
201-
"stacktrace": stacktrace_string if stacktrace_string else "",
197+
"stacktrace": stacktrace_string,
202198
"exception_type": filter_null_from_string(exception_type) if exception_type else None,
203199
"k": num_neighbors,
204200
"referrer": "ingest",

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
1919
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
2020
from sentry.seer.similarity.utils import (
21-
TooManyOnlySystemFramesException,
2221
event_content_has_stacktrace,
2322
get_stacktrace_string,
2423
killswitch_enabled,
@@ -83,12 +82,9 @@ def get(self, request: Request, group) -> Response:
8382
stacktrace_string = ""
8483
if latest_event and event_content_has_stacktrace(latest_event):
8584
grouping_info = get_grouping_info(None, project=group.project, event=latest_event)
86-
try:
87-
stacktrace_string = get_stacktrace_string(grouping_info)
88-
except TooManyOnlySystemFramesException:
89-
stacktrace_string = ""
85+
stacktrace_string = get_stacktrace_string(grouping_info)
9086

91-
if not stacktrace_string or not latest_event:
87+
if stacktrace_string == "" or not latest_event:
9288
return Response([]) # No exception, stacktrace or in-app frames, or event
9389

9490
similar_issues_params: SimilarIssuesEmbeddingsRequest = {

src/sentry/seer/similarity/utils.py

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
from enum import StrEnum
32
from typing import Any, TypeVar
43

54
from sentry import options
@@ -152,15 +151,6 @@
152151
]
153152

154153

155-
class ReferrerOptions(StrEnum):
156-
INGEST = "ingest"
157-
BACKFILL = "backfill"
158-
159-
160-
class TooManyOnlySystemFramesException(Exception):
161-
pass
162-
163-
164154
def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
165155
return exception_value["values"][0] if exception_value.get("values") else ""
166156

@@ -187,7 +177,6 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
187177

188178
frame_count = 0
189179
html_frame_count = 0 # for a temporary metric
190-
is_frames_truncated = False
191180
stacktrace_str = ""
192181
found_non_snipped_context_line = False
193182

@@ -196,15 +185,12 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
196185
def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
197186
nonlocal frame_count
198187
nonlocal html_frame_count
199-
nonlocal is_frames_truncated
200188
nonlocal found_non_snipped_context_line
201189
frame_strings = []
202190

203191
contributing_frames = [
204192
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
205193
]
206-
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
207-
is_frames_truncated = True
208194
contributing_frames = _discard_excess_frames(
209195
contributing_frames, MAX_FRAME_COUNT, frame_count
210196
)
@@ -269,8 +255,6 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
269255
exc_value = _get_value_if_exists(exception_value)
270256
elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT:
271257
frame_strings = _process_frames(exception_value["values"])
272-
if is_frames_truncated and not app_hash:
273-
raise TooManyOnlySystemFramesException
274258
# Only exceptions have the type and value properties, so we don't need to handle the threads
275259
# case here
276260
header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else ""
@@ -306,31 +290,6 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
306290
return stacktrace_str.strip()
307291

308292

309-
def get_stacktrace_string_with_metrics(
310-
data: dict[str, Any], platform: str | None, referrer: ReferrerOptions
311-
) -> str | None:
312-
try:
313-
stacktrace_string = get_stacktrace_string(data)
314-
except TooManyOnlySystemFramesException:
315-
platform = platform if platform else "unknown"
316-
metrics.incr(
317-
"grouping.similarity.over_threshold_only_system_frames",
318-
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
319-
tags={"platform": platform, "referrer": referrer},
320-
)
321-
if referrer == ReferrerOptions.INGEST:
322-
metrics.incr(
323-
"grouping.similarity.did_call_seer",
324-
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
325-
tags={
326-
"call_made": False,
327-
"blocker": "over-threshold-only-system-frames",
328-
},
329-
)
330-
stacktrace_string = None
331-
return stacktrace_string
332-
333-
334293
def event_content_has_stacktrace(event: Event) -> bool:
335294
# If an event has no stacktrace, there's no data for Seer to analyze, so no point in making the
336295
# API call. If we ever start analyzing message-only events, we'll need to add `event.title in

src/sentry/tasks/embeddings_grouping/utils.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@
3232
SimilarHashNotFoundError,
3333
)
3434
from sentry.seer.similarity.utils import (
35-
ReferrerOptions,
3635
event_content_has_stacktrace,
3736
filter_null_from_string,
38-
get_stacktrace_string_with_metrics,
37+
get_stacktrace_string,
3938
)
4039
from sentry.snuba.dataset import Dataset
4140
from sentry.snuba.referrer import Referrer
@@ -356,10 +355,8 @@ def get_events_from_nodestore(
356355
event._project_cache = project
357356
if event and event.data and event_content_has_stacktrace(event):
358357
grouping_info = get_grouping_info(None, project=project, event=event)
359-
stacktrace_string = get_stacktrace_string_with_metrics(
360-
grouping_info, event.platform, ReferrerOptions.BACKFILL
361-
)
362-
if not stacktrace_string:
358+
stacktrace_string = get_stacktrace_string(grouping_info)
359+
if stacktrace_string == "":
363360
invalid_event_group_ids.append(group_id)
364361
continue
365362
primary_hash = event.get_primary_hash()

tests/sentry/grouping/ingest/test_seer.py

Lines changed: 1 addition & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
from dataclasses import asdict
22
from time import time
3-
from unittest.mock import MagicMock, Mock, call, patch
3+
from unittest.mock import MagicMock, patch
44

5-
from sentry import options
65
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
76
from sentry.eventstore.models import Event
87
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
98
from sentry.models.grouphash import GroupHash
109
from sentry.seer.similarity.types import SeerSimilarIssueData
11-
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
1210
from sentry.testutils.cases import TestCase
1311
from sentry.testutils.helpers.eventprocessing import save_new_event
1412
from sentry.testutils.helpers.options import override_options
@@ -263,54 +261,3 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
263261
expected_metadata,
264262
None,
265263
)
266-
267-
@patch("sentry.seer.similarity.utils.metrics")
268-
def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
269-
type = "FailedToFetchError"
270-
value = "Charlie didn't bring the ball back"
271-
context_line = f"raise {type}('{value}')"
272-
new_event = Event(
273-
project_id=self.project.id,
274-
event_id="22312012112120120908201304152013",
275-
data={
276-
"title": f"{type}('{value}')",
277-
"exception": {
278-
"values": [
279-
{
280-
"type": type,
281-
"value": value,
282-
"stacktrace": {
283-
"frames": [
284-
{
285-
"function": f"play_fetch_{i}",
286-
"filename": f"dogpark{i}.py",
287-
"context_line": context_line,
288-
}
289-
for i in range(MAX_FRAME_COUNT + 1)
290-
]
291-
},
292-
}
293-
]
294-
},
295-
"platform": "python",
296-
},
297-
)
298-
get_seer_similar_issues(new_event, new_event.get_grouping_variants())
299-
300-
sample_rate = options.get("seer.similarity.metrics_sample_rate")
301-
expected_metrics_calls = [
302-
call(
303-
"grouping.similarity.over_threshold_only_system_frames",
304-
sample_rate=sample_rate,
305-
tags={"platform": "python", "referrer": "ingest"},
306-
),
307-
call(
308-
"grouping.similarity.did_call_seer",
309-
sample_rate=1.0,
310-
tags={
311-
"call_made": False,
312-
"blocker": "over-threshold-only-system-frames",
313-
},
314-
),
315-
]
316-
assert mock_metrics.incr.call_args_list == expected_metrics_calls

tests/sentry/seer/similarity/test_utils.py

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,10 @@
33
from typing import Any, Literal, cast
44
from uuid import uuid1
55

6-
import pytest
7-
86
from sentry.eventstore.models import Event
97
from sentry.seer.similarity.utils import (
108
BASE64_ENCODED_PREFIXES,
11-
MAX_FRAME_COUNT,
129
SEER_ELIGIBLE_PLATFORMS,
13-
TooManyOnlySystemFramesException,
1410
_is_snipped_context_line,
1511
event_content_is_seer_eligible,
1612
filter_null_from_string,
@@ -674,18 +670,18 @@ def test_chained_too_many_frames_minified_js_frame_limit(self):
674670
)
675671

676672
def test_chained_too_many_exceptions(self):
677-
"""Test that we restrict number of chained exceptions to MAX_FRAME_COUNT."""
673+
"""Test that we restrict number of chained exceptions to 30."""
678674
data_chained_exception = copy.deepcopy(self.CHAINED_APP_DATA)
679675
data_chained_exception["app"]["component"]["values"][0]["values"] = [
680676
self.create_exception(
681677
exception_type_str="Exception",
682678
exception_value=f"exception {i} message!",
683679
frames=self.create_frames(num_frames=1, context_line_factory=lambda i: f"line {i}"),
684680
)
685-
for i in range(1, MAX_FRAME_COUNT + 2)
681+
for i in range(1, 32)
686682
]
687683
stacktrace_str = get_stacktrace_string(data_chained_exception)
688-
for i in range(2, MAX_FRAME_COUNT + 2):
684+
for i in range(2, 32):
689685
assert f"exception {i} message!" in stacktrace_str
690686
assert "exception 1 message!" not in stacktrace_str
691687

@@ -714,35 +710,9 @@ def test_no_app_no_system(self):
714710
stacktrace_str = get_stacktrace_string(data)
715711
assert stacktrace_str == ""
716712

717-
def test_too_many_system_frames_single_exception(self):
718-
data_system = copy.deepcopy(self.BASE_APP_DATA)
719-
data_system["system"] = data_system.pop("app")
720-
data_system["system"]["component"]["values"][0]["values"][0][
721-
"values"
722-
] += self.create_frames(MAX_FRAME_COUNT + 1, True)
723-
724-
with pytest.raises(TooManyOnlySystemFramesException):
725-
get_stacktrace_string(data_system)
726-
727-
def test_too_many_system_frames_chained_exception(self):
728-
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
729-
data_system["system"] = data_system.pop("app")
730-
# Split MAX_FRAME_COUNT across the two exceptions
731-
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
732-
"values"
733-
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
734-
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
735-
"values"
736-
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
737-
738-
with pytest.raises(TooManyOnlySystemFramesException):
739-
get_stacktrace_string(data_system)
713+
def test_over_30_contributing_frames(self):
714+
"""Check that when there are over 30 contributing frames, the last 30 are included."""
740715

741-
def test_too_many_in_app_contributing_frames(self):
742-
"""
743-
Check that when there are over MAX_FRAME_COUNT contributing frames, the last MAX_FRAME_COUNT
744-
are included.
745-
"""
746716
data_frames = copy.deepcopy(self.BASE_APP_DATA)
747717
# Create 30 contributing frames, 1-20 -> last 10 should be included
748718
data_frames["app"]["component"]["values"][0]["values"][0]["values"] = self.create_frames(
@@ -769,7 +739,7 @@ def test_too_many_in_app_contributing_frames(self):
769739
for i in range(41, 61):
770740
num_frames += 1
771741
assert ("test = " + str(i) + "!") in stacktrace_str
772-
assert num_frames == MAX_FRAME_COUNT
742+
assert num_frames == 30
773743

774744
def test_too_many_frames_minified_js_frame_limit(self):
775745
"""Test that we restrict fully-minified stacktraces to 20 frames, and all other stacktraces to 30 frames."""

0 commit comments

Comments
 (0)