Skip to content

Commit 19c92a4

Browse files
authored
ref(similarity): Make message_distance optional from seer response (#78346)
Make message_distance optional in the Seer response. Remove the message distance value from the internal similar issues tab.
1 parent b7485c2 commit 19c92a4

File tree

4 files changed

+33
-34
lines changed

4 files changed

+33
-34
lines changed

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626

2727
class FormattedSimilarIssuesEmbeddingsData(TypedDict):
2828
exception: float
29-
message: float
3029
shouldBeGrouped: str
3130

3231

@@ -49,7 +48,6 @@ def get_formatted_results(
4948
group_data = {}
5049
for similar_issue_data in similar_issues_data:
5150
formatted_response: FormattedSimilarIssuesEmbeddingsData = {
52-
"message": 1 - similar_issue_data.message_distance,
5351
"exception": 1 - similar_issue_data.stacktrace_distance,
5452
"shouldBeGrouped": "Yes" if similar_issue_data.should_group else "No",
5553
}

src/sentry/seer/similarity/types.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class SimilarIssuesEmbeddingsRequest(TypedDict):
3737
class RawSeerSimilarIssueData(TypedDict):
3838
parent_hash: str
3939
stacktrace_distance: float
40-
message_distance: float
40+
message_distance: NotRequired[float]
4141
should_group: bool
4242

4343

@@ -49,20 +49,19 @@ class SimilarIssuesEmbeddingsResponse(TypedDict):
4949
@dataclass
5050
class SeerSimilarIssueData:
5151
stacktrace_distance: float
52-
message_distance: float
5352
should_group: bool
5453
parent_group_id: int
5554
parent_hash: str
55+
message_distance: float | None = None
5656

5757
# Unfortunately, we have to hardcode this separately from the `RawSeerSimilarIssueData` type
5858
# definition because Python has no way to derive it from the type (nor vice-versa)
5959
required_incoming_keys: ClassVar = {
6060
"stacktrace_distance",
61-
"message_distance",
6261
"should_group",
6362
"parent_hash",
6463
}
65-
optional_incoming_keys: ClassVar = {}
64+
optional_incoming_keys: ClassVar = {"message_distance"}
6665
expected_incoming_keys: ClassVar = {*required_incoming_keys, *optional_incoming_keys}
6766

6867
@classmethod

tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ def create_frames(
139139
def get_expected_response(
140140
self,
141141
group_ids: Sequence[int],
142-
message_distances: Sequence[float],
143142
exception_distances: Sequence[float],
144143
should_be_grouped: Sequence[str],
145144
) -> Sequence[tuple[Any, Mapping[str, Any]]]:
@@ -152,7 +151,6 @@ def get_expected_response(
152151
(
153152
group,
154153
{
155-
"message": message_distances[i],
156154
"exception": exception_distances[i],
157155
"shouldBeGrouped": should_be_grouped[i],
158156
},
@@ -164,16 +162,16 @@ def test_get_formatted_results(self):
164162
event_from_second_similar_group = save_new_event(
165163
{"message": "Adopt don't shop"}, self.project
166164
)
167-
165+
# test including message_distance
168166
similar_issue_data_1 = SeerSimilarIssueData(
169167
message_distance=0.05,
170168
parent_group_id=NonNone(self.similar_event.group_id),
171169
parent_hash=NonNone(self.similar_event.get_primary_hash()),
172170
should_group=True,
173171
stacktrace_distance=0.01,
174172
)
173+
# test without including message_distance
175174
similar_issue_data_2 = SeerSimilarIssueData(
176-
message_distance=0.49,
177175
parent_group_id=NonNone(event_from_second_similar_group.group_id),
178176
parent_hash=NonNone(event_from_second_similar_group.get_primary_hash()),
179177
should_group=False,
@@ -188,7 +186,6 @@ def test_get_formatted_results(self):
188186
NonNone(self.similar_event.group_id),
189187
NonNone(event_from_second_similar_group.group_id),
190188
],
191-
[0.95, 0.51],
192189
[0.99, 0.77],
193190
["Yes", "No"],
194191
)
@@ -215,7 +212,7 @@ def test_simple(self, mock_logger, mock_seer_request, mock_metrics_incr):
215212
)
216213

217214
assert response.data == self.get_expected_response(
218-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
215+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
219216
)
220217

221218
expected_seer_request_params = {
@@ -260,19 +257,19 @@ def test_multiple(self, mock_seer_request, mock_record):
260257
seer_return_value: SimilarIssuesEmbeddingsResponse = {
261258
"responses": [
262259
{
260+
# test with message_distance
263261
"message_distance": 0.05,
264262
"parent_hash": NonNone(self.similar_event.get_primary_hash()),
265263
"should_group": True,
266264
"stacktrace_distance": 0.002, # Over threshold
267265
},
268266
{
269-
"message_distance": 0.05,
267+
# test without message_distance
270268
"parent_hash": NonNone(over_threshold_group_event.get_primary_hash()),
271269
"should_group": True,
272270
"stacktrace_distance": 0.002, # Over threshold
273271
},
274272
{
275-
"message_distance": 0.05,
276273
"parent_hash": NonNone(under_threshold_group_event.get_primary_hash()),
277274
"should_group": False,
278275
"stacktrace_distance": 0.05, # Under threshold
@@ -292,7 +289,6 @@ def test_multiple(self, mock_seer_request, mock_record):
292289
NonNone(over_threshold_group_event.group_id),
293290
NonNone(under_threshold_group_event.group_id),
294291
],
295-
[0.95, 0.95, 0.95],
296292
[0.998, 0.998, 0.95],
297293
["Yes", "Yes", "No"],
298294
)
@@ -364,7 +360,7 @@ def test_incomplete_return_data(self, mock_seer_request, mock_logger, mock_metri
364360
)
365361

366362
assert response.data == self.get_expected_response(
367-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
363+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
368364
)
369365

370366
@mock.patch("sentry.seer.similarity.similar_issues.delete_seer_grouping_records_by_hash")
@@ -416,7 +412,7 @@ def test_nonexistent_grouphash(
416412
},
417413
)
418414
assert response.data == self.get_expected_response(
419-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
415+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
420416
)
421417
mock_logger.warning.assert_called_with(
422418
"get_similarity_data_from_seer.parent_hash_not_found",
@@ -598,7 +594,7 @@ def test_no_optional_params(self, mock_seer_request):
598594
# optional params would be here
599595
)
600596
assert response.data == self.get_expected_response(
601-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
597+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
602598
)
603599

604600
mock_seer_request.assert_called_with(
@@ -626,7 +622,7 @@ def test_no_optional_params(self, mock_seer_request):
626622
data={"k": 1},
627623
)
628624
assert response.data == self.get_expected_response(
629-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
625+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
630626
)
631627

632628
mock_seer_request.assert_called_with(
@@ -655,7 +651,7 @@ def test_no_optional_params(self, mock_seer_request):
655651
data={"threshold": "0.01"},
656652
)
657653
assert response.data == self.get_expected_response(
658-
[NonNone(self.similar_event.group_id)], [0.95], [0.99], ["Yes"]
654+
[NonNone(self.similar_event.group_id)], [0.99], ["Yes"]
659655
)
660656

661657
mock_seer_request.assert_called_with(

tests/sentry/seer/similarity/test_types.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,25 @@ def test_from_raw_simple(self):
3636
**similar_issue_data # type:ignore[arg-type]
3737
)
3838

39+
def test_from_raw_no_message_distance(self):
40+
similar_event = save_new_event({"message": "Dogs are great!"}, self.project)
41+
raw_similar_issue_data: RawSeerSimilarIssueData = {
42+
"parent_hash": NonNone(similar_event.get_primary_hash()),
43+
"should_group": True,
44+
"stacktrace_distance": 0.01,
45+
}
46+
47+
similar_issue_data = {
48+
**raw_similar_issue_data,
49+
"parent_group_id": similar_event.group_id,
50+
}
51+
52+
assert SeerSimilarIssueData.from_raw(
53+
self.project.id, raw_similar_issue_data
54+
) == SeerSimilarIssueData(
55+
**similar_issue_data # type:ignore[arg-type]
56+
)
57+
3958
def test_from_raw_unexpected_data(self):
4059
similar_event = save_new_event({"message": "Dogs are great!"}, self.project)
4160
raw_similar_issue_data = {
@@ -79,20 +98,7 @@ def test_from_raw_missing_data(self):
7998

8099
with pytest.raises(
81100
IncompleteSeerDataError,
82-
match="Seer similar issues response entry missing key 'message_distance'",
83-
):
84-
raw_similar_issue_data = {
85-
"parent_hash": NonNone(similar_event.get_primary_hash()),
86-
# missing `message_distance`
87-
"should_group": True,
88-
"stacktrace_distance": 0.01,
89-
}
90-
91-
SeerSimilarIssueData.from_raw(self.project.id, raw_similar_issue_data)
92-
93-
with pytest.raises(
94-
IncompleteSeerDataError,
95-
match="Seer similar issues response entry missing keys 'message_distance', 'stacktrace_distance'",
101+
match="Seer similar issues response entry missing key 'stacktrace_distance'",
96102
):
97103
raw_similar_issue_data = {
98104
"parent_hash": NonNone(similar_event.get_primary_hash()),

0 commit comments

Comments
 (0)