Skip to content

Commit a520079

Browse files
authored
ref(similarity): gate MinHash similarity if on SaaS (#106086)
Follow-up to #105973. We should only do `MinHash` similarity work for projects that are not on embeddings-based grouping, i.e. projects whose `sentry:similarity_backfill_completed` option is not set.
1 parent d4e4b74 commit a520079

File tree

5 files changed

+23
-7
lines changed

5 files changed

+23
-7
lines changed

src/sentry/deletions/defaults/group.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,10 @@ def _delete_children(self, instance_list: Sequence[Group]) -> None:
251251
def delete_instance(self, instance: Group) -> None:
252252
from sentry import similarity
253253

254+
# Don't do MinHash work if we use embeddings-based similarity.
254255
if not self.skip_models or similarity not in self.skip_models:
255-
similarity.delete(None, instance)
256+
if not instance.project.get_option("sentry:similarity_backfill_completed"):
257+
similarity.delete(None, instance)
256258

257259
return super().delete_instance(instance)
258260

src/sentry/issues/endpoints/group_similar_issues.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,20 @@ def _fix_label(label: tuple[str, ...] | str) -> str:
2121

2222
@region_silo_endpoint
2323
class GroupSimilarIssuesEndpoint(GroupEndpoint):
24+
"""
25+
This endpoint uses the legacy MinHash similarity system which has been replaced
26+
by embeddings-based grouping for SaaS.
27+
"""
28+
2429
publish_status = {
2530
"GET": ApiPublishStatus.PRIVATE,
2631
}
2732

2833
def get(self, request: Request, group: Group) -> Response:
34+
# Any project using embeddings-based grouping will not work with this endpoint
35+
if group.project.get_option("sentry:similarity_backfill_completed"):
36+
return Response([])
37+
2938
features = similarity.features
3039

3140
limit_s = request.GET.get("limit", None)

src/sentry/tasks/merge.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ def merge_groups(
112112
# work for this group.
113113
from_object_ids.remove(from_object_id)
114114

115-
similarity.merge(group.project, new_group, [group], allow_unsafe=True)
115+
# Don't do MinHash work if we use embeddings-based similarity.
116+
if not group.project.get_option("sentry:similarity_backfill_completed"):
117+
similarity.merge(group.project, new_group, [group], allow_unsafe=True)
116118

117119
environment_ids = list(
118120
Environment.objects.filter(projects=group.project).values_list("id", flat=True)

src/sentry/tasks/reprocessing2.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ def finish_reprocessing(project_id: int, group_id: int) -> None:
276276

277277
eventstream.backend.exclude_groups(project_id, [group_id])
278278

279-
from sentry import similarity
279+
# Don't do MinHash work if we use embeddings-based similarity.
280+
if not group.project.get_option("sentry:similarity_backfill_completed"):
281+
from sentry import similarity
280282

281-
similarity.delete(None, group)
283+
similarity.delete(None, group)

src/sentry/tasks/unmerge.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,9 @@ def truncate_denormalizations(project: Project, group: Group) -> None:
272272
[str(group.id)],
273273
)
274274

275-
similarity.delete(project, group)
275+
# Don't do MinHash work if we use embeddings-based similarity.
276+
if not project.get_option("sentry:similarity_backfill_completed"):
277+
similarity.delete(project, group)
276278

277279

278280
def collect_group_environment_data(
@@ -458,8 +460,7 @@ def repair_denormalizations(
458460
repair_group_release_data(caches, project, events)
459461
repair_tsdb_data(caches, project, events)
460462

461-
# Skip MinHash similarity indexing for projects that have been backfilled to Seer.
462-
# Those projects use Seer's embeddings-based similarity instead.
463+
# Don't do MinHash work if we use embeddings-based similarity.
463464
if not project.get_option("sentry:similarity_backfill_completed"):
464465
for event in events:
465466
similarity.record(project, [event])

0 commit comments

Comments
 (0)