Skip to content

Commit 34d9dc6

Browse files
authored
ref(similarity): Add project option to enable ingestion after backfill (#74586)
Add an `enable_ingestion` flag to the backfill endpoint. Add the same `enable_ingestion` flag to the backfill task. If the flag is enabled, add `sentry:similarity_backfill_completed` to the project options.
1 parent 9255afa commit 34d9dc6

File tree

5 files changed

+106
-3
lines changed

5 files changed

+106
-3
lines changed

src/sentry/api/endpoints/project_backfill_similar_issues_embeddings_records.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,21 @@ def post(self, request: Request, project) -> Response:
3333

3434
last_processed_id = None
3535
only_delete = False
36+
enable_ingestion = False
37+
3638
if request.data.get("last_processed_id"):
3739
last_processed_id = int(request.data["last_processed_id"])
3840

3941
if request.data.get("only_delete"):
4042
only_delete = True
4143

44+
if request.data.get("enable_ingestion"):
45+
enable_ingestion = request.data["enable_ingestion"] == "true"
46+
4247
backfill_seer_grouping_records_for_project.delay(
4348
current_project_id=project.id,
4449
last_processed_group_id_input=last_processed_id,
4550
only_delete=only_delete,
51+
enable_ingestion=enable_ingestion,
4652
)
4753
return Response(status=204)

src/sentry/tasks/embeddings_grouping/backfill_seer_grouping_records_for_project.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def backfill_seer_grouping_records_for_project(
4949
last_processed_group_id_input: int | None,
5050
cohort: str | list[int] | None = None,
5151
last_processed_project_index_input: int | None = None,
52-
only_delete=False,
52+
only_delete: bool = False,
53+
enable_ingestion: bool = False,
5354
*args: Any,
5455
**kwargs: Any,
5556
) -> None:
@@ -105,6 +106,7 @@ def backfill_seer_grouping_records_for_project(
105106
last_processed_project_index=last_processed_project_index_input,
106107
cohort=cohort,
107108
only_delete=only_delete,
109+
enable_ingestion=enable_ingestion,
108110
)
109111
return
110112

@@ -121,13 +123,14 @@ def backfill_seer_grouping_records_for_project(
121123
last_processed_project_index=last_processed_project_index,
122124
cohort=cohort,
123125
only_delete=only_delete,
126+
enable_ingestion=enable_ingestion,
124127
)
125128
return
126129

127130
batch_size = options.get("embeddings-grouping.seer.backfill-batch-size")
128131

129132
(groups_to_backfill_with_no_embedding, batch_end_id) = get_current_batch_groups_from_postgres(
130-
project, last_processed_group_id, batch_size
133+
project, last_processed_group_id, batch_size, enable_ingestion
131134
)
132135

133136
if len(groups_to_backfill_with_no_embedding) == 0:
@@ -137,6 +140,7 @@ def backfill_seer_grouping_records_for_project(
137140
redis_client=redis_client,
138141
last_processed_project_index=last_processed_project_index,
139142
cohort=cohort,
143+
enable_ingestion=enable_ingestion,
140144
)
141145
return
142146

@@ -154,6 +158,7 @@ def backfill_seer_grouping_records_for_project(
154158
redis_client=redis_client,
155159
last_processed_project_index=last_processed_project_index,
156160
cohort=cohort,
161+
enable_ingestion=enable_ingestion,
157162
)
158163
return
159164

@@ -167,6 +172,7 @@ def backfill_seer_grouping_records_for_project(
167172
redis_client=redis_client,
168173
last_processed_project_index=last_processed_project_index,
169174
cohort=cohort,
175+
enable_ingestion=enable_ingestion,
170176
)
171177
return
172178

@@ -217,6 +223,7 @@ def backfill_seer_grouping_records_for_project(
217223
redis_client=redis_client,
218224
last_processed_project_index=last_processed_project_index,
219225
cohort=cohort,
226+
enable_ingestion=enable_ingestion,
220227
)
221228

222229

@@ -228,6 +235,7 @@ def call_next_backfill(
228235
last_processed_project_index: int,
229236
cohort: str | list[int] | None = None,
230237
only_delete: bool = False,
238+
enable_ingestion: bool = False,
231239
):
232240
if last_processed_group_id is not None:
233241
redis_client.set(
@@ -249,6 +257,7 @@ def call_next_backfill(
249257
cohort,
250258
last_processed_project_index,
251259
only_delete,
260+
enable_ingestion,
252261
],
253262
headers={"sentry-propagate-traces": False},
254263
)
@@ -295,6 +304,7 @@ def call_next_backfill(
295304
cohort,
296305
last_processed_project_index,
297306
only_delete,
307+
enable_ingestion,
298308
],
299309
headers={"sentry-propagate-traces": False},
300310
)

src/sentry/tasks/embeddings_grouping/utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,9 @@ def initialize_backfill(
130130

131131

132132
@sentry_sdk.tracing.trace
133-
def get_current_batch_groups_from_postgres(project, last_processed_group_id, batch_size):
133+
def get_current_batch_groups_from_postgres(
134+
project, last_processed_group_id, batch_size, enable_ingestion: bool = False
135+
):
134136
group_id_filter = Q()
135137
if last_processed_group_id is not None:
136138
group_id_filter = Q(id__lt=last_processed_group_id)
@@ -174,6 +176,13 @@ def get_current_batch_groups_from_postgres(project, last_processed_group_id, bat
174176
"backfill_seer_grouping_records.no_more_groups",
175177
extra={"project_id": project.id},
176178
)
179+
if enable_ingestion:
180+
logger.info(
181+
"backfill_seer_grouping_records.enable_ingestion",
182+
extra={"project_id": project.id},
183+
)
184+
project.update_option("sentry:similarity_backfill_completed", int(time.time()))
185+
177186
return (
178187
groups_to_backfill_batch,
179188
None,

tests/sentry/api/endpoints/test_project_backfill_similar_issues_embeddings_records.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_post_success_no_last_processed_id(
5050
current_project_id=self.project.id,
5151
last_processed_group_id_input=None,
5252
only_delete=False,
53+
enable_ingestion=False,
5354
)
5455

5556
@patch(
@@ -66,6 +67,7 @@ def test_post_success_no_last_processed_id_single_org(
6667
current_project_id=self.project.id,
6768
last_processed_group_id_input=None,
6869
only_delete=False,
70+
enable_ingestion=False,
6971
)
7072

7173
@patch(
@@ -85,6 +87,7 @@ def test_post_success_last_processed_id(
8587
current_project_id=self.project.id,
8688
last_processed_group_id_input=8,
8789
only_delete=False,
90+
enable_ingestion=False,
8891
)
8992

9093
@patch(
@@ -106,4 +109,27 @@ def test_post_success_only_delete(
106109
current_project_id=self.project.id,
107110
last_processed_group_id_input=8,
108111
only_delete=True,
112+
enable_ingestion=False,
113+
)
114+
115+
@patch(
116+
"sentry.api.endpoints.project_backfill_similar_issues_embeddings_records.is_active_superuser",
117+
return_value=True,
118+
)
119+
@patch(
120+
"sentry.api.endpoints.project_backfill_similar_issues_embeddings_records.backfill_seer_grouping_records_for_project.delay"
121+
)
122+
@with_feature("projects:similarity-embeddings-backfill")
123+
def test_post_success_enable_ingestion(
124+
self, mock_backfill_seer_grouping_records, mock_is_active_superuser
125+
):
126+
response = self.client.post(
127+
self.url, data={"last_processed_id": "8", "enable_ingestion": "true"}
128+
)
129+
assert response.status_code == 204, response.content
130+
mock_backfill_seer_grouping_records.assert_called_with(
131+
current_project_id=self.project.id,
132+
last_processed_group_id_input=8,
133+
only_delete=False,
134+
enable_ingestion=True,
109135
)

tests/sentry/tasks/test_backfill_seer_grouping_records.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,7 @@ def test_backfill_seer_grouping_records_empty_nodestore(
14861486
redis_client=ANY,
14871487
last_processed_project_index=0,
14881488
cohort=None,
1489+
enable_ingestion=False,
14891490
)
14901491

14911492
@with_feature("projects:similarity-embeddings-backfill")
@@ -1534,3 +1535,54 @@ def test_backfill_seer_grouping_records_killswitch_enabled(self, mock_logger):
15341535
mock_logger.info.assert_called_with(
15351536
"backfill_seer_grouping_records.killswitch_enabled",
15361537
)
1538+
1539+
@with_feature("projects:similarity-embeddings-backfill")
1540+
@patch("sentry.tasks.embeddings_grouping.utils.logger")
1541+
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
1542+
def test_backfill_seer_grouping_records_enable_ingestion(
1543+
self, mock_post_bulk_grouping_records, mock_logger
1544+
):
1545+
"""
1546+
Test that when the enable_ingestion flag is True, the project option is set and the
1547+
log is called.
1548+
"""
1549+
mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
1550+
1551+
with TaskRunner():
1552+
backfill_seer_grouping_records_for_project(self.project.id, None, enable_ingestion=True)
1553+
1554+
groups = Group.objects.filter(project_id=self.project.id)
1555+
for group in groups:
1556+
assert group.data["metadata"].get("seer_similarity") == {
1557+
"similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
1558+
"request_hash": self.group_hashes[group.id],
1559+
}
1560+
1561+
mock_logger.info.assert_called_with(
1562+
"backfill_seer_grouping_records.enable_ingestion",
1563+
extra={"project_id": self.project.id},
1564+
)
1565+
assert self.project.get_option("sentry:similarity_backfill_completed") is not None
1566+
1567+
@with_feature("projects:similarity-embeddings-backfill")
1568+
@patch("sentry.tasks.embeddings_grouping.utils.logger")
1569+
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
1570+
def test_backfill_seer_grouping_records_no_enable_ingestion(
1571+
self, mock_post_bulk_grouping_records, mock_logger
1572+
):
1573+
"""
1574+
Test that when the enable_ingestion flag is False, the project option is not set.
1575+
"""
1576+
mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
1577+
1578+
with TaskRunner():
1579+
backfill_seer_grouping_records_for_project(self.project.id, None)
1580+
1581+
groups = Group.objects.filter(project_id=self.project.id)
1582+
for group in groups:
1583+
assert group.data["metadata"].get("seer_similarity") == {
1584+
"similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
1585+
"request_hash": self.group_hashes[group.id],
1586+
}
1587+
1588+
assert self.project.get_option("sentry:similarity_backfill_completed") is None

0 commit comments

Comments
 (0)