
Commit 8aa73f0

ref(replay): delete seer data on replay deletes (#97279)
Part of [REPLAY-538: Delete async summaries when replay is deleted](https://linear.app/getsentry/issue/REPLAY-538/delete-async-summaries-when-replay-is-deleted). Sends a Seer request to delete replay summaries when a replay is deleted through the bulk delete or replay details endpoints. Seer deletes were not added to the delete_replays script.

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 39de7a4 commit 8aa73f0
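
For orientation, here is a condensed sketch of the flow this commit wires up, assembled from the diff below. The `schedule_replay_deletes` wrapper is hypothetical, but the feature check, task names, and arguments are taken from the changed files: both delete entry points resolve the `organizations:replay-ai-summaries` feature flag up front and pass it to the tasks as `has_seer_data`, which gates the call to `delete_seer_replay_data`.

from sentry import features
from sentry.replays.tasks import delete_replay, run_bulk_replay_delete_job


def schedule_replay_deletes(project, replay_id: str, job_id: int) -> None:
    # Hypothetical wrapper for illustration only; the real callers are the
    # replay details endpoint and the replay jobs delete endpoint.
    has_seer_data = features.has("organizations:replay-ai-summaries", project.organization)

    # Single-replay delete (project_replay_details.py).
    delete_replay.delay(project_id=project.id, replay_id=replay_id, has_seer_data=has_seer_data)

    # Bulk delete job (project_replay_jobs_delete.py).
    run_bulk_replay_delete_job.delay(job_id, offset=0, has_seer_data=has_seer_data)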

7 files changed (+291 -35 lines)

src/sentry/replays/endpoints/project_replay_details.py

Lines changed: 7 additions & 2 deletions
@@ -14,7 +14,7 @@
 from sentry.models.project import Project
 from sentry.replays.post_process import process_raw_response
 from sentry.replays.query import query_replay_instance
-from sentry.replays.tasks import delete_recording_segments
+from sentry.replays.tasks import delete_replay
 from sentry.replays.usecases.reader import has_archived_segment
 
 
@@ -96,5 +96,10 @@ def delete(self, request: Request, project: Project, replay_id: str) -> Response
         if has_archived_segment(project.id, replay_id):
             return Response(status=404)
 
-        delete_recording_segments.delay(project_id=project.id, replay_id=replay_id)
+        delete_replay.delay(
+            project_id=project.id,
+            replay_id=replay_id,
+            has_seer_data=features.has("organizations:replay-ai-summaries", project.organization),
+        )
+
         return Response(status=204)

src/sentry/replays/endpoints/project_replay_jobs_delete.py

Lines changed: 4 additions & 2 deletions
@@ -2,7 +2,7 @@
 from rest_framework.request import Request
 from rest_framework.response import Response
 
-from sentry import audit_log
+from sentry import audit_log, features
 from sentry.api.api_owners import ApiOwner
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
@@ -102,9 +102,11 @@ def post(self, request: Request, project) -> Response:
             status="pending",
         )
 
+        has_seer_data = features.has("organizations:replay-ai-summaries", project.organization)
+
         # We always start with an offset of 0 (obviously) but future work doesn't need to obey
         # this. You're free to start from wherever you want.
-        run_bulk_replay_delete_job.delay(job.id, offset=0)
+        run_bulk_replay_delete_job.delay(job.id, offset=0, has_seer_data=has_seer_data)
 
         self.create_audit_entry(
             request,

src/sentry/replays/tasks.py

Lines changed: 42 additions & 27 deletions
@@ -15,7 +15,11 @@
     storage_kv,
 )
 from sentry.replays.models import DeletionJobStatus, ReplayDeletionJobModel, ReplayRecordingSegment
-from sentry.replays.usecases.delete import delete_matched_rows, fetch_rows_matching_pattern
+from sentry.replays.usecases.delete import (
+    delete_matched_rows,
+    delete_seer_replay_data,
+    fetch_rows_matching_pattern,
+)
 from sentry.replays.usecases.events import archive_event
 from sentry.replays.usecases.reader import fetch_segments_metadata
 from sentry.silo.base import SiloMode
@@ -30,7 +34,7 @@
 
 
 @instrumented_task(
-    name="sentry.replays.tasks.delete_recording_segments",
+    name="sentry.replays.tasks.delete_replay",
     queue="replays.delete_replay",
     default_retry_delay=5,
     max_retries=5,
@@ -43,32 +47,17 @@
         ),
     ),
 )
-def delete_recording_segments(project_id: int, replay_id: str, **kwargs: Any) -> None:
+def delete_replay(
+    project_id: int, replay_id: str, has_seer_data: bool = False, **kwargs: Any
+) -> None:
     """Asynchronously delete a replay."""
-    metrics.incr("replays.delete_recording_segments", amount=1, tags={"status": "started"})
+    metrics.incr("replays.delete_replay", amount=1, tags={"status": "started"})
     publisher = initialize_replays_publisher(is_async=False)
     archive_replay(publisher, project_id, replay_id)
     delete_replay_recording(project_id, replay_id)
-    metrics.incr("replays.delete_recording_segments", amount=1, tags={"status": "finished"})
-
-
-@instrumented_task(
-    name="sentry.replays.tasks.delete_replay_recording_async",
-    queue="replays.delete_replay",
-    default_retry_delay=5,
-    max_retries=5,
-    silo_mode=SiloMode.REGION,
-    taskworker_config=TaskworkerConfig(
-        namespace=replays_tasks,
-        processing_deadline_duration=120,
-        retry=Retry(
-            times=5,
-            delay=5,
-        ),
-    ),
-)
-def delete_replay_recording_async(project_id: int, replay_id: str) -> None:
-    delete_replay_recording(project_id, replay_id)
+    if has_seer_data:
+        delete_seer_replay_data(project_id, [replay_id])
+    metrics.incr("replays.delete_replay", amount=1, tags={"status": "finished"})
 
 
 @instrumented_task(
@@ -117,6 +106,25 @@ def delete_replays_script_async(
         segment_model.delete()
 
 
+@instrumented_task(
+    name="sentry.replays.tasks.delete_replay_recording_async",
+    queue="replays.delete_replay",
+    default_retry_delay=5,
+    max_retries=5,
+    silo_mode=SiloMode.REGION,
+    taskworker_config=TaskworkerConfig(
+        namespace=replays_tasks,
+        processing_deadline_duration=120,
+        retry=Retry(
+            times=5,
+            delay=5,
+        ),
+    ),
+)
+def delete_replay_recording_async(project_id: int, replay_id: str) -> None:
+    delete_replay_recording(project_id, replay_id)
+
+
 def delete_replay_recording(project_id: int, replay_id: str) -> None:
     """Delete all recording-segments associated with a Replay."""
     segments_from_metadata = fetch_segments_metadata(project_id, replay_id, offset=0, limit=10000)
@@ -178,7 +186,9 @@ def _delete_if_exists(filename: str) -> None:
         namespace=replays_tasks, retry=Retry(times=5), processing_deadline_duration=300
     ),
 )
-def run_bulk_replay_delete_job(replay_delete_job_id: int, offset: int, limit: int = 100) -> None:
+def run_bulk_replay_delete_job(
+    replay_delete_job_id: int, offset: int, limit: int = 100, has_seer_data: bool = False
+) -> None:
     """Replay bulk deletion task.
 
     We specify retry behavior in the task definition. However, if the task fails more than 5 times
@@ -213,6 +223,10 @@ def run_bulk_replay_delete_job(replay_delete_job_id: int, offset: int, limit: in
         # Delete the matched rows if any rows were returned.
         if len(results["rows"]) > 0:
             delete_matched_rows(job.project_id, results["rows"])
+            if has_seer_data:
+                delete_seer_replay_data(
+                    job.project_id, [row["replay_id"] for row in results["rows"]]
+                )
     except Exception:
         logger.exception("Bulk delete replays failed.")
 
@@ -228,8 +242,9 @@ def run_bulk_replay_delete_job(replay_delete_job_id: int, offset: int, limit: in
         # Checkpoint before continuing.
        job.offset = next_offset
         job.save()
-
-        run_bulk_replay_delete_job.delay(job.id, next_offset, limit=limit)
+        run_bulk_replay_delete_job.delay(
+            job.id, next_offset, limit=limit, has_seer_data=has_seer_data
+        )
         return None
     else:
         # If we've finished deleting all the replays for the selection. We can move the status to

src/sentry/replays/usecases/delete.py

Lines changed: 70 additions & 1 deletion
@@ -2,9 +2,12 @@
 
 import concurrent.futures as cf
 import functools
+import logging
 from datetime import datetime
-from typing import TypedDict
+from typing import Any, TypedDict
 
+import requests
+from django.conf import settings
 from google.cloud.exceptions import NotFound
 from snuba_sdk import (
     Column,
@@ -32,6 +35,8 @@
 from sentry.replays.usecases.events import archive_event
 from sentry.replays.usecases.query import execute_query, handle_search_filters
 from sentry.replays.usecases.query.configs.aggregate import search_config as agg_search_config
+from sentry.seer.signed_seer_api import sign_with_seer_secret
+from sentry.utils import json
 from sentry.utils.retries import ConditionalRetryPolicy, exponential_delay
 from sentry.utils.snuba import (
     QueryExecutionError,
@@ -49,6 +54,12 @@
     UnexpectedResponseError,
 )
 
+SEER_DELETE_SUMMARIES_URL = (
+    f"{settings.SEER_AUTOFIX_URL}/v1/automation/summarize/replay/breadcrumbs/delete"
+)
+
+logger = logging.getLogger(__name__)
+
 
 def delete_matched_rows(project_id: int, rows: list[MatchedRow]) -> int | None:
     if not rows:
@@ -182,3 +193,61 @@ def fetch_rows_matching_pattern(
             for row in rows
         ],
     }
+
+
+def make_seer_request(
+    url: str,
+    data: dict[str, Any],
+    timeout: int | tuple[int, int] | None = None,
+) -> tuple[requests.Response | None, int]:
+    """
+    Makes a standalone POST request to a Seer URL with built in error handling. Expects valid JSON data.
+    Returns a tuple of (response, status code). If a request error occurred the response will be None.
+    XXX: Investigate migrating this to the shared util make_signed_seer_api_request, which uses connection pool.
+    """
+    str_data = json.dumps(data)
+
+    try:
+        response = requests.post(
+            url,
+            data=str_data,
+            headers={
+                "content-type": "application/json;charset=utf-8",
+                **sign_with_seer_secret(str_data.encode()),
+            },
+            timeout=timeout or settings.SEER_DEFAULT_TIMEOUT or 5,
+        )
+        # Don't raise for error status, just return response.
+
+    except requests.exceptions.Timeout:
+        return (None, 504)
+
+    except requests.exceptions.RequestException:
+        return (None, 502)
+
+    return (response, response.status_code)
+
+
+def delete_seer_replay_data(
+    project_id: int,
+    replay_ids: list[str],
+    timeout: int | tuple[int, int] | None = None,
+) -> bool:
+    response, status_code = make_seer_request(
+        SEER_DELETE_SUMMARIES_URL,
+        {
+            "replay_ids": replay_ids,
+        },
+        timeout=timeout,
+    )
+    if status_code >= 400:
+        logger.error(
+            "Failed to delete replay data from Seer",
+            extra={
+                "project_id": project_id,
+                "replay_ids": replay_ids,
+                "status_code": status_code,
+                "response": response.content if response else None,
            },
+        )
+    return status_code < 400
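
For reference, a minimal sketch of calling the new helper directly (hypothetical values; only the signature and return semantics shown above are assumed). The helper handles request errors itself and returns False instead of raising, so a caller that needs to react to a failed Seer delete must check the boolean it returns; the tasks in this commit ignore it.

from sentry.replays.usecases.delete import delete_seer_replay_data

# Hypothetical project id and replay ids, for illustration only.
ok = delete_seer_replay_data(project_id=1, replay_ids=["aa" * 16], timeout=5)
if not ok:
    # The failure was already logged by delete_seer_replay_data; decide here
    # whether to retry or surface the error to the caller.
    pass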

tests/sentry/replays/endpoints/test_project_replay_details.py

Lines changed: 29 additions & 0 deletions
@@ -10,6 +10,7 @@
 from sentry.replays.lib.storage import RecordingSegmentStorageMeta, storage
 from sentry.replays.models import ReplayRecordingSegment
 from sentry.replays.testutils import assert_expected_response, mock_expected_response, mock_replay
+from sentry.replays.usecases.delete import SEER_DELETE_SUMMARIES_URL
 from sentry.testutils.cases import APITestCase, ReplaysSnubaTestCase
 from sentry.testutils.helpers import TaskRunner
 from sentry.utils import kafka_config
@@ -237,3 +238,31 @@ def test_delete_replay_from_clickhouse_data(self) -> None:
         assert storage.get(metadata1) is None
         assert storage.get(metadata2) is None
         assert storage.get(metadata3) is not None
+
+    @mock.patch("sentry.replays.usecases.delete.make_seer_request")
+    def test_delete_replay_from_seer(
+        self,
+        mock_make_seer_request: mock.MagicMock,
+    ) -> None:
+        """Test delete method deletes from Seer if summaries are enabled."""
+        kept_replay_id = uuid4().hex
+
+        t1 = datetime.datetime.now() - datetime.timedelta(seconds=10)
+        t2 = datetime.datetime.now() - datetime.timedelta(seconds=5)
+        self.store_replays(mock_replay(t1, self.project.id, self.replay_id, segment_id=0))
+        self.store_replays(mock_replay(t2, self.project.id, self.replay_id, segment_id=1))
+        self.store_replays(mock_replay(t1, self.project.id, kept_replay_id, segment_id=0))
+
+        mock_make_seer_request.return_value = (None, 204)
+
+        with self.feature({**REPLAYS_FEATURES, "organizations:replay-ai-summaries": True}):
+            with TaskRunner():
+                response = self.client.delete(self.url)
+                assert response.status_code == 204
+
+        mock_make_seer_request.assert_called_once()
+        (url, data) = mock_make_seer_request.call_args.args
+        assert url == SEER_DELETE_SUMMARIES_URL
+        assert data == {
+            "replay_ids": [self.replay_id],
+        }

tests/sentry/replays/endpoints/test_project_replay_jobs_delete.py

Lines changed: 25 additions & 1 deletion
@@ -185,7 +185,7 @@ def test_post_success(self, mock_task: MagicMock) -> None:
         assert job.status == "pending"
 
         # Verify task was scheduled
-        mock_task.assert_called_once_with(job.id, offset=0)
+        mock_task.assert_called_once_with(job.id, offset=0, has_seer_data=False)
 
         with assume_test_silo_mode(SiloMode.REGION):
             RegionOutbox(
@@ -342,6 +342,30 @@ def test_permission_granted_with_project_admin(self) -> None:
         )
         assert response.status_code == 201
 
+    @patch("sentry.replays.tasks.run_bulk_replay_delete_job.delay")
+    def test_post_has_seer_data(self, mock_task: MagicMock) -> None:
+        """Test POST with summaries enabled schedules task with has_seer_data=True."""
+        data = {
+            "data": {
+                "rangeStart": "2023-01-01T00:00:00Z",
+                "rangeEnd": "2023-01-02T00:00:00Z",
+                "environments": ["production"],
+                "query": None,
+            }
+        }
+
+        with self.feature({"organizations:replay-ai-summaries": True}):
+            response = self.get_success_response(
+                self.organization.slug, self.project.slug, method="post", **data, status_code=201
+            )
+
+        job_data = response.data["data"]
+        job = ReplayDeletionJobModel.objects.get(id=job_data["id"])
+        assert job.project_id == self.project.id
+        assert job.status == "pending"
+
+        mock_task.assert_called_once_with(job.id, offset=0, has_seer_data=True)
+
 
 @region_silo_test
 class ProjectReplayDeletionJobDetailTest(APITestCase):
