Skip to content

Commit 1197cfa

Browse files
authored
ref(deletions): Use the same logic to schedule tasks (#103836)
This helps with code consistency.
1 parent 6469ade commit 1197cfa

File tree

3 files changed

+28
-27
lines changed

3 files changed

+28
-27
lines changed

src/sentry/api/helpers/group_index/delete.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from sentry.models.project import Project
2020
from sentry.signals import issue_deleted
2121
from sentry.utils.audit import create_audit_entry
22+
from sentry.utils.iterators import chunked
2223

2324
from . import BULK_MUTATION_LIMIT, SearchFunction
2425
from .validators import ValidationError
@@ -85,14 +86,30 @@ def delete_group_list(
8586
GroupInbox.objects.filter(project_id=project.id, group_id__in=group_ids).delete()
8687

8788
# Schedule a task per GROUP_CHUNK_SIZE batch of groups
88-
for i in range(0, len(group_ids), GROUP_CHUNK_SIZE):
89+
schedule_group_deletion_tasks(project.id, group_ids, transaction_id)
90+
91+
92+
def schedule_group_deletion_tasks(
93+
project_id: int, group_ids: list[int], transaction_id: str | None = None
94+
) -> int:
95+
"""Schedule tasks to delete groups in batches of GROUP_CHUNK_SIZE.
96+
97+
:param project_id: The project ID.
98+
:param group_ids: The list of group IDs to delete.
99+
:param transaction_id: The transaction ID.
100+
:return: The total number of tasks scheduled.
101+
"""
102+
total_tasks = 0
103+
for group_id_chunk in chunked(group_ids, GROUP_CHUNK_SIZE):
89104
delete_groups_for_project.apply_async(
90105
kwargs={
91-
"project_id": project.id,
92-
"object_ids": group_ids[i : i + GROUP_CHUNK_SIZE],
93-
"transaction_id": str(transaction_id),
106+
"project_id": project_id,
107+
"object_ids": group_id_chunk,
108+
"transaction_id": transaction_id or uuid4().hex,
94109
}
95110
)
111+
total_tasks += 1
112+
return total_tasks
96113

97114

98115
def create_audit_entries(

src/sentry/tasks/delete_pending_groups.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import logging
22
from collections import defaultdict
33
from datetime import timedelta
4-
from uuid import uuid4
54

65
from django.utils import timezone
76

8-
from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE
9-
from sentry.deletions.tasks.groups import delete_groups_for_project
7+
from sentry.api.helpers.group_index.delete import schedule_group_deletion_tasks
108
from sentry.models.group import Group, GroupStatus
119
from sentry.silo.base import SiloMode
1210
from sentry.tasks.base import instrumented_task
@@ -41,9 +39,7 @@ def delete_pending_groups() -> None:
4139
"""
4240
statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]
4341

44-
# XXX: If needed add a partial index with the status and last_seen fields
45-
# This can timeout for lack of an index on the status field
46-
# Not using the last_seen index to avoid the lack of composite index on status and last_seen
42+
# Just using status to take advantage of the status DB index
4743
groups = Group.objects.filter(status__in=statuses_to_delete).values_list(
4844
"id", "project_id", "last_seen"
4945
)[:BATCH_LIMIT]
@@ -71,19 +67,7 @@ def delete_pending_groups() -> None:
7167
)
7268

7369
for project_id, group_ids in groups_by_project.items():
74-
# Schedule deletion tasks in chunks of GROUP_CHUNK_SIZE
75-
for i in range(0, len(group_ids), GROUP_CHUNK_SIZE):
76-
chunk = group_ids[i : i + GROUP_CHUNK_SIZE]
77-
transaction_id = str(uuid4())
78-
79-
delete_groups_for_project.apply_async(
80-
kwargs={
81-
"project_id": project_id,
82-
"object_ids": chunk,
83-
"transaction_id": transaction_id,
84-
}
85-
)
86-
total_tasks += 1
70+
total_tasks += schedule_group_deletion_tasks(project_id, group_ids)
8771

8872
metrics.incr("delete_pending_groups.groups_scheduled", amount=total_groups, sample_rate=1.0)
8973
metrics.incr("delete_pending_groups.tasks_scheduled", amount=total_tasks, sample_rate=1.0)

tests/sentry/tasks/test_delete_pending_groups.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None:
5555
)
5656

5757
with patch(
58-
"sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async"
58+
"sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async"
5959
) as mock_delete_task:
6060
delete_pending_groups()
6161

@@ -76,7 +76,7 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None:
7676
wrong_status.id,
7777
]
7878

79-
@patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
79+
@patch("sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async")
8080
def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
8181
"""Test that groups are properly grouped by project when scheduling deletion."""
8282
project1 = self.create_project()
@@ -109,8 +109,8 @@ def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
109109
elif call_kwargs["project_id"] == project2.id:
110110
assert set(call_kwargs["object_ids"]) == {group3.id}
111111

112-
@patch("sentry.tasks.delete_pending_groups.GROUP_CHUNK_SIZE", new=10)
113-
@patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
112+
@patch("sentry.api.helpers.group_index.delete.GROUP_CHUNK_SIZE", 10)
113+
@patch("sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async")
114114
@patch("sentry.tasks.delete_pending_groups.metrics.incr")
115115
def test_chunks_large_batches(
116116
self,

0 commit comments

Comments
 (0)