-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
feat(deletions): Use GroupHistory to determine status change #104044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
|
|
||
| from sentry.api.helpers.group_index.delete import schedule_group_deletion_tasks | ||
| from sentry.models.group import Group, GroupStatus | ||
| from sentry.models.grouphistory import GroupHistory, GroupHistoryStatus | ||
| from sentry.silo.base import SiloMode | ||
| from sentry.tasks.base import instrumented_task | ||
| from sentry.taskworker.namespaces import deletion_tasks | ||
|
|
@@ -34,29 +35,37 @@ def delete_pending_groups() -> None: | |
| and schedules deletion tasks for them. Groups are batched by project to ensure | ||
| efficient deletion processing. | ||
|
|
||
| Only processes groups with last_seen between 6 hours and 90 days ago to avoid | ||
| processing very recent groups (safety window) or groups past retention period. | ||
| Only processes groups where status was changed between 6 hours and 90 days ago | ||
| to avoid processing very recent groups (safety window) or groups past retention period. | ||
| """ | ||
| statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS] | ||
|
|
||
| # Just using status to take advantage of the status DB index | ||
| groups = Group.objects.filter(status__in=statuses_to_delete).values_list( | ||
| "id", "project_id", "last_seen" | ||
| )[:BATCH_LIMIT] | ||
|
|
||
| if not groups: | ||
| logger.info("delete_pending_groups.no_groups_found") | ||
| return | ||
| groups = list( | ||
| Group.objects.filter(status__in=statuses_to_delete).values_list( | ||
| "id", "project_id", "last_seen" | ||
| ) | ||
| ) | ||
|
|
||
| # Process groups between 6 hours and 90 days old | ||
| now = timezone.now() | ||
| min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS) | ||
| max_last_seen = now - timedelta(hours=MIN_LAST_SEEN_HOURS) | ||
| status_change_threshold = now - timedelta(hours=MIN_LAST_SEEN_HOURS) | ||
|
|
||
| # Group by project_id to ensure all groups in a batch belong to the same project | ||
| groups_by_project: dict[int, list[int]] = defaultdict(list) | ||
| for group_id, project_id, last_seen in groups: | ||
| if last_seen >= min_last_seen and last_seen <= max_last_seen: | ||
| groups_by_project[project_id].append(group_id) | ||
| if last_seen >= min_last_seen: | ||
|
||
| group_history = GroupHistory.objects.filter( | ||
| group_id=group_id, | ||
| status__in=[GroupHistoryStatus.DELETED], | ||
| date_added__lte=status_change_threshold, | ||
| ).first() | ||
|
||
| if group_history and group_history.date_added <= status_change_threshold: | ||
| groups_by_project[project_id].append(group_id) | ||
|
||
|
|
||
| if not groups_by_project: | ||
| logger.info("delete_pending_groups.no_groups_in_limbo_found") | ||
| return | ||
|
|
||
| total_groups = sum(len(group_ids) for group_ids in groups_by_project.values()) | ||
| total_tasks = 0 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: Missing batch limit causes memory issues
The
BATCH_LIMITslice was removed from the query, causing all groups with deletion statuses to be loaded into memory instead of the first 1000. This transforms a batched query into a full table scan that could load thousands or millions of groups, potentially exhausting server memory and causing severe performance degradation.