31 changes: 30 additions & 1 deletion src/sentry/audit_log/services/log/impl.py
@@ -5,7 +5,7 @@

from django.db import IntegrityError, router, transaction

from sentry import options
from sentry import audit_log, options
from sentry.audit_log.services.log import AuditLogEvent, LogService, UserIpEvent
from sentry.db.postgres.transactions import enforce_constraints
from sentry.hybridcloud.models.outbox import RegionOutbox
@@ -83,6 +83,26 @@ def find_last_log(

return last_entry.as_event()

def find_issue_deletions_before(
self,
*,
cutoff_datetime: datetime.datetime,
min_datetime: datetime.datetime,
limit: int = 1000,
) -> list[int]:
issue_delete_event_id = audit_log.get_event_id("ISSUE_DELETE")
return [
obj_id
for obj_id in AuditLogEntry.objects.filter(
event=issue_delete_event_id,
datetime__gte=min_datetime,
datetime__lt=cutoff_datetime,
)
.values_list("target_object", flat=True)
.distinct()[:limit]
if obj_id is not None
]

def _should_skip_invalid_event(self, event: AuditLogEvent) -> bool:
event_id_pass_list = self._get_invalid_event_id_pass_list()
return event.event_id in event_id_pass_list
@@ -134,3 +154,12 @@ def find_last_log(
data: dict[str, str] | None = None,
) -> AuditLogEvent | None:
return None

def find_issue_deletions_before(
self,
*,
cutoff_datetime: datetime.datetime,
min_datetime: datetime.datetime,
limit: int = 1000,
) -> list[int]:
return []
13 changes: 13 additions & 0 deletions src/sentry/audit_log/services/log/service.py
@@ -4,6 +4,7 @@
# defined, because we want to reflect on type annotations and avoid forward references.

import abc
import datetime

from sentry.hybridcloud.rpc import silo_mode_delegation
from sentry.hybridcloud.rpc.service import RpcService, rpc_method
@@ -42,6 +43,18 @@ def find_last_log(
) -> AuditLogEvent | None:
pass

@rpc_method
@abc.abstractmethod
def find_issue_deletions_before(
self,
*,
cutoff_datetime: datetime.datetime,
min_datetime: datetime.datetime,
limit: int = 1000,
) -> list[int]:
"""Find group IDs with ISSUE_DELETE audit events in the time range [min_datetime, cutoff_datetime)."""
pass


def impl_by_db() -> LogService:
from .impl import DatabaseBackedLogService
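For context, a minimal usage sketch of the new RPC method (the window values and variable names are illustrative, not part of this PR); the deletion task below calls it the same way:

import datetime

from django.utils import timezone

from sentry.audit_log.services.log.service import log_rpc_service

now = timezone.now()
# Distinct group IDs whose ISSUE_DELETE audit entries fall in the half-open
# window [min_datetime, cutoff_datetime); `limit` caps the number returned.
deleted_group_ids = log_rpc_service.find_issue_deletions_before(
    cutoff_datetime=now - datetime.timedelta(hours=6),
    min_datetime=now - datetime.timedelta(days=90),
    limit=1000,
)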
38 changes: 26 additions & 12 deletions src/sentry/tasks/delete_pending_groups.py
@@ -5,6 +5,7 @@
from django.utils import timezone

from sentry.api.helpers.group_index.delete import schedule_group_deletion_tasks
from sentry.audit_log.services.log.service import log_rpc_service
from sentry.models.group import Group, GroupStatus
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task
@@ -34,30 +35,43 @@ def delete_pending_groups() -> None:
and schedules deletion tasks for them. Groups are batched by project to ensure
efficient deletion processing.

Only processes groups with last_seen between 6 hours and 90 days ago to avoid
processing very recent groups (safety window) or groups past retention period.
Only processes groups whose deletion was requested (per audit logs) between 6 hours
and 90 days ago, to avoid touching very recent groups (safety window) or groups past
the retention period.
"""
statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]

# Just using status to take advantage of the status DB index
groups = Group.objects.filter(status__in=statuses_to_delete).values_list(
"id", "project_id", "last_seen"
)[:BATCH_LIMIT]

if not groups:
logger.info("delete_pending_groups.no_groups_found")
return
groups = list(
Group.objects.filter(status__in=statuses_to_delete).values_list(
"id", "project_id", "last_seen"
)
)
Contributor comment:

Bug: Missing batch limit causes memory issues

The BATCH_LIMIT slice was removed from the query, so all groups with deletion statuses are loaded into memory instead of only the first 1000. This turns a bounded, batched query into an unbounded one that could pull in thousands or millions of rows, potentially exhausting server memory and causing severe performance degradation.


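One way to address the comment above, sketched here as a suggestion only (not part of this PR): keep the audit-log-driven filtering but restore the slice and the early return so at most BATCH_LIMIT groups are loaded per run.

# Suggested sketch: re-apply the BATCH_LIMIT slice and the empty-result early
# return from the previous version, on top of the status-index filter.
groups = list(
    Group.objects.filter(status__in=statuses_to_delete).values_list(
        "id", "project_id", "last_seen"
    )[:BATCH_LIMIT]
)
if not groups:
    logger.info("delete_pending_groups.no_groups_found")
    return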
# Process groups between 6 hours and 90 days old
now = timezone.now()
min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS)
max_last_seen = now - timedelta(hours=MIN_LAST_SEEN_HOURS)
cutoff_time = now - timedelta(hours=MIN_LAST_SEEN_HOURS)

# Query audit logs to find groups where deletion was requested in the time range
# (between 90 days ago and 6 hours ago)
non_recent_deletion_group_ids = set(
log_rpc_service.find_issue_deletions_before(
cutoff_datetime=cutoff_time,
min_datetime=min_last_seen,
limit=BATCH_LIMIT,
)
)

# Group by project_id to ensure all groups in a batch belong to the same project
groups_by_project: dict[int, list[int]] = defaultdict(list)
for group_id, project_id, last_seen in groups:
if last_seen >= min_last_seen and last_seen <= max_last_seen:
if last_seen >= min_last_seen and group_id in non_recent_deletion_group_ids:
groups_by_project[project_id].append(group_id)

if not groups_by_project:
logger.info("delete_pending_groups.no_groups_in_limbo_found")
return

total_groups = sum(len(group_ids) for group_ids in groups_by_project.values())
total_tasks = 0

88 changes: 75 additions & 13 deletions tests/sentry/tasks/test_delete_pending_groups.py
@@ -5,13 +5,17 @@

from django.utils import timezone

from sentry import audit_log
from sentry.models.auditlogentry import AuditLogEntry
from sentry.models.group import Group, GroupStatus
from sentry.silo.base import SiloMode
from sentry.tasks.delete_pending_groups import (
MAX_LAST_SEEN_DAYS,
MIN_LAST_SEEN_HOURS,
delete_pending_groups,
)
from sentry.testutils.cases import TestCase
from sentry.testutils.silo import assume_test_silo_mode
from sentry.types.group import GroupSubStatus


@@ -30,24 +34,40 @@ def _days_ago(self, days: int) -> datetime:
def _hours_ago(self, hours: int) -> datetime:
return timezone.now() - timedelta(hours=hours)

def _create_audit_log_for_group_deletion(self, group: Group, deletion_time: datetime) -> None:
"""Create an audit log entry for group deletion."""
with assume_test_silo_mode(SiloMode.CONTROL):
AuditLogEntry.objects.create(
organization_id=group.organization.id,
event=audit_log.get_event_id("ISSUE_DELETE"),
target_object=group.id,
datetime=deletion_time,
actor=self.user,
ip_address="127.0.0.1",
data={"issue_id": group.id, "project_slug": group.project.slug},
)

def test_schedules_only_groups_within_valid_date_range(self) -> None:
"""Test that only groups with last_seen between 24h-90d are scheduled for deletion."""
"""Test that only groups with last_seen between 24h-90d and audit logs are scheduled for deletion."""
project = self.create_project()

# Too recent - within 4 hours (should NOT be scheduled)
# Too recent - within 4 hours (should NOT be scheduled due to recent audit log)
too_recent = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(4)
)
self._create_audit_log_for_group_deletion(too_recent, self._hours_ago(4))

# Valid range - should be scheduled
# Valid range - should be scheduled (has audit log older than 6 hours)
valid_group = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(7)
)
self._create_audit_log_for_group_deletion(valid_group, self._hours_ago(7))

# Too old - over 90 days (should NOT be scheduled)
# Too old - over 90 days (should NOT be scheduled due to old last_seen)
too_old = self.create_group(
project=project, status=GroupStatus.DELETION_IN_PROGRESS, last_seen=self._days_ago(91)
)
self._create_audit_log_for_group_deletion(too_old, self._days_ago(91))

# Wrong status - should NOT be scheduled
wrong_status = self.create_group(
@@ -57,6 +77,11 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None:
last_seen=self._days_ago(5),
)

# No audit log - should NOT be scheduled even with valid status and date
no_audit_log = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(8)
)

with patch(
"sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async"
) as mock_delete_task:
@@ -68,16 +93,14 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None:
assert call_kwargs["object_ids"] == [valid_group.id]
assert call_kwargs["project_id"] == project.id

assert self._count_groups_in_deletion_status_and_valid_date_range() != 0
with self.tasks():
delete_pending_groups()

assert self._count_groups_in_deletion_status_and_valid_date_range() == 0
assert list(Group.objects.all().values_list("id", flat=True).order_by("id")) == [
# Verify all expected groups still exist (none were deleted yet, only scheduled)
assert set(Group.objects.all().values_list("id", flat=True)) == {
too_recent.id,
valid_group.id,
too_old.id,
wrong_status.id,
]
no_audit_log.id,
}

@patch("sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async")
def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
@@ -88,12 +111,17 @@ def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
group1 = self.create_group(
project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
)
self._create_audit_log_for_group_deletion(group1, self._days_ago(2))

group2 = self.create_group(
project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
)
self._create_audit_log_for_group_deletion(group2, self._days_ago(2))

group3 = self.create_group(
project=project2, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
)
self._create_audit_log_for_group_deletion(group3, self._days_ago(2))

delete_pending_groups()

@@ -127,10 +155,12 @@ def test_chunks_large_batches(

# Create more groups than GROUP_CHUNK_SIZE (10 in this test)
num_groups = GROUPS_MORE_THAN_CHUNK_SIZE + GROUP_CHUNK_SIZE
deletion_time = self._days_ago(2)
for _ in range(num_groups):
self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
group = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=deletion_time
)
self._create_audit_log_for_group_deletion(group, deletion_time)

delete_pending_groups()

@@ -160,3 +190,35 @@ def test_chunks_large_batches(
c for c in incr_calls if c.args[0] == "delete_pending_groups.tasks_scheduled"
)
assert tasks_scheduled_call.kwargs["amount"] == 2

@patch("sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async")
def test_respects_audit_log_time_range(self, mock_delete_task: MagicMock) -> None:
"""Test that only audit logs within the 90-day time range are considered."""
project = self.create_project()

# Group with very old audit log (100 days ago) - should NOT be scheduled
# even though it has valid status and last_seen
very_old_audit = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(10)
)
self._create_audit_log_for_group_deletion(very_old_audit, self._days_ago(100))

# Group with audit log exactly at boundary (91 days ago) - should NOT be scheduled
at_boundary = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(20)
)
self._create_audit_log_for_group_deletion(at_boundary, self._days_ago(91))

# Group with audit log just inside the range (89 days ago) - should be scheduled
inside_range = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(30)
)
self._create_audit_log_for_group_deletion(inside_range, self._days_ago(89))

delete_pending_groups()

# Verify only the group with audit log inside the time range was scheduled
mock_delete_task.assert_called_once()
call_kwargs = mock_delete_task.call_args.kwargs["kwargs"]
assert call_kwargs["object_ids"] == [inside_range.id]
assert call_kwargs["project_id"] == project.id