Skip to content

Commit e259527

Browse files
authored
feat(ownership): Migrate issue owners cache invalidation to use timestamp versioning on ownership (#106108)
We've been seeing [some timeouts](https://sentry.sentry.io/issues/6731345393/?project=1&referrer=Linear) in the `code_owners_auto_sync` task since it performs expensive O(n) cache invalidation. When CODEOWNERS or ownership rules changed, we would previously: 1. Query all groups in the project with recent events 2. Delete the issue owners debounce cache entry for each group individually (via batched `cache.delete_many`) The solution implemented in this PR is to replace this expensive fan-out cache invalidation with an O(1) timestamp-based versioning system on ownership schemas. **Before**: - Issue owners debounce cache simply stored `True` for each group - On ownership change: delete cache entries for ALL active groups in the project **After**: - Issue owners debounce cache stores the timestamp of when owners were last evaluated for the group - On ownership change: set a cache value with a project-wide `ownership_changed_at` timestamp - On debounce check: - If `issue_owners_debounce_time` is None --> no debounce, evaluate ownership - If `issue_owners_debounce_time` is True --> legacy cache value, debounce (will disappear from cache 24hrs after this PR is rolled out, and then this logic can be removed) - If `issue_owners_debounce_time` is a timestamp --> compare timestamps - If `ownership_changed_at` is None or `ownership_changed_at` < `issue_owners_debounce_time` --> debounce - If `ownership_changed_at` >= `issue_owners_debounce_time` --> ownership rules changed in the interim, evaluate ownership ### Follow-up Left TODOs for both of these in the code: - Remove backwards compatibility logic for `True` cache values 24 hrs after this PR is rolled out - Investigate applying same O(1) cache invalidation pattern to `invalidate_assignee_exists_cache`
1 parent ef5e2b5 commit e259527

File tree

8 files changed

+233
-36
lines changed

8 files changed

+233
-36
lines changed

src/sentry/models/groupassignee.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def deassign(
224224
if not ownership:
225225
ownership = ProjectOwnership(project_id=group.project.id)
226226
GroupOwner.invalidate_assignee_exists_cache(group.project.id, group.id)
227-
GroupOwner.invalidate_debounce_issue_owners_evaluation_cache(group.project.id, group.id)
227+
GroupOwner.invalidate_debounce_issue_owners_evaluation_cache(group.id)
228228

229229
metrics.incr("group.assignee.change", instance="deassigned", skip_internal=True)
230230
# sync Sentry assignee to external issues

src/sentry/models/groupowner.py

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
ASSIGNEE_EXISTS_KEY = lambda group_id: f"assignee_exists:1:{group_id}"
2929
ASSIGNEE_EXISTS_DURATION = 60 * 60 * 24
3030
ASSIGNEE_DOES_NOT_EXIST_DURATION = 60
31+
PROJECT_OWNERSHIP_VERSION_KEY = lambda project_id: f"ownership_version:1:{project_id}"
32+
PROJECT_OWNERSHIP_VERSION_DURATION = 60 * 60 * 24
3133

3234

3335
class GroupOwnerType(Enum):
@@ -190,32 +192,13 @@ def get_autoassigned_owner(cls, group_id, project_id, autoassignment_types):
190192
return issue_owner
191193

192194
@classmethod
193-
def invalidate_debounce_issue_owners_evaluation_cache(cls, project_id, group_id=None):
195+
def invalidate_debounce_issue_owners_evaluation_cache(cls, group_id):
194196
"""
195-
If `group_id` is provided, clear the debounce issue owners cache for that group, else clear
196-
the cache of all groups for a project that had an event within the
197-
ISSUE_OWNERS_DEBOUNCE_DURATION window.
197+
Clear the debounce issue owners cache for a specific group.
198198
"""
199-
if group_id:
200-
cache.delete(ISSUE_OWNERS_DEBOUNCE_KEY(group_id))
201-
return
202-
203-
# Get all the groups for a project that had an event within the ISSUE_OWNERS_DEBOUNCE_DURATION window.
204-
# Any groups without events in that window would have expired their TTL in the cache.
205-
queryset = Group.objects.filter(
206-
project_id=project_id,
207-
last_seen__gte=timezone.now() - timedelta(seconds=ISSUE_OWNERS_DEBOUNCE_DURATION),
208-
).values_list("id", flat=True)
209-
210-
# Run cache invalidation in batches
211-
group_id_iter = queryset.iterator(chunk_size=1000)
212-
while True:
213-
group_ids = list(itertools.islice(group_id_iter, 1000))
214-
if not group_ids:
215-
break
216-
cache_keys = [ISSUE_OWNERS_DEBOUNCE_KEY(group_id) for group_id in group_ids]
217-
cache.delete_many(cache_keys)
199+
cache.delete(ISSUE_OWNERS_DEBOUNCE_KEY(group_id))
218200

201+
# TODO(shashank): can make this O(1) cache invalidation by using the project ownership version timestamp (follow-up PR)
219202
@classmethod
220203
def invalidate_assignee_exists_cache(cls, project_id, group_id=None):
221204
"""
@@ -243,6 +226,34 @@ def invalidate_assignee_exists_cache(cls, project_id, group_id=None):
243226
cache_keys = [ASSIGNEE_EXISTS_KEY(group_id) for group_id in group_ids]
244227
cache.delete_many(cache_keys)
245228

229+
@classmethod
230+
def set_project_ownership_version(cls, project_id: int) -> float:
231+
"""
232+
Set the project ownership version timestamp when ownership rules change.
233+
234+
When ownership rules (ProjectCodeOwners or ProjectOwnership) change, we set a
235+
timestamp. During debounce checks, we compare the group's debounce timestamp
236+
against this version timestamp to determine if re-evaluation is needed.
237+
238+
Returns the timestamp that was set.
239+
"""
240+
version_time = timezone.now().timestamp()
241+
cache.set(
242+
PROJECT_OWNERSHIP_VERSION_KEY(project_id),
243+
version_time,
244+
PROJECT_OWNERSHIP_VERSION_DURATION,
245+
)
246+
return version_time
247+
248+
@classmethod
249+
def get_project_ownership_version(cls, project_id: int) -> float | None:
250+
"""
251+
Get the project ownership version timestamp.
252+
253+
Returns the timestamp when ownership rules were last changed, or None if not set.
254+
"""
255+
return cache.get(PROJECT_OWNERSHIP_VERSION_KEY(project_id))
256+
246257

247258
def get_owner_details(group_list: Sequence[Group]) -> dict[int, list[OwnersSerialized]]:
248259
group_ids = [g.id for g in group_list]

src/sentry/models/projectcodeowners.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,18 +151,14 @@ def modify_date_updated(instance, **kwargs):
151151

152152
def process_resource_change(instance, change, **kwargs):
153153
from sentry.models.groupowner import GroupOwner
154-
from sentry.models.projectownership import ProjectOwnership
155154

156155
cache.set(
157156
ProjectCodeOwners.get_cache_key(instance.project_id),
158157
None,
159158
READ_CACHE_DURATION,
160159
)
161-
ownership = ProjectOwnership.get_ownership_cached(instance.project_id)
162-
if not ownership:
163-
ownership = ProjectOwnership(project_id=instance.project_id)
164160

165-
GroupOwner.invalidate_debounce_issue_owners_evaluation_cache(instance.project_id)
161+
GroupOwner.set_project_ownership_version(instance.project_id)
166162

167163

168164
pre_save.connect(

src/sentry/models/projectownership.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def process_resource_change(instance, change, **kwargs):
423423
READ_CACHE_DURATION,
424424
)
425425
GroupOwner.invalidate_assignee_exists_cache(instance.project.id)
426-
GroupOwner.invalidate_debounce_issue_owners_evaluation_cache(instance.project_id)
426+
GroupOwner.set_project_ownership_version(instance.project_id)
427427

428428

429429
# Signals update the cached reads used in post_processing

src/sentry/tasks/post_process.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def handle_owner_assignment(job):
229229
ASSIGNEE_EXISTS_KEY,
230230
ISSUE_OWNERS_DEBOUNCE_DURATION,
231231
ISSUE_OWNERS_DEBOUNCE_KEY,
232+
GroupOwner,
232233
)
233234
from sentry.models.projectownership import ProjectOwnership
234235

@@ -250,11 +251,25 @@ def handle_owner_assignment(job):
250251
return
251252

252253
issue_owners_key = ISSUE_OWNERS_DEBOUNCE_KEY(group.id)
253-
debounce_issue_owners = cache.get(issue_owners_key)
254+
issue_owners_debounce_time = cache.get(issue_owners_key)
255+
256+
# Debounce check with timestamp-based invalidation:
257+
# - issue_owners_debounce_time is None: no debounce, process ownership
258+
# - issue_owners_debounce_time is True: legacy format, debounce (backwards compatibility)
259+
# - issue_owners_debounce_time is float: compare timestamps
260+
# - If ownership_changed_at is None or ownership_changed_at < issue_owners_debounce_time: debounce
261+
# - If ownership_changed_at >= issue_owners_debounce_time: ownership rules changed, re-evaluate
262+
if issue_owners_debounce_time is not None:
263+
# TODO(shashank): once rolled out for 24hrs, remove this legacy cache format check and the comment line above. prob will need to remove some tests in test_post_process.py as well.
264+
if issue_owners_debounce_time is True:
265+
# Backwards compatibility: old cache format, debounce
266+
metrics.incr("sentry.tasks.post_process.handle_owner_assignment.debounce")
267+
return
254268

255-
if debounce_issue_owners:
256-
metrics.incr("sentry.tasks.post_process.handle_owner_assignment.debounce")
257-
return
269+
ownership_changed_at = GroupOwner.get_project_ownership_version(project.id)
270+
if ownership_changed_at is None or ownership_changed_at < issue_owners_debounce_time:
271+
metrics.incr("sentry.tasks.post_process.handle_owner_assignment.debounce")
272+
return
258273

259274
if should_issue_owners_ratelimit(
260275
project_id=project.id,
@@ -286,7 +301,7 @@ def handle_owner_assignment(job):
286301
issue_owners = ProjectOwnership.get_issue_owners(project.id, event.data)
287302
cache.set(
288303
issue_owners_key,
289-
True,
304+
timezone.now().timestamp(),
290305
ISSUE_OWNERS_DEBOUNCE_DURATION,
291306
)
292307

tests/sentry/models/test_projectcodeowners.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from sentry.issues.ownership.grammar import Matcher, Owner, Rule, dump_schema
2+
from sentry.models.groupowner import PROJECT_OWNERSHIP_VERSION_KEY, GroupOwner
23
from sentry.models.projectcodeowners import ProjectCodeOwners
34
from sentry.testutils.cases import TestCase
45
from sentry.utils.cache import cache
@@ -88,3 +89,33 @@ def test_merge_codeowners(self) -> None:
8889
},
8990
],
9091
}
92+
93+
def test_post_save_sets_ownership_version(self) -> None:
94+
cache.delete(PROJECT_OWNERSHIP_VERSION_KEY(self.project.id))
95+
assert GroupOwner.get_project_ownership_version(self.project.id) is None
96+
97+
self.create_codeowners(
98+
self.project,
99+
self.code_mapping,
100+
raw=self.data["raw"],
101+
)
102+
103+
version = GroupOwner.get_project_ownership_version(self.project.id)
104+
assert version is not None
105+
assert isinstance(version, float)
106+
107+
def test_post_delete_sets_ownership_version(self) -> None:
108+
codeowners = self.create_codeowners(
109+
self.project,
110+
self.code_mapping,
111+
raw=self.data["raw"],
112+
)
113+
114+
cache.delete(PROJECT_OWNERSHIP_VERSION_KEY(self.project.id))
115+
assert GroupOwner.get_project_ownership_version(self.project.id) is None
116+
117+
codeowners.delete()
118+
119+
version = GroupOwner.get_project_ownership_version(self.project.id)
120+
assert version is not None
121+
assert isinstance(version, float)

tests/sentry/models/test_projectownership.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22

33
from sentry.issues.ownership.grammar import Matcher, Owner, Rule, dump_schema, resolve_actors
44
from sentry.models.groupassignee import GroupAssignee
5-
from sentry.models.groupowner import GroupOwner, GroupOwnerType, OwnerRuleType
5+
from sentry.models.groupowner import (
6+
PROJECT_OWNERSHIP_VERSION_KEY,
7+
GroupOwner,
8+
GroupOwnerType,
9+
OwnerRuleType,
10+
)
611
from sentry.models.projectownership import ProjectOwnership
712
from sentry.models.repository import Repository
813
from sentry.testutils.cases import TestCase
@@ -12,6 +17,7 @@
1217
from sentry.types.actor import Actor, ActorType
1318
from sentry.users.models.user_avatar import UserAvatar
1419
from sentry.users.services.user.service import user_service
20+
from sentry.utils.cache import cache
1521

1622
pytestmark = requires_snuba
1723

@@ -772,6 +778,34 @@ def test_autoassignment_with_multiple_codeowners(self) -> None:
772778
assignee = GroupAssignee.objects.get(group=event.group)
773779
assert assignee.team_id == processing_team.id
774780

781+
def test_post_save_sets_ownership_version(self) -> None:
782+
cache.delete(PROJECT_OWNERSHIP_VERSION_KEY(self.project.id))
783+
assert GroupOwner.get_project_ownership_version(self.project.id) is None
784+
785+
ProjectOwnership.objects.create(
786+
project_id=self.project.id,
787+
schema=dump_schema([Rule(Matcher("path", "*.py"), [Owner("user", self.user.email)])]),
788+
)
789+
790+
version = GroupOwner.get_project_ownership_version(self.project.id)
791+
assert version is not None
792+
assert isinstance(version, float)
793+
794+
def test_post_delete_sets_ownership_version(self) -> None:
795+
ownership = ProjectOwnership.objects.create(
796+
project_id=self.project.id,
797+
schema=dump_schema([Rule(Matcher("path", "*.py"), [Owner("user", self.user.email)])]),
798+
)
799+
800+
cache.delete(PROJECT_OWNERSHIP_VERSION_KEY(self.project.id))
801+
assert GroupOwner.get_project_ownership_version(self.project.id) is None
802+
803+
ownership.delete()
804+
805+
version = GroupOwner.get_project_ownership_version(self.project.id)
806+
assert version is not None
807+
assert isinstance(version, float)
808+
775809

776810
class ResolveActorsTestCase(TestCase):
777811
def test_no_actors(self) -> None:

tests/sentry/tasks/test_post_process.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,116 @@ def test_debounces_handle_owner_assignments(self, mock_incr: MagicMock) -> None:
14441444
)
14451445
mock_incr.assert_any_call("sentry.tasks.post_process.handle_owner_assignment.debounce")
14461446

1447+
@patch("sentry.utils.metrics.incr")
1448+
def test_debounces_with_timestamp_format(self, mock_incr: MagicMock) -> None:
1449+
self.make_ownership()
1450+
event = self.create_event(
1451+
data={
1452+
"message": "oh no",
1453+
"platform": "python",
1454+
"stacktrace": {"frames": [{"filename": "src/app.py"}]},
1455+
},
1456+
project_id=self.project.id,
1457+
)
1458+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1459+
assert debounce_time is None
1460+
1461+
# First event: evaluates ownership and sets debounce timestamp
1462+
self.call_post_process_group(
1463+
is_new=False,
1464+
is_regression=False,
1465+
is_new_group_environment=False,
1466+
event=event,
1467+
)
1468+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1469+
assert debounce_time is not None
1470+
assert isinstance(debounce_time, float)
1471+
1472+
# Second event: should debounce because no ownership change occurred
1473+
self.call_post_process_group(
1474+
is_new=False,
1475+
is_regression=False,
1476+
is_new_group_environment=False,
1477+
event=event,
1478+
)
1479+
mock_incr.assert_any_call("sentry.tasks.post_process.handle_owner_assignment.debounce")
1480+
1481+
@patch("sentry.utils.metrics.incr")
1482+
def test_timestamp_invalidation_when_ownership_changes(self, mock_incr: MagicMock) -> None:
1483+
self.make_ownership()
1484+
event = self.create_event(
1485+
data={
1486+
"message": "oh no",
1487+
"platform": "python",
1488+
"stacktrace": {"frames": [{"filename": "src/app.py"}]},
1489+
},
1490+
project_id=self.project.id,
1491+
)
1492+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1493+
assert debounce_time is None
1494+
1495+
# First event: should evaluate ownership and set debounce timestamp
1496+
self.call_post_process_group(
1497+
is_new=False,
1498+
is_regression=False,
1499+
is_new_group_environment=False,
1500+
event=event,
1501+
)
1502+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1503+
assert debounce_time is not None
1504+
assert isinstance(debounce_time, float)
1505+
1506+
# Simulate ownership rules changing
1507+
GroupOwner.set_project_ownership_version(self.project.id)
1508+
1509+
# Second event: should NOT debounce because ownership changed after debounce was set
1510+
self.call_post_process_group(
1511+
is_new=False,
1512+
is_regression=False,
1513+
is_new_group_environment=False,
1514+
event=event,
1515+
)
1516+
assert (
1517+
mock.call("sentry.tasks.post_process.handle_owner_assignment.debounce")
1518+
not in mock_incr.call_args_list
1519+
)
1520+
1521+
@patch("sentry.utils.metrics.incr")
1522+
def test_timestamp_debounce_when_ownership_older(self, mock_incr: MagicMock) -> None:
1523+
self.make_ownership()
1524+
event = self.create_event(
1525+
data={
1526+
"message": "oh no",
1527+
"platform": "python",
1528+
"stacktrace": {"frames": [{"filename": "src/app.py"}]},
1529+
},
1530+
project_id=self.project.id,
1531+
)
1532+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1533+
assert debounce_time is None
1534+
1535+
# Ownership changed in the past (before any events)
1536+
GroupOwner.set_project_ownership_version(self.project.id)
1537+
1538+
# First event: evaluates ownership (ownership change is in the past, so no debounce yet)
1539+
self.call_post_process_group(
1540+
is_new=False,
1541+
is_regression=False,
1542+
is_new_group_environment=False,
1543+
event=event,
1544+
)
1545+
debounce_time = cache.get(ISSUE_OWNERS_DEBOUNCE_KEY(event.group_id))
1546+
assert debounce_time is not None
1547+
1548+
# Second event: should debounce because ownership change is older than the debounce timestamp
1549+
self.call_post_process_group(
1550+
is_new=False,
1551+
is_regression=False,
1552+
is_new_group_environment=False,
1553+
event=event,
1554+
)
1555+
mock_incr.assert_any_call("sentry.tasks.post_process.handle_owner_assignment.debounce")
1556+
14471557
@patch("sentry.utils.metrics.incr")
14481558
def test_issue_owners_should_ratelimit(self, mock_incr: MagicMock) -> None:
14491559
cache.set(

0 commit comments

Comments
 (0)