Skip to content

Commit 65eafed

Browse files
authored
ref(seer grouping): Add new sentry:similarity_backfill_completed project option to enablement check (#74600)
Currently, backfilling a project's issues into the Seer database and enabling Seer grouping are two separate steps - the backfill script has to be run and then the project has to be manually added to the list of projects using the feature. This has a few disadvantages:

- Any groups which are created between the time the backfill ends and the time the enablement change is deployed get missed by Seer entirely.
- Because we're doing the backfill in batches, we're also doing the enablement in batches, which causes large, abrupt increases in the load being placed on Seer. This means that if Seer reaches an overload point, we both won't have much warning and won't know exactly at what point the load was too much.
- Manually adding projects to the enablement list is prone to error and kind of a pain.

To solve this problem, this PR adds a new project option, `sentry:similarity_backfill_completed`, which the Seer similarity backfill script can set once all of a project's issues have been backfilled. It also updates `should_call_seer_for_grouping` to consider the presence of the option equivalent to having the feature flag(s) on. That way:

- There's no lag time between the backfill ending and grouping being turned on.
- Traffic to Seer can increase one project at a time, which lets us better understand how it handles the increases in load.
- There's no more annoying copying and pasting.

For now the project option will live alongside the flag (rather than replacing it), so that events from projects which already have the feature flag on but don't yet have the project option set won't suddenly be deemed Seer-ineligible. We can decide later whether we want to run a migration to set the option for projects which already have the flag (and then delete the flag) or just live with the hybrid system under the theory that they're both temporary, and will disappear once Seer grouping reaches GA.
1 parent 9d2bbbe commit 65eafed

File tree

4 files changed

+39
-12
lines changed

4 files changed

+39
-12
lines changed

src/sentry/grouping/ingest/seer.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,7 @@ def should_call_seer_for_grouping(event: Event, primary_hashes: CalculatedHashes
3030

3131
project = event.project
3232

33-
has_either_seer_grouping_feature = features.has(
34-
"projects:similarity-embeddings-metadata", project
35-
) or features.has("projects:similarity-embeddings-grouping", project)
36-
37-
if not has_either_seer_grouping_feature:
33+
if not _project_has_similarity_grouping_enabled(project):
3834
return False
3935

4036
if _has_customized_fingerprint(event, primary_hashes):
@@ -60,6 +56,20 @@ def should_call_seer_for_grouping(event: Event, primary_hashes: CalculatedHashes
6056
return True
6157

6258

59+
def _project_has_similarity_grouping_enabled(project: Project) -> bool:
    """
    Decide whether Seer similarity grouping is enabled for the given project.

    A project counts as enabled when either of the two Seer feature flags
    (`projects:similarity-embeddings-metadata` or
    `projects:similarity-embeddings-grouping`) is on for it, or when its
    `sentry:similarity_backfill_completed` project option holds a truthy value.

    The option's raw value is not a boolean (it is unset/`None` until a value is
    stored), so the result is coerced explicitly in order to always return a real
    `bool`, as the signature promises, instead of leaking the option value.
    """
    has_either_seer_grouping_feature = features.has(
        "projects:similarity-embeddings-metadata", project
    ) or features.has("projects:similarity-embeddings-grouping", project)

    # TODO: This is a hack to get ingest to turn on for projects as soon as they're backfilled. When
    # the backfill script completes, we turn on this option, enabling ingest immediately rather than
    # forcing the project to wait until it's been manually added to a feature handler. Once all
    # projects have been backfilled, the option (and this check) can go away.
    has_been_backfilled = project.get_option("sentry:similarity_backfill_completed")

    # `a or b` evaluates to one of its operands, so without the coercion a project with both flags
    # off would yield `None` or the stored option value rather than `False`/`True`.
    return bool(has_either_seer_grouping_feature or has_been_backfilled)
71+
72+
6373
# TODO: Here we're including events with hybrid fingerprints (ones which are `{{ default }}`
6474
# combined with some other value). To the extent to which we're then using this function to decide
6575
# whether or not to call Seer, this means that the calculations giving rise to the default part of

src/sentry/models/options/project_option.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"sentry:secondary_grouping_config",
5454
"sentry:secondary_grouping_expiry",
5555
"sentry:grouping_auto_update",
56+
"sentry:similarity_backfill_completed",
5657
"sentry:fingerprinting_rules",
5758
"sentry:relay_pii_config",
5859
"sentry:metrics_extraction_rules",

src/sentry/projectoptions/defaults.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@
4444
# is auto upgrading enabled?
4545
register(key="sentry:grouping_auto_update", default=True)
4646

47+
# Has this project had its issues backfilled into the Seer database, and if so, when did the
48+
# backfill complete? (This is a temporary way to flag projects as we roll out Seer grouping, because
49+
# it can be flipped on in the backfill script, unlike inclusion in a getsentry feature handler.)
50+
register(key="sentry:similarity_backfill_completed", default=None)
51+
52+
4753
# The JavaScript loader version that is the project default. This option
4854
# is expected to be never set but the epoch defaults are used if no
4955
# version is set on a project's DSN.

tests/sentry/grouping/ingest/test_seer.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,23 +45,33 @@ def setUp(self):
4545
)
4646
self.primary_hashes = self.event.get_hashes()
4747

48-
def test_obeys_seer_similarity_flags(self):
49-
for metadata_flag, grouping_flag, expected_result in [
50-
(False, False, False),
51-
(True, False, True),
52-
(False, True, True),
53-
(True, True, True),
48+
def test_obeys_feature_enablement_check(self):
49+
for metadata_flag, grouping_flag, backfill_completed_option, expected_result in [
50+
# TODO: This manual cartesian product business is gross, but thankfully it's temporary -
51+
# the metadata flag is about to go away and the backfill completed option will go away
52+
# once all projects are backfilled.
53+
(False, False, None, False),
54+
(True, False, None, True),
55+
(False, True, None, True),
56+
(True, True, None, True),
57+
(False, False, 11211231, True),
58+
(True, False, 11211231, True),
59+
(False, True, 11211231, True),
60+
(True, True, 11211231, True),
5461
]:
5562
with Feature(
5663
{
5764
"projects:similarity-embeddings-metadata": metadata_flag,
5865
"projects:similarity-embeddings-grouping": grouping_flag,
5966
}
6067
):
68+
self.project.update_option(
69+
"sentry:similarity_backfill_completed", backfill_completed_option
70+
)
6171
assert (
6272
should_call_seer_for_grouping(self.event, self.primary_hashes)
6373
is expected_result
64-
), f"Case ({metadata_flag}, {grouping_flag}) failed."
74+
), f"Case (metadata {metadata_flag}, grouping {grouping_flag}, backfill completed {backfill_completed_option}) failed."
6575

6676
@with_feature("projects:similarity-embeddings-grouping")
6777
def test_obeys_content_filter(self):

0 commit comments

Comments
 (0)