Skip to content

Commit e9df615

Browse files
authored
feat(grouping): Small grouping code refactor (#23857)
1 parent dec4f00 commit e9df615

File tree

5 files changed

+103
-73
lines changed

5 files changed

+103
-73
lines changed

src/sentry/grouping/component.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ def update(self, hint=None, contributes=None, contributes_to_similarity=None, va
114114
if contributes_to_similarity is not None:
115115
self.contributes_to_similarity = contributes_to_similarity
116116

117+
def shallow_copy(self):
118+
"""Creates a shallow copy."""
119+
rv = object.__new__(self.__class__)
120+
rv.__dict__.update(self.__dict__)
121+
rv.values = list(self.values)
122+
return rv
123+
117124
def iter_values(self):
118125
"""Recursively walks the component and flattens it into a list of
119126
values.

src/sentry/grouping/strategies/base.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
def strategy(id=None, ids=None, variants=None, interfaces=None, name=None, score=None):
1717
"""Registers a strategy"""
18-
if interfaces is None or variants is None:
19-
raise TypeError("interfaces and variants are required")
18+
if interfaces is None:
19+
raise TypeError("interfaces is required")
2020

2121
if name is None:
2222
if len(interfaces) != 1:
@@ -87,6 +87,15 @@ def lookup_strategy(strategy_id):
8787
raise LookupError("Unknown strategy %r" % strategy_id)
8888

8989

90+
def flatten_variants_from_component(component):
91+
"""Given a component extracts variants from it if that component is
92+
a variant provider. Otherwise this just returns the root component.
93+
"""
94+
if not component.variant_provider:
95+
return {component.id: component}
96+
return {c.id: c for c in component.values}
97+
98+
9099
class Strategy:
91100
"""Baseclass for all strategies."""
92101

@@ -97,13 +106,16 @@ def __init__(self, id, name, interfaces, variants, score, func):
97106
self.interfaces = interfaces
98107
self.mandatory_variants = []
99108
self.optional_variants = []
100-
self.variants = []
101-
for variant in variants:
102-
if variant[:1] == "!":
103-
self.mandatory_variants.append(variant[1:])
104-
else:
105-
self.optional_variants.append(variant)
106-
self.variants.append(variant)
109+
if variants is not None:
110+
self.variants = []
111+
for variant in variants:
112+
if variant[:1] == "!":
113+
self.mandatory_variants.append(variant[1:])
114+
else:
115+
self.optional_variants.append(variant)
116+
self.variants.append(variant)
117+
else:
118+
self.variants = None
107119
self.score = score
108120
self.func = func
109121
self.variant_processor_func = None
@@ -128,7 +140,7 @@ def variant_processor(self, func):
128140
self.variant_processor_func = func
129141
return func
130142

131-
def get_grouping_component(self, event, variant, context):
143+
def get_grouping_component(self, event, context, variant=None):
132144
"""Given a specific variant this calculates the grouping component."""
133145
args = []
134146
for iface_path in self.interfaces:
@@ -146,12 +158,22 @@ def get_grouping_component_variants(self, event, context):
146158
"""This returns a dictionary of all components by variant that this
147159
strategy can produce.
148160
"""
161+
# If no variants are provided we skip over the default logic and
162+
# go exclusively by the variant_processor_func. This also makes this
163+
# code a noop for delegate only strategies.
164+
if not self.variants:
165+
component = self.get_grouping_component(event, context)
166+
rv = flatten_variants_from_component(component)
167+
if self.variant_processor_func is not None:
168+
rv = self._invoke(self.variant_processor_func, rv, event=event, context=context)
169+
return rv
170+
149171
rv = {}
150172
# trivial case: we do not have mandatory variants and can handle
151173
# them all the same.
152174
if not self.mandatory_variants:
153175
for variant in self.variants:
154-
component = self.get_grouping_component(event, variant, context)
176+
component = self.get_grouping_component(event, context, variant)
155177
if component is not None:
156178
rv[variant] = component
157179

@@ -160,7 +182,7 @@ def get_grouping_component_variants(self, event, context):
160182
prevent_contribution = None
161183

162184
for variant in self.mandatory_variants:
163-
component = self.get_grouping_component(event, variant, context)
185+
component = self.get_grouping_component(event, context, variant)
164186
if component is None:
165187
continue
166188
if component.contributes:
@@ -172,7 +194,7 @@ def get_grouping_component_variants(self, event, context):
172194
for variant in self.optional_variants:
173195
# We also only want to create another variant if it
174196
# produces different results than the mandatory components
175-
component = self.get_grouping_component(event, variant, context)
197+
component = self.get_grouping_component(event, context, variant)
176198
if component is None:
177199
continue
178200

@@ -273,7 +295,6 @@ class NewStrategyConfiguration(StrategyConfiguration):
273295

274296
NewStrategyConfiguration.id = id
275297
NewStrategyConfiguration.base = base
276-
NewStrategyConfiguration.config_class = id.split(":", 1)[0]
277298
NewStrategyConfiguration.strategies = dict(base.strategies) if base else {}
278299
NewStrategyConfiguration.delegates = dict(base.delegates) if base else {}
279300
NewStrategyConfiguration.initial_context = dict(base.initial_context) if base else {}

src/sentry/grouping/strategies/configurations.py

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,63 @@
55
)
66

77

8-
# The classes of grouping algorithms
9-
CLASSES = []
10-
118
# The full mapping of all known configurations.
129
CONFIGURATIONS = {}
1310

11+
# The implied base strategy *every* strategy inherits from if no
12+
# base is defined.
13+
BASE_STRATEGY = create_strategy_configuration(
14+
None,
15+
strategies=[
16+
"expect-ct:v1",
17+
"expect-staple:v1",
18+
"hpkp:v1",
19+
"csp:v1",
20+
"threads:v1",
21+
"stacktrace:v1",
22+
"chained-exception:v1",
23+
"template:v1",
24+
"message:v1",
25+
],
26+
delegates=["frame:v1", "stacktrace:v1", "single-exception:v1"],
27+
initial_context={
28+
# This key in the context tells the system which variant should
29+
# be produced. TODO: phase this out.
30+
"variant": None,
31+
# This is a flag that can be used by any delegate to respond to
32+
# a detected recursion. This is currently used by the frame
33+
# strategy to disable itself. Recursion is detected by the outer
34+
# strategy.
35+
"is_recursion": False,
36+
# This turns on the automatic message trimming by the message
37+
# strategy.
38+
"trim_message": False,
39+
# newstyle: enables the legacy function logic. This is only used
40+
# by the newstyle:2019-04-05 strategy. Once this is no longer used
41+
# this can go away entirely.
42+
"legacy_function_logic": False,
43+
# newstyle: turns on some javascript fuzzing features.
44+
"javascript_fuzzing": False,
45+
# newstyle: platforms for which context line should be taken into
46+
# account when grouping.
47+
"contextline_platforms": (),
48+
# newstyle: this detects anonymous classes in PHP code.
49+
"php_detect_anonymous_classes": False,
50+
# newstyle: turns on a bug that was present in some variants
51+
"with_context_line_file_origin_bug": False,
52+
# newstyle: turns on falling back to exception values when there
53+
# is no stacktrace.
54+
"with_exception_value_fallback": False,
55+
},
56+
)
57+
1458

1559
def register_strategy_config(id, **kwargs):
1660
if kwargs.get("base") is not None:
1761
kwargs["base"] = CONFIGURATIONS[kwargs["base"]]
62+
else:
63+
kwargs["base"] = BASE_STRATEGY
1864
rv = create_strategy_configuration(id, **kwargs)
19-
if rv.config_class not in CLASSES:
20-
CLASSES.append(rv.config_class)
2165
CONFIGURATIONS[rv.id] = rv
2266
return rv
2367

@@ -30,15 +74,9 @@ def register_strategy_config(id, **kwargs):
3074
register_strategy_config(
3175
id="legacy:2019-03-12",
3276
strategies=[
33-
"expect-ct:v1",
34-
"expect-staple:v1",
35-
"hpkp:v1",
36-
"csp:v1",
3777
"threads:legacy",
3878
"stacktrace:legacy",
3979
"chained-exception:legacy",
40-
"template:v1",
41-
"message:v1",
4280
],
4381
delegates=["frame:legacy", "stacktrace:legacy", "single-exception:legacy"],
4482
changelog="""
@@ -58,19 +96,7 @@ def register_strategy_config(id, **kwargs):
5896

5997
register_strategy_config(
6098
id="newstyle:2019-05-08",
61-
strategies=[
62-
"expect-ct:v1",
63-
"expect-staple:v1",
64-
"hpkp:v1",
65-
"csp:v1",
66-
"threads:v1",
67-
"stacktrace:v1",
68-
"chained-exception:v1",
69-
"template:v1",
70-
"message:v1",
71-
],
7299
risk=RISK_LEVEL_HIGH,
73-
delegates=["frame:v1", "stacktrace:v1", "single-exception:v1"],
74100
changelog="""
75101
* Uses source code information all platforms with reliable sources
76102
for grouping (JavaScript, Python, PHP and Ruby) and function
@@ -84,10 +110,8 @@ def register_strategy_config(id, **kwargs):
84110
* C/C++ and other native stacktraces are more reliably grouped.
85111
""",
86112
initial_context={
87-
"legacy_function_logic": False,
88113
"javascript_fuzzing": True,
89114
"contextline_platforms": ("javascript", "node", "python", "php", "ruby"),
90-
"php_detect_anonymous_classes": False,
91115
"with_context_line_file_origin_bug": True,
92116
"trim_message": True,
93117
"with_exception_value_fallback": True,
@@ -97,7 +121,6 @@ def register_strategy_config(id, **kwargs):
97121
register_strategy_config(
98122
id="newstyle:2019-10-29",
99123
base="newstyle:2019-05-08",
100-
delegates=["frame:v1"],
101124
risk=RISK_LEVEL_MEDIUM,
102125
changelog="""
103126
* Better rules for when to take context lines into account for
@@ -118,39 +141,19 @@ def register_strategy_config(id, **kwargs):
118141

119142
register_strategy_config(
120143
id="newstyle:2019-04-05",
121-
strategies=[
122-
"expect-ct:v1",
123-
"expect-staple:v1",
124-
"hpkp:v1",
125-
"csp:v1",
126-
"threads:v1",
127-
"stacktrace:v1",
128-
"chained-exception:v1",
129-
"template:v1",
130-
"message:v1",
131-
],
132-
delegates=["frame:v1", "stacktrace:v1", "single-exception:v1"],
133144
risk=RISK_LEVEL_HIGH,
134145
changelog="""
135146
* Experimental grouping algorithm (should not be used)
136147
""",
137148
hidden=True,
138149
initial_context={
139150
"legacy_function_logic": True,
140-
"javascript_fuzzing": False,
141-
"contextline_platforms": (),
142-
"php_detect_anonymous_classes": False,
143-
"with_context_line_file_origin_bug": False,
144-
"trim_message": False,
145-
"with_exception_value_fallback": False,
146151
},
147152
)
148153

149154
register_strategy_config(
150155
id="newstyle:2019-04-17",
151156
base="newstyle:2019-04-05",
152-
strategies=["message:v1"],
153-
delegates=["frame:v1", "single-exception:v1"],
154157
risk=RISK_LEVEL_HIGH,
155158
changelog="""
156159
* Experimental grouping algorithm (should not be used)

src/sentry/grouping/strategies/legacy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def remove_function_outliers_legacy(function):
150150
return new_function, None
151151

152152

153-
@strategy(id="single-exception:legacy", interfaces=["singleexception"], variants=["!system", "app"])
153+
@strategy(id="single-exception:legacy", interfaces=["singleexception"])
154154
def single_exception_legacy(exception, context, **meta):
155155
type_component = GroupingComponent(
156156
id="type",
@@ -223,7 +223,7 @@ def chained_exception_legacy_variant_processor(variants, context, **meta):
223223
return remove_non_stacktrace_variants(variants)
224224

225225

226-
@strategy(id="frame:legacy", interfaces=["frame"], variants=["!system", "app"])
226+
@strategy(id="frame:legacy", interfaces=["frame"])
227227
def frame_legacy(frame, event, **meta):
228228
platform = frame.platform or event.platform
229229

src/sentry/grouping/strategies/newstyle.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353

5454
def is_recursion_v1(frame1, frame2):
5555
"Returns a boolean indicating whether frames are recursive calls."
56+
if frame2 is None:
57+
return False
58+
5659
for field in RECURSION_COMPARISON_FIELDS:
5760
if getattr(frame1, field, None) != getattr(frame2, field, None):
5861
return False
@@ -230,13 +233,10 @@ def get_function_component(
230233
@strategy(
231234
ids=["frame:v1"],
232235
interfaces=["frame"],
233-
variants=["!system", "app"],
234236
)
235237
def frame(frame, event, context, **meta):
236238
platform = frame.platform or event.platform
237239

238-
platform = frame.platform or event.platform
239-
240240
# Safari throws [native code] frames in for calls like ``forEach``
241241
# whereas Chrome ignores these. Let's remove it from the hashing algo
242242
# so that they're more likely to group together
@@ -308,6 +308,9 @@ def frame(frame, event, context, **meta):
308308
):
309309
rv.update(contributes=False, hint="ignored low quality javascript frame")
310310

311+
if context["is_recursion"]:
312+
rv.update(contributes=False, hint="ignored due to recursion")
313+
311314
return rv
312315

313316

@@ -343,19 +346,16 @@ def get_contextline_component(frame, platform, function, context):
343346
def stacktrace(stacktrace, context, **meta):
344347
variant = context["variant"]
345348
frames = stacktrace.frames
346-
all_frames_considered_in_app = False
347349

348350
values = []
349351
prev_frame = None
350352
frames_for_filtering = []
351353
for frame in frames:
352-
frame_component = context.get_grouping_component(frame, **meta)
353-
if variant == "app" and not frame.in_app and not all_frames_considered_in_app:
354+
with context:
355+
context["is_recursion"] = is_recursion_v1(frame, prev_frame)
356+
frame_component = context.get_grouping_component(frame, **meta)
357+
if variant == "app" and not frame.in_app:
354358
frame_component.update(contributes=False, hint="non app frame")
355-
elif prev_frame is not None and is_recursion_v1(frame, prev_frame):
356-
frame_component.update(contributes=False, hint="ignored due to recursion")
357-
elif variant == "app" and not frame.in_app and all_frames_considered_in_app:
358-
frame_component.update(hint="frame considered in-app because no frame is in-app")
359359
values.append(frame_component)
360360
frames_for_filtering.append(frame.get_raw_data())
361361
prev_frame = frame
@@ -419,7 +419,6 @@ def _stacktrace_encoder(id, stacktrace):
419419
@strategy(
420420
ids=["single-exception:v1"],
421421
interfaces=["singleexception"],
422-
variants=["!system", "app"],
423422
)
424423
def single_exception(exception, context, **meta):
425424
if exception.stacktrace is not None:

0 commit comments

Comments
 (0)