Skip to content

Commit 0fc74d5

Browse files
authored
ref(grouping): Clarify and add to xfail parameterization tests (#109693)
In our test suite for message parameterization, we primarily test cases which are working as expected, but we also have two xfail tests to document cases where the parameterizer doesn't currently behave as we'd like it to. This serves as a rough TODO list, and lets us document cases we know we want to improve. This PR does a small refactor of those tests, and adds a number of new test cases. Key changes: - Since the two tests are identical, combine them into one. - Pull the now-greatly-expanded set of test cases out into a variable to match the format of the good and experimental cases. - Add an `actual` parameter to the test, so we can document the current parameterization behavior, and test against it as well. - For ease of understanding, switch from using an `xfail` test testing `==` to using a passing test testing `!=`.
1 parent 6ed1f7f commit 0fc74d5

File tree

1 file changed

+130
-27
lines changed

1 file changed

+130
-27
lines changed

tests/sentry/grouping/test_parameterization.py

Lines changed: 130 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -192,33 +192,136 @@ def test_experimental_parameterization(
192192
)
193193

194194

195-
# These are test cases that we should fix
196-
@pytest.mark.xfail(strict=True)
197-
@pytest.mark.parametrize(
198-
("name", "input", "expected"),
199-
[
200-
(
201-
"URL - non-http protocol user/pass/port",
202-
"""blah tcp://user:pass@email.com:10 had a problem""",
203-
"""blah <url> had a problem""",
204-
),
205-
],
206-
)
207-
def test_fail_parameterize(
208-
name: str, input: str, expected: str, parameterizer: Parameterizer
209-
) -> None:
210-
assert parameterizer.parameterize_all(input) == expected
195+
# Known problems, which we should fix if we can (might not always be possible). Includes false
196+
# positives (cases where we parameterize too aggressively), false negatives (cases where miss
197+
# parameterizing something), and otherwise incorrect parameterizations.
198+
#
199+
# TODO: Move as many of these as possible up to `standard_cases` above by improving
200+
# parameterization. (Remember to remove the last item in each tuple for the cases you fix.)
201+
incorrect_cases = [
202+
# ("name", "input", "desired", "actual")
203+
(
204+
"date - datetime space-separated UTC",
205+
"2006-01-02 15:04:05Z",
206+
"<date>",
207+
"<date>Z",
208+
),
209+
(
210+
"date - datetime space-separated w offset",
211+
"2006-01-02 15:04:05+01:00",
212+
"<date>",
213+
"<date>+<int>:<int>",
214+
),
215+
(
216+
"date - datetime compressed space-separated",
217+
"20060102 150405",
218+
"<date>",
219+
"<hex> <int>",
220+
),
221+
(
222+
"date - datetime compressed space-separated UTC",
223+
"20060102 150405Z",
224+
"<date>",
225+
"<hex> 150405Z",
226+
),
227+
(
228+
"date - datetime compressed space-separated w offset",
229+
"20060102 150405+0100",
230+
"<date>",
231+
"<hex> <int>+<int>",
232+
),
233+
(
234+
"date - datetime compressed T-separated",
235+
"20060102T150405",
236+
"<date>",
237+
"20060102T150405",
238+
),
239+
(
240+
"date - datetime compressed T-separated UTC",
241+
"20060102T150405Z",
242+
"<date>",
243+
"20060102T150405Z",
244+
),
245+
(
246+
"date - datetime compressed T-separated w offset",
247+
"20060102T150405+0100",
248+
"<date>",
249+
"20060102T150405+<int>",
250+
),
251+
(
252+
"git sha",
253+
"commit a93c7d2",
254+
"commit <git_sha>",
255+
"commit a93c7d2",
256+
),
257+
(
258+
"hex without prefix - lowercase, no numbers until later",
259+
"deadbeef 123",
260+
"deadbeef <int>",
261+
"<hex> <int>",
262+
),
263+
(
264+
"hex without prefix - uppercase, no numbers until later",
265+
"DEADBEEF 123",
266+
"DEADBEEF <int>",
267+
"<hex> <int>",
268+
),
269+
(
270+
"int - number in word",
271+
"Encoding: utf-8",
272+
"Encoding: utf-8",
273+
"Encoding: utf<int>",
274+
),
275+
(
276+
"int - with commas",
277+
"4,150,908",
278+
"<int>",
279+
"<int>,<int>,<int>",
280+
),
281+
(
282+
"ip - double colon object property",
283+
"Option::unwrap()",
284+
"Option::unwrap()",
285+
"Option<ip>unwrap()",
286+
),
287+
(
288+
"ip - double colon object property including hex",
289+
"Bee::buzz()",
290+
"Bee::buzz()",
291+
"<ip>buzz()",
292+
),
293+
(
294+
"json - double quotes",
295+
'{"dogs are great": true, "dog_id": "greatdog1231"}',
296+
'{"dogs are great": <bool>, "dog_id": <id>}',
297+
'{"dogs are great": true, "dog_id": "greatdog1231"}',
298+
),
299+
# Single quotes make this not valid JSON, but when the JSON is stringified, sometimes it comes
300+
# out that way
301+
(
302+
"json - single quotes",
303+
"{'dogs are great': true, 'dog_id': 'greatdog1231'}",
304+
"{'dogs are great': <bool>, 'dog_id': '<id>'}",
305+
"{'dogs are great': true, 'dog_id': 'greatdog1231'}",
306+
),
307+
(
308+
"random sequence as id",
309+
"invoice k9Mtd2gDcgG",
310+
"invoice <random_str>",
311+
"invoice k9Mtd2gDcgG",
312+
),
313+
(
314+
"URL - non-http protocol user/pass/port",
315+
"tcp://user:pass@email.com:10 had a problem",
316+
"<url> had a problem",
317+
"tcp://user:<email>:<int> had a problem",
318+
),
319+
]
211320

212321

213-
# These are test cases where we're too aggressive
214-
@pytest.mark.xfail(strict=True)
215-
@pytest.mark.parametrize(
216-
("name", "input", "expected"),
217-
[
218-
("Not an Int", "Encoding: utf-8", "Encoding: utf-8"), # produces "Encoding: utf<int>"
219-
],
220-
)
221-
def test_too_aggressive_parameterize(
222-
name: str, input: str, expected: str, parameterizer: Parameterizer
322+
@pytest.mark.parametrize(("name", "input", "desired", "actual"), incorrect_cases)
323+
def test_incorrect_parameterization(
324+
name: str, input: str, desired: str, actual: str, parameterizer: Parameterizer
223325
) -> None:
224-
assert parameterizer.parameterize_all(input) == expected
326+
assert parameterizer.parameterize_all(input) != desired
327+
assert parameterizer.parameterize_all(input) == actual

0 commit comments

Comments
 (0)