Skip to content

Commit 2db67cb

Browse files
Update analyzer commons to not suggest possessive quantifiers (#980)
1 parent d12b017 commit 2db67cb

File tree

3 files changed

+36
-40
lines changed

3 files changed

+36
-40
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<mockito.version>3.9.0</mockito.version>
9292
<sonar.version>8.9.0.43852</sonar.version>
9393
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
94-
<sonar-analyzer-commons.version>1.21.0.809</sonar-analyzer-commons.version>
94+
<sonar-analyzer-commons.version>1.21.0.818</sonar-analyzer-commons.version>
9595
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
9696
<sslr.version>1.23</sslr.version>
9797
<protobuf.version>3.17.3</protobuf.version>

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/S5857.html

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,15 @@
11
<p>Using reluctant quantifiers (also known as lazy or non-greedy quantifiers) in patterns can often lead to needless backtracking, making the regex
22
needlessly inefficient and potentially vulnerable to <a href="https://www.regular-expressions.info/catastrophic.html">catastrophic backtracking</a>.
33
Particularly when using <code>.*?</code> or <code>.+?</code> to match anything up to some terminating character, it is usually a better idea to
4-
instead use a greedily or possessively quantified negated character class containing the terminating character. For example <code>&lt;.+?&gt;</code>
5-
should be replaced with <code>&lt;[^&gt;]++&gt;</code>.</p>
4+
instead use a greedily quantified negated character class containing the terminating character. For example <code>&lt;.+?&gt;</code> should be
5+
replaced with <code>&lt;[^&gt;]*&gt;</code> or <code>&lt;[^&gt;]+&gt;</code>.</p>
66
<h2>Noncompliant Code Example</h2>
77
<pre>
88
r'&lt;.+?&gt;'
99
r'".*?"'
1010
</pre>
1111
<h2>Compliant Solution</h2>
1212
<pre>
13-
r'&lt;[^&gt;]++&gt;'
14-
r'"[^"]*+"'
15-
</pre>
16-
<p>or</p>
17-
<pre>
1813
r'&lt;[^&gt;]+&gt;'
1914
r'"[^"]*"'
2015
</pre>

python-checks/src/test/resources/checks/regex/reluctantQuantifierCheck.py

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,52 +2,52 @@
22

33

44
def non_compliant(input):
5-
re.match(r"<.+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
6-
re.match(r"<\S+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]++".}}
7-
re.match(r"<\D+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]++".}}
8-
re.match(r"<\W+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]++".}}
9-
10-
re.match(r"<.{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,5}+".}}
11-
re.match(r"<\S{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]{2,5}+".}}
12-
re.match(r"<\D{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]{2,5}+".}}
13-
re.match(r"<\W{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]{2,5}+".}}
14-
15-
re.match(r"<.{2,}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,}+".}}
16-
re.match(r"\".*?\"", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\"]*+".}}
17-
re.match(r".*?\w", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\W*+".}}
18-
re.match(r".*?\W", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\w*+".}}
19-
re.match(r"\[.*?\]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\]]*+".}}
20-
re.match(r".+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc]++".}}
5+
re.match(r"<.+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]+".}}
6+
re.match(r"<\S+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]+".}}
7+
re.match(r"<\D+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]+".}}
8+
re.match(r"<\W+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]+".}}
9+
10+
re.match(r"<.{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,5}".}}
11+
re.match(r"<\S{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]{2,5}".}}
12+
re.match(r"<\D{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]{2,5}".}}
13+
re.match(r"<\W{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]{2,5}".}}
14+
15+
re.match(r"<.{2,}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,}".}}
16+
re.match(r"\".*?\"", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\"]*".}}
17+
re.match(r".*?\w", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\W*".}}
18+
re.match(r".*?\W", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\w*".}}
19+
re.match(r"\[.*?\]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\]]*".}}
20+
re.match(r".+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc]+".}}
2121
re.match(r"(?-U:\s)*?\S", input)
22-
re.match(r"(?U:\s)*?\S", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\s\S]*+".}}
22+
re.match(r"(?U:\s)*?\S", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\s\S]*".}}
2323
re.match(r"(?U:a|\s)*?\S", input)
2424
re.match(r"\S*?\s", input)
2525
re.match(r"\S*?(?-U:\s)", input)
26-
re.match(r"\S*?(?U:\s)", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*+".}}
27-
re.match(r"\S*?(?U)\s", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*+".}}
26+
re.match(r"\S*?(?U:\s)", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*".}}
27+
re.match(r"\S*?(?U)\s", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*".}}
2828

2929
# coverage
3030
re.match(r"(?:(?m))*?a", input)
3131
re.match(r"(?:(?m:.))*?(?:(?m))", input)
3232

3333
# This replacement might not be equivalent in case of full match, but is equivalent in case of split
34-
re.match(r".+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc]++".}}
34+
re.match(r".+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc]+".}}
3535

36-
re.match(r".+?\x{1F4A9}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\x{1F4A9}]++".}}
37-
re.match(r"<abc.*?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]*+".}}
38-
re.match(r"<.+?>|otherstuff", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
39-
re.match(r"(<.+?>)*", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
36+
re.match(r".+?\x{1F4A9}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\x{1F4A9}]+".}}
37+
re.match(r"<abc.*?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]*".}}
38+
re.match(r"<.+?>|otherstuff", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]+".}}
39+
re.match(r"(<.+?>)*", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]+".}}
4040

41-
re.match(r"\S+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]++".}}
42-
re.match(r"\D+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]++".}}
43-
re.match(r"\w+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]++".}}
41+
re.match(r"\S+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]+".}}
42+
re.match(r"\D+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]+".}}
43+
re.match(r"\w+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]+".}}
4444

45-
re.match(r"\S*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]*+".}}
46-
re.match(r"\D*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]*+".}}
47-
re.match(r"\w*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]*+".}}
45+
re.match(r"\S*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]*".}}
46+
re.match(r"\D*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]*".}}
47+
re.match(r"\w*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]*".}}
4848

49-
re.match(r"\S+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\S]++".}}
50-
re.match(r"\s+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\s]++".}}
49+
re.match(r"\S+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\S]+".}}
50+
re.match(r"\s+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\s]+".}}
5151

5252

5353
def compliant(input):
@@ -70,6 +70,7 @@ def compliant(input):
7070
re.match(r"\S*?(?U:\s)", input)
7171
re.match(r"\S*?(?U)\s", input)
7272

73+
7374
def no_intersection(input):
7475
re.match(r"<\d+?>", input)
7576
re.match(r"<\s+?>", input)

0 commit comments

Comments
 (0)