Skip to content

Commit c1b1ac7

Browse files
SONARPY-891 Rule S5857 Character classes should be preferred over reluctant quantifiers in regular expressions (#971)
* SONARPY-891 Rule S5857 Character classes should be preferred over reluctant quantifiers in regular expressions * Update test case * Update ITs
1 parent 351d3da commit c1b1ac7

File tree

9 files changed

+239
-1
lines changed

9 files changed

+239
-1
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
'project:buildbot-0.8.6p1/buildbot/monkeypatches/sqlalchemy2189.py':[
3+
41,
4+
87,
5+
],
6+
'project:buildbot-0.8.6p1/buildbot/status/web/console.py':[
7+
208,
8+
],
9+
'project:django-2.2.3/django/utils/text.py':[
10+
18,
11+
18,
12+
19,
13+
],
14+
'project:django-2.2.3/django/utils/translation/template.py':[
15+
32,
16+
32,
17+
],
18+
'project:numpy-1.16.4/numpy/linalg/lapack_lite/clapack_scrub.py':[
19+
104,
20+
],
21+
'project:tensorflow/python/debug/cli/evaluator.py':[
22+
26,
23+
],
24+
'project:tornado-2.3/demos/appengine/markdown.py':[
25+
722,
26+
722,
27+
],
28+
'project:tornado-2.3/demos/blog/markdown.py':[
29+
722,
30+
722,
31+
],
32+
}

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<mockito.version>3.9.0</mockito.version>
9292
<sonar.version>8.9.0.43852</sonar.version>
9393
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
94-
<sonar-analyzer-commons.version>1.21.0.805</sonar-analyzer-commons.version>
94+
<sonar-analyzer-commons.version>1.21.0.807</sonar-analyzer-commons.version>
9595
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
9696
<sslr.version>1.23</sslr.version>
9797
<protobuf.version>3.17.3</protobuf.version>

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
5555
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
5656
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
57+
import org.sonar.python.checks.regex.ReluctantQuantifierCheck;
5758
import org.sonar.python.checks.regex.RegexLookaheadCheck;
5859
import org.sonar.python.checks.regex.ReluctantQuantifierWithEmptyContinuationCheck;
5960
import org.sonar.python.checks.regex.StringReplaceCheck;
@@ -193,6 +194,7 @@ public static Iterable<Class> getChecks() {
193194
RaiseOutsideExceptCheck.class,
194195
RedundantJumpCheck.class,
195196
RedundantRegexAlternativesCheck.class,
197+
ReluctantQuantifierCheck.class,
196198
RegexCheck.class,
197199
ReluctantQuantifierWithEmptyContinuationCheck.class,
198200
ReturnAndYieldInOneFunctionCheck.class,
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.ReluctantQuantifierFinder;
26+
27+
@Rule(key = "S5857")
28+
public class ReluctantQuantifierCheck extends AbstractRegexCheck {
29+
30+
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
new ReluctantQuantifierFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
35+
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<p>Using reluctant quantifiers (also known as lazy or non-greedy quantifiers) in patterns can often lead to needless backtracking, making the regex
2+
needlessly inefficient and potentially vulnerable to <a href="https://www.regular-expressions.info/catastrophic.html">catastrophic backtracking</a>.
3+
Particularly when using <code>.*?</code> or <code>.+?</code> to match anything up to some terminating character, it is usually a better idea to
4+
instead use a greedily or possessively quantified negated character class containing the terminating character. For example <code>&lt;.+?&gt;</code>
5+
should be replaced with <code>&lt;[^&gt;]++&gt;</code>.</p>
6+
<h2>Noncompliant Code Example</h2>
7+
<pre>
8+
r'&lt;.+?&gt;'
9+
r'".*?"'
10+
</pre>
11+
<h2>Compliant Solution</h2>
12+
<pre>
13+
r'&lt;[^&gt;]++&gt;'
14+
r'"[^"]*+"'
15+
</pre>
16+
<p>or</p>
17+
<pre>
18+
r'&lt;[^&gt;]+&gt;'
19+
r'"[^"]*"'
20+
</pre>
21+
<h2>Exceptions</h2>
22+
<p>This rule only applies in cases where the reluctant quantifier can easily be replaced with a negated character class. That means the repetition has
23+
to be terminated by a single character or character class. Patterns such as the following, where the alternatives without reluctant quantifiers are
24+
more complicated, are therefore not subject to this rule:</p>
25+
<pre>
26+
/&lt;!--.*?--&gt;/
27+
-/\*.*?\*/-
28+
</pre>
29+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Character classes should be preferred over reluctant quantifiers in regular expressions",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "3min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Minor",
13+
"ruleSpecification": "RSPEC-5857",
14+
"sqKey": "S5857",
15+
"scope": "All",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
"S5842",
137137
"S5850",
138138
"S5855",
139+
"S5857",
139140
"S5864",
140141
"S5868",
141142
"S5886",
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
26+
public class ReluctantQuantifierCheckTest {
27+
28+
@Test
29+
public void test() {
30+
PythonCheckVerifier.verify("src/test/resources/checks/regex/reluctantQuantifierCheck.py", new ReluctantQuantifierCheck());
31+
}
32+
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import re
2+
3+
4+
def non_compliant(input):
5+
re.match(r"<.+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
6+
re.match(r"<\S+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]++".}}
7+
re.match(r"<\D+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]++".}}
8+
re.match(r"<\W+?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]++".}}
9+
10+
re.match(r"<.{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,5}+".}}
11+
re.match(r"<\S{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\s]{2,5}+".}}
12+
re.match(r"<\D{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\d]{2,5}+".}}
13+
re.match(r"<\W{2,5}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>\w]{2,5}+".}}
14+
15+
re.match(r"<.{2,}?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]{2,}+".}}
16+
re.match(r"\".*?\"", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\"]*+".}}
17+
re.match(r".*?\w", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\W*+".}}
18+
re.match(r".*?\W", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\w*+".}}
19+
re.match(r".*?\p{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\P{L}*+".}}
20+
re.match(r".*?\P{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\p{L}*+".}}
21+
re.match(r"\[.*?\]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\]]*+".}}
22+
re.match(r".+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc]++".}}
23+
re.match(r"(?-U:\s)*?\S", input)
24+
re.match(r"(?U:\s)*?\S", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\s\S]*+".}}
25+
re.match(r"(?U:a|\s)*?\S", input)
26+
re.match(r"\S*?\s", input)
27+
re.match(r"\S*?(?-U:\s)", input)
28+
re.match(r"\S*?(?U:\s)", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*+".}}
29+
re.match(r"\S*?(?U)\s", input, re.ASCII) # Noncompliant {{Replace this use of a reluctant quantifier with "[\S\s]*+".}}
30+
31+
# coverage
32+
re.match(r"(?:(?m))*?a", input)
33+
re.match(r"(?:(?m:.))*?(?:(?m))", input)
34+
35+
# This replacement might not be equivalent in case of full match, but is equivalent in case of split
36+
re.match(r".+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc]++".}}
37+
38+
re.match(r".+?\x{1F4A9}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\x{1F4A9}]++".}}
39+
re.match(r"<abc.*?>", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]*+".}}
40+
re.match(r"<.+?>|otherstuff", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
41+
re.match(r"(<.+?>)*", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^>]++".}}
42+
43+
re.match(r"\S+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]++".}}
44+
re.match(r"\D+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]++".}}
45+
re.match(r"\w+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]++".}}
46+
47+
re.match(r"\S*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\s]*+".}}
48+
re.match(r"\D*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\d]*+".}}
49+
re.match(r"\w*?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc\W]*+".}}
50+
51+
re.match(r"\S+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\S]++".}}
52+
re.match(r"\s+?[^abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[abc\s]++".}}
53+
54+
55+
def compliant(input):
56+
re.match(r"<[^>]++>", input)
57+
re.match(r"<[^>]+>", input)
58+
re.match(r"<[^>]+?>", input)
59+
re.match(r"<.{42}?>", input) # Adding a ? to a fixed quantifier is pointless, but also doesn't cause any backtracking issues
60+
re.match(r"<.+>", input)
61+
re.match(r"<.++>", input)
62+
re.match(r"<--.?-->", input)
63+
re.match(r"<--.+?-->", input)
64+
re.match(r"<--.*?-->", input)
65+
re.match(r"/\*.?\*/", input)
66+
re.match(r"<[^>]+>?", input)
67+
re.match(r"", input)
68+
re.match(r".*?(?:a|b|c)", input) # Alternatives are currently not covered even if they contain only single characters
69+
70+
# UNICODE flag is enabled by default
71+
re.match(r"(?U:\s)*?\S", input)
72+
re.match(r"\S*?(?U:\s)", input)
73+
re.match(r"\S*?(?U)\s", input)
74+
75+
def no_intersection(input):
76+
re.match(r"<\d+?>", input)
77+
re.match(r"<\s+?>", input)
78+
re.match(r"<\w+?>", input)
79+
80+
re.match(r"<\s{2,5}?>", input)
81+
re.match(r"<\d{2,5}?>", input)
82+
re.match(r"<\w{2,5}?>", input)
83+
84+
re.match(r"\d+?[abc]", input)
85+
re.match(r"\s+?[abc]", input)
86+
re.match(r"\W+?[abc]", input)
87+
88+
re.match(r"\W*?[abc]", input)
89+
re.match(r"\s*?[abc]", input)
90+
re.match(r"\d*?[abc]", input)

0 commit comments

Comments
 (0)