Skip to content

Commit 9f8824d

Browse files
SONARPY-885 Rule S5996 Regex boundaries should not be used in a way that can never be matched (#970)
* SONARPY-885 Rule S5996 Regex boundaries should not be used in a way that can never be matched
* Add missing licence headers
* Accept flags for re.compile
* Update flag check test cases
* Update ITs
1 parent d8773c7 commit 9f8824d

File tree

10 files changed

+213
-17
lines changed

10 files changed

+213
-17
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
'project:numpy-1.16.4/numpy/linalg/lapack_lite/clapack_scrub.py':[
3+
232,
4+
],
5+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.sonar.python.checks.regex.AnchorPrecedenceCheck;
5353
import org.sonar.python.checks.regex.DuplicatesInCharacterClassCheck;
5454
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
55+
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
5556
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
5657
import org.sonar.python.checks.regex.RegexComplexityCheck;
5758
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
@@ -147,6 +148,7 @@ public static Iterable<Class> getChecks() {
147148
IdentityComparisonWithNewObjectCheck.class,
148149
IgnoredPureOperationsCheck.class,
149150
ImplicitStringConcatenationCheck.class,
151+
ImpossibleBoundariesCheck.class,
150152
IncompatibleOperandsCheck.class,
151153
InconsistentTypeHintCheck.class,
152154
IncorrectExceptionTypeCheck.class,

python-checks/src/main/java/org/sonar/python/checks/regex/AbstractRegexCheck.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public abstract class AbstractRegexCheck extends PythonSubscriptionCheck {
5353
static {
5454
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.sub", 4);
5555
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.subn", 4);
56-
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.compile", null);
56+
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.compile", 1);
5757
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.search", 2);
5858
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.match", 2);
5959
REGEX_FUNCTIONS_TO_FLAG_PARAM.put("re.fullmatch", 2);
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.ImpossibleBoundaryFinder;
26+
27+
@Rule(key = "S5996")
28+
public class ImpossibleBoundariesCheck extends AbstractRegexCheck {
29+
30+
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
new ImpossibleBoundaryFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<p>In regular expressions the boundaries <code>^</code> and <code>\A</code> can only match at the beginning of the input (or, in case of
2+
<code>^</code> in combination with the <code>MULTILINE</code> flag, the beginning of the line) and <code>$</code>, <code>\Z</code> and <code>\z</code>
3+
only at the end.</p>
4+
<p>These patterns can be misused, by accidentally switching <code>^</code> and <code>$</code> for example, to create a pattern that can never
5+
match.</p>
6+
<h2>Noncompliant Code Example</h2>
7+
<pre>
8+
# This can never match because $ and ^ have been switched around
9+
r"$[a-z]+^" # Noncompliant
10+
</pre>
11+
<h2>Compliant Solution</h2>
12+
<pre>
13+
r"^[a-z]+$"
14+
</pre>
15+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Regex boundaries should not be used in a way that can never be matched",
3+
"type": "BUG",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "10min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Critical",
13+
"ruleSpecification": "RSPEC-5996",
14+
"sqKey": "S5996",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
"S5869",
144144
"S5886",
145145
"S5890",
146+
"S5996",
146147
"S6002",
147148
"S6019",
148149
"S6035"
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
26+
public class ImpossibleBoundariesCheckTest {
27+
28+
@Test
29+
public void test() {
30+
PythonCheckVerifier.verify("src/test/resources/checks/regex/impossibleBoundariesCheck.py", new ImpossibleBoundariesCheck());
31+
}
32+
33+
}
Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,34 @@
11
import re
22

3-
re.match(r'.*', "foo", re.I); # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
4-
re.match(r'.*', "foo", re.IGNORECASE); # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
3+
re.match(r'.*', "foo", re.I) # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
4+
re.match(r'.*', "foo", re.IGNORECASE) # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
55

6-
re.match(r'.*', "foo", re.M); # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
7-
re.match(r'.*', "foo", re.MULTILINE); # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
6+
re.match(r'.*', "foo", re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
7+
re.match(r'.*', "foo", re.MULTILINE) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
88

9-
re.match(r'.*', "foo", re.S); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|DOTALL}}
10-
re.match(r'.*', "foo", re.DOTALL); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|DOTALL}}
9+
re.match(r'.*', "foo", re.S) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|DOTALL}}
10+
re.match(r'.*', "foo", re.DOTALL) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|DOTALL}}
1111

12-
re.match(r'.*', "foo", re.X); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|VERBOSE}}
13-
re.match(r'.*', "foo", re.VERBOSE); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|VERBOSE}}
12+
re.match(r'.*', "foo", re.X) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|VERBOSE}}
13+
re.match(r'.*', "foo", re.VERBOSE) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS|VERBOSE}}
1414

15-
re.match(r'.*', "foo", re.U); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
16-
re.match(r'.*', "foo", re.UNICODE); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
15+
re.match(r'.*', "foo", re.U) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
16+
re.match(r'.*', "foo", re.UNICODE) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
1717

18-
re.match(r'.*', "foo", re.A); # Noncompliant {{ASCII}}
19-
re.match(r'.*', "foo", re.ASCII); # Noncompliant {{ASCII}}
18+
re.match(r'.*', "foo", re.A) # Noncompliant {{ASCII}}
19+
re.match(r'.*', "foo", re.ASCII) # Noncompliant {{ASCII}}
2020

21-
re.match(r'.*', "foo", re.UNKNOWN); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
22-
re.match(r'.*', "foo", not_re.UNKNOWN); # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
23-
re.match(r'.*', "foo", re.I|UNKNOWN); # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
21+
re.match(r'.*', "foo", re.UNKNOWN) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
22+
re.match(r'.*', "foo", not_re.UNKNOWN) # Noncompliant {{UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
23+
re.match(r'.*', "foo", re.I | UNKNOWN) # Noncompliant {{CASE_INSENSITIVE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
2424

25-
re.match(r'.*', "foo", re.I|re.M); # Noncompliant {{CASE_INSENSITIVE|MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
25+
re.match(r'.*', "foo", re.I | re.M) # Noncompliant {{CASE_INSENSITIVE|MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
26+
27+
re.sub(r'.*', '', '', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
28+
re.subn(r'.*', '', '', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
29+
re.compile(r'.*', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
30+
re.search(r'.*', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
31+
re.fullmatch(r'.*', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
32+
re.split(r'.*', '', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
33+
re.findall(r'.*', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
34+
re.finditer(r'.*', '', re.M) # Noncompliant {{MULTILINE|UNICODE_CASE|UNICODE_CHARACTER_CLASS}}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import re
2+
3+
4+
def non_compliant(input):
5+
re.match(r'$[a-z]^', input) # Noncompliant 2
6+
# ^0
7+
re.match(r'$[a-z]', input) # Noncompliant {{Remove or replace this boundary that will never match because it appears before mandatory input.}}
8+
re.match(r'$(abc)', input) # Noncompliant
9+
re.match(r'[a-z]^', input) # Noncompliant
10+
re.match(r'\Z[a-z]', input) # Noncompliant
11+
re.match(r'\z[a-z]', input) # Noncompliant
12+
re.match(r'[a-z]\A', input) # Noncompliant
13+
re.match(r'($)a', input) # Noncompliant
14+
re.match(r'a$|$a', input) # Noncompliant
15+
re.match(r'^a|a^', input) # Noncompliant
16+
re.match(r'a(b|^)', input) # Noncompliant
17+
re.match(r'(?=abc^)', input) # Noncompliant
18+
re.match(r'(?!abc^)', input) # Noncompliant
19+
re.match(r'abc(?=^abc)', input) # Noncompliant
20+
re.match(r'abc(?<=$abc)', input) # Noncompliant
21+
re.match(r'abc(?<=abc$)def', input) # Noncompliant
22+
re.match(r'(?:abc(X|^))*Y?', input) # Noncompliant
23+
24+
re.match(r'a\Z\nb', input, re.MULTILINE) # Noncompliant
25+
re.match(r'a\zb', input, re.MULTILINE) # Noncompliant
26+
re.match(r'a\n\Ab', input, re.MULTILINE) # Noncompliant
27+
28+
# Known false positives (see SONARPHP-1238): an end boundary such as `$` can also match just before a trailing newline, so a newline may still follow it
29+
re.match(r'a$(\n)', input) # Noncompliant
30+
re.match(r'a$./s', input) # Noncompliant
31+
re.match(r'a\Z(\n)', input) # Noncompliant
32+
33+
34+
def probably_non_compliant(input):
35+
re.match(r'$.*', input) # Noncompliant {{Remove or replace this boundary that can only match if the previous part matched the empty string because it appears before mandatory input.}}
36+
re.match(r'$.?', input) # Noncompliant
37+
38+
re.match(r'$a*', input) # Noncompliant
39+
re.match(r'$a?', input) # Noncompliant
40+
re.match(r'$[abc]*', input) # Noncompliant
41+
re.match(r'$[abc]?', input) # Noncompliant
42+
43+
re.match(r'.*^', input) # Noncompliant {{Remove or replace this boundary that can only match if the previous part matched the empty string because it appears after mandatory input.}}
44+
re.match(r'.?^', input) # Noncompliant
45+
46+
re.match(r'a*^', input) # Noncompliant
47+
re.match(r'a?^', input) # Noncompliant
48+
re.match(r'[abc]*^', input) # Noncompliant
49+
re.match(r'[abc]?^', input) # Noncompliant
50+
51+
re.match(r'$.*^', input) # Noncompliant 2
52+
re.match(r'$.?^', input) # Noncompliant 2
53+
re.match(r'$a*^', input) # Noncompliant 2
54+
re.match(r'$a?^', input) # Noncompliant 2
55+
re.match(r'$[abc]*^', input) # Noncompliant 2
56+
re.match(r'$[abc]?^', input) # Noncompliant 2
57+
58+
59+
def compliant(input):
60+
re.match(r'^[a-z]$', input)
61+
re.match(r'^$', input)
62+
re.match(r'^(?i)$', input)
63+
re.match(r'^$(?i)', input)
64+
re.match(r'^abc$|^def$', input)
65+
re.match(r'(?i)^abc$', input)
66+
re.match(r'()^abc$', input)
67+
re.match(r'^abc$()', input)
68+
re.match(r'^abc$\b', input)
69+
re.match(r'(?=abc)^abc$', input)
70+
re.match(r'(?=^abc$)abc', input)
71+
re.match(r'(?!^abc$)abc', input)
72+
re.match(r'abc(?<=^abc$)', input)
73+
re.match(r'^\d$(?<!3)', input)
74+
re.match(r'(?=$)', input)
75+
re.match(r"(?i)(true)(?=(?:[^']|'[^']*')*$)", input)
76+
re.match(r'(?:abc(X|$))*Y?', input)
77+
re.match(r'(?:x*(Xab|^)abc)*Y?', input)
78+
re.match(r'a$\nb', input, re.MULTILINE)
79+
re.match(r'a\n^b', input, re.MULTILINE)
80+
re.compile(r"(\d+)(\s+.*)$ # score, vulgar components", re.VERBOSE)

0 commit comments

Comments
 (0)