Skip to content

Commit 73fc960

Browse files
SONARPY-894 Rule S5842 Regex repetition pattern's body should not match the empty String (#960)
* SONARPY-894 Rule S5842 Regex repetition pattern's body should not match the empty String * Increase code coverage * Address review comments
1 parent 201ae0c commit 73fc960

File tree

18 files changed

+576
-4
lines changed

18 files changed

+576
-4
lines changed

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.sonar.python.checks.hotspots.StrongCryptographicKeysCheck;
5050
import org.sonar.python.checks.hotspots.UnsafeHttpMethodsCheck;
5151
import org.sonar.python.checks.hotspots.UnverifiedHostnameCheck;
52+
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
5253
import org.sonar.python.checks.regex.StringReplaceCheck;
5354

5455
public final class CheckList {
@@ -105,6 +106,7 @@ public static Iterable<Class> getChecks() {
105106
EmailSendingCheck.class,
106107
EmptyFunctionCheck.class,
107108
EmptyNestedBlockCheck.class,
109+
EmptyStringRepetitionCheck.class,
108110
ExceptionCauseTypeCheck.class,
109111
ExceptionNotThrownCheck.class,
110112
ExceptionSuperClassDeclarationCheck.class,

python-checks/src/main/java/org/sonar/python/checks/regex/AbstractRegexCheck.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121

2222
import java.util.Arrays;
2323
import java.util.HashSet;
24+
import java.util.List;
2425
import java.util.Optional;
2526
import java.util.Set;
27+
import javax.annotation.Nullable;
2628
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
2729
import org.sonar.plugins.python.api.SubscriptionContext;
2830
import org.sonar.plugins.python.api.symbols.Symbol;
@@ -33,14 +35,19 @@
3335
import org.sonar.plugins.python.api.tree.Tree;
3436
import org.sonar.python.regex.RegexContext;
3537
import org.sonar.python.tree.TreeUtils;
38+
import org.sonarsource.analyzer.commons.regex.RegexIssueLocation;
3639
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
40+
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;
3741

3842
public abstract class AbstractRegexCheck extends PythonSubscriptionCheck {
3943

4044
private static final Set<String> REGEX_FUNCTIONS = new HashSet<>(Arrays.asList("re.sub", "re.subn", "re.compile", "re.search", "re.match",
4145
"re.fullmatch", "re.split", "re.findall", "re.finditer"));
4246
protected RegexContext regexContext;
4347

48+
// We want to report only one issue per element for one rule.
49+
protected final Set<RegexSyntaxElement> reportedRegexTrees = new HashSet<>();
50+
4451
protected Set<String> lookedUpFunctionNames() {
4552
return REGEX_FUNCTIONS;
4653
}
@@ -86,4 +93,11 @@ private static Optional<StringLiteral> patternArgStringLiteral(CallExpression re
8693
return Optional.empty();
8794
}
8895

96+
public void addIssue(RegexSyntaxElement regexTree, String message, @Nullable Integer cost, List<RegexIssueLocation> secondaries) {
97+
if (reportedRegexTrees.add(regexTree)) {
98+
regexContext.addIssue(regexTree, message);
99+
// TODO: Add secondary location to the issue SONARPY-886
100+
// TODO: Add cost to the issue SONARPY-893
101+
}
102+
}
89103
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.EmptyStringRepetitionFinder;
26+
27+
@Rule(key = "S5842")
28+
public class EmptyStringRepetitionCheck extends AbstractRegexCheck {
29+
30+
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
new EmptyStringRepetitionFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}

python-checks/src/main/java/org/sonar/python/checks/regex/StringReplaceCheck.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.Set;
2525
import org.sonar.check.Rule;
2626
import org.sonar.plugins.python.api.tree.CallExpression;
27+
import org.sonar.python.regex.PythonRegexIssueLocation;
2728
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
2829
import org.sonarsource.analyzer.commons.regex.ast.RegexTree;
2930
import org.sonarsource.analyzer.commons.regex.ast.SequenceTree;
@@ -32,6 +33,7 @@
3233
public class StringReplaceCheck extends AbstractRegexCheck {
3334

3435
private static final String MESSAGE = "Replace this \"re.sub()\" call by a \"str.replace()\" function call.";
36+
private static final String SECONDARY_MESSAGE = "Expression without regular expression features.";
3537

3638
@Override
3739
protected Set<String> lookedUpFunctionNames() {
@@ -42,7 +44,8 @@ protected Set<String> lookedUpFunctionNames() {
4244
public void checkRegex(RegexParseResult regexParseResult, CallExpression callExpression) {
4345
RegexTree regex = regexParseResult.getResult();
4446
if (!regexParseResult.hasSyntaxErrors() && isPlainString(regex)) {
45-
regexContext.addIssue(callExpression.callee(), MESSAGE);
47+
regexContext.addIssue(callExpression.callee(), MESSAGE)
48+
.secondary(PythonRegexIssueLocation.preciseLocation(regex, SECONDARY_MESSAGE));
4649
}
4750
}
4851

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<p>A regex should never include a repetitive pattern whose body would match the empty string. This is usually a sign that a part of the regex is
2+
redundant or does not do what the author intended.</p>
3+
<h2>Noncompliant Code Example</h2>
4+
<pre>
5+
r"(?:)*" # same as the empty regex, the '*' accomplishes nothing
6+
r"(?:|x)*" # same as the empty regex, the alternative has no effect
7+
r"(?:x|)*" # same as 'x*', the empty alternative has no effect
8+
r"(?:x*|y*)*" # same as 'x*', the first alternative would always match, y* is never tried
9+
r"(?:x?)*" # same as 'x*'
10+
r"(?:x?)+" # same as 'x*'
11+
</pre>
12+
<h2>Compliant Solution</h2>
13+
<pre>
14+
r"x*"
15+
</pre>
16+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Repeated patterns in regular expressions should not match the empty string",
3+
"type": "BUG",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Minor",
13+
"ruleSpecification": "RSPEC-5842",
14+
"sqKey": "S5842",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@
133133
"S5806",
134134
"S5807",
135135
"S5828",
136+
"S5842",
136137
"S5864",
137138
"S5886",
138139
"S5890"

python-checks/src/test/java/org/sonar/python/checks/regex/AbstractRegexCheckTest.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.File;
2323
import java.util.ArrayList;
2424
import java.util.Arrays;
25+
import java.util.Collections;
2526
import java.util.List;
2627
import org.junit.Test;
2728
import org.sonar.plugins.python.api.PythonVisitorContext;
@@ -44,6 +45,22 @@ public void test_regex_is_visited() {
4445
assertThat(check.receivedRegexParseResults).hasSize(10);
4546
}
4647

48+
@Test
49+
public void test_regex_element_is_only_reported_once() {
50+
Check check = new Check() {
51+
@Override
52+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
53+
super.checkRegex(regexParseResult, regexFunctionCall);
54+
addIssue(regexParseResult.getResult(), "MESSAGE", null, Collections.emptyList());
55+
}
56+
};
57+
58+
PythonVisitorContext fileContext = TestPythonVisitorRunner.createContext(FILE);
59+
SubscriptionVisitor.analyze(Collections.singletonList(check), fileContext);
60+
assertThat(check.reportedRegexTrees).hasSize(10);
61+
assertThat(fileContext.getIssues()).hasSize(10);
62+
}
63+
4764
@Test
4865
public void test_regex_parse_result_is_retrieved_from_cache_in_context() {
4966
PythonVisitorContext fileContext = TestPythonVisitorRunner.createContext(FILE);
@@ -59,10 +76,11 @@ public void test_regex_parse_result_is_retrieved_from_cache_in_context() {
5976
private static class Check extends AbstractRegexCheck {
6077
private final List<RegexParseResult> receivedRegexParseResults = new ArrayList<>();
6178

79+
@Override
6280
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
6381
receivedRegexParseResults.add(regexParseResult);
64-
regexContext.addIssue(regexFunctionCall, "MESSAGE");
82+
addIssue(regexParseResult.getResult(), "MESSAGE", null, Collections.emptyList());
6583
}
6684
}
6785

68-
}
86+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
public class EmptyStringRepetitionCheckTest {
26+
27+
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/emptyStringRepetitionCheck.py", new EmptyStringRepetitionCheck());
30+
}
31+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import re
2+
3+
4+
def non_compliant(input):
5+
re.match(r"(?:)*", input) # Noncompliant {{Rework this part of the regex to not match the empty string.}}
6+
# ^^^^
7+
re.match(r"(?:)?", input) # Noncompliant
8+
re.match(r"(?:)+", input) # Noncompliant
9+
re.match(r"()*", input) # Noncompliant
10+
re.match(r"()?", input) # Noncompliant
11+
re.match(r"()+", input) # Noncompliant
12+
re.match(r"xyz|(?:)*", input) # Noncompliant
13+
re.match(r"(?:|x)*", input) # Noncompliant
14+
re.match(r"(?:x|)*", input) # Noncompliant
15+
re.match(r"(?:x|y*)*", input) # Noncompliant
16+
re.match(r"(?:x*|y*)*", input) # Noncompliant
17+
re.match(r"(?:x?|y*)*", input) # Noncompliant
18+
re.match(r"(?:x*)*", input) # Noncompliant
19+
re.match(r"(?:x?)*", input) # Noncompliant
20+
re.match(r"(?:x*)?", input) # Noncompliant
21+
re.match(r"(?:x?)?", input) # Noncompliant
22+
re.match(r"(?:x*)+", input) # Noncompliant
23+
re.match(r"(?:x?)+", input) # Noncompliant
24+
re.match(r"(x*)*", input) # Noncompliant
25+
re.match(r"((x*))*", input) # Noncompliant
26+
re.match(r"(?:x*y*)*", input) # Noncompliant
27+
re.match(r"(?:())*", input) # Noncompliant
28+
re.match(r"(?:(?:))*", input) # Noncompliant
29+
re.match(r"((?i))*", input) # Noncompliant
30+
re.match(r"(())*", input) # Noncompliant
31+
re.match(r"(()x*)*", input) # Noncompliant
32+
re.match(r"(()|x)*", input) # Noncompliant
33+
re.match(r"($)*", input) # Noncompliant
34+
re.match(r"(\b)*", input) # Noncompliant
35+
re.match(r"((?!x))*", input) # Noncompliant
36+
37+
38+
def compliant(input):
39+
re.match(r"x*|", input)
40+
re.match(r"x*|", input)
41+
re.match(r"x*", input)
42+
re.match(r"x?", input)
43+
re.match(r"(?:x|y)*", input)
44+
re.match(r"(?:x+)+", input)
45+
re.match(r"(?:x+)*", input)
46+
re.match(r"(?:x+)?", input)
47+
re.match(r"((x+))*", input)

0 commit comments

Comments
 (0)