Skip to content

Commit 8cb734e

Browse files
SONARPY-907 S5361 and initial version of the RegexSource and CharacterParser classes (#958)
* Initial implementations for AbstractRegexCheck * Rule implementation * Add license headers * Fix code smells * Add tests for the characterParser * Add tests for the abstractRegexCheck * Add ITS expected results * Add test
1 parent 0331592 commit 8cb734e

File tree

19 files changed

+630
-2
lines changed

19 files changed

+630
-2
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
'project:tensorflow/python/debug/cli/command_parser.py':[
3+
264,
4+
],
5+
'project:tensorflow/python/keras/layers/einsum_dense.py':[
6+
212,
7+
],
8+
}

pom.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<mockito.version>3.9.0</mockito.version>
9292
<sonar.version>8.9.0.43852</sonar.version>
9393
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
94-
<sonar-analyzer-commons.version>1.16.0.719</sonar-analyzer-commons.version>
94+
<sonar-analyzer-commons.version>1.21.0.804</sonar-analyzer-commons.version>
9595
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
9696
<sslr.version>1.23</sslr.version>
9797
<protobuf.version>3.17.3</protobuf.version>
@@ -131,6 +131,11 @@
131131
<artifactId>sonar-analyzer-test-commons</artifactId>
132132
<version>${sonar-analyzer-commons.version}</version>
133133
</dependency>
134+
<dependency>
135+
<groupId>org.sonarsource.analyzer-commons</groupId>
136+
<artifactId>sonar-regex-parsing</artifactId>
137+
<version>${sonar-analyzer-commons.version}</version>
138+
</dependency>
134139
<dependency>
135140
<groupId>org.codehaus.staxmate</groupId>
136141
<artifactId>staxmate</artifactId>

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.sonar.python.checks.hotspots.StrongCryptographicKeysCheck;
5050
import org.sonar.python.checks.hotspots.UnsafeHttpMethodsCheck;
5151
import org.sonar.python.checks.hotspots.UnverifiedHostnameCheck;
52+
import org.sonar.python.checks.regex.StringReplaceCheck;
5253

5354
public final class CheckList {
5455

@@ -199,6 +200,7 @@ public static Iterable<Class> getChecks() {
199200
StringFormatCorrectnessCheck.class,
200201
StringFormatMisuseCheck.class,
201202
StringLiteralDuplicationCheck.class,
203+
StringReplaceCheck.class,
202204
StrongCryptographicKeysCheck.class,
203205
TempFileCreationCheck.class,
204206
TooManyLinesInFileCheck.class,
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.util.Collections;
23+
import java.util.HashSet;
24+
import java.util.Optional;
25+
import java.util.Set;
26+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
27+
import org.sonar.plugins.python.api.SubscriptionContext;
28+
import org.sonar.plugins.python.api.symbols.Symbol;
29+
import org.sonar.plugins.python.api.tree.CallExpression;
30+
import org.sonar.plugins.python.api.tree.Expression;
31+
import org.sonar.plugins.python.api.tree.RegularArgument;
32+
import org.sonar.plugins.python.api.tree.StringLiteral;
33+
import org.sonar.plugins.python.api.tree.Tree;
34+
import org.sonar.python.regex.RegexContext;
35+
import org.sonar.python.tree.TreeUtils;
36+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
37+
38+
public abstract class AbstractRegexCheck extends PythonSubscriptionCheck {
39+
40+
private static final Set<String> REGEX_FUNCTIONS = new HashSet<>(Collections.singletonList("re.sub"));
41+
protected RegexContext regexContext;
42+
43+
protected Set<String> lookedUpFunctionNames() {
44+
return REGEX_FUNCTIONS;
45+
}
46+
47+
@Override
48+
public void initialize(Context context) {
49+
context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::checkCall);
50+
}
51+
52+
public abstract void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall);
53+
54+
private void checkCall(SubscriptionContext ctx) {
55+
regexContext = (RegexContext) ctx;
56+
CallExpression callExpression = (CallExpression) ctx.syntaxNode();
57+
Symbol calleeSymbol = callExpression.calleeSymbol();
58+
if (calleeSymbol == null || calleeSymbol.fullyQualifiedName() == null) {
59+
return;
60+
}
61+
if (lookedUpFunctionNames().contains(calleeSymbol.fullyQualifiedName())) {
62+
patternArgStringLiteral(callExpression)
63+
.flatMap(this::regexForStringLiteral)
64+
.ifPresent(parseResult -> checkRegex(parseResult, callExpression));
65+
}
66+
}
67+
68+
private Optional<RegexParseResult> regexForStringLiteral(StringLiteral literal) {
69+
// TODO: for now we only handle strings with an "r" prefix. This will be extended.
70+
if (literal.stringElements().size() == 1 && "r".equalsIgnoreCase(literal.stringElements().get(0).prefix())) {
71+
return Optional.of(regexContext.regexForStringElement(literal.stringElements().get(0)));
72+
}
73+
return Optional.empty();
74+
}
75+
76+
private static Optional<StringLiteral> patternArgStringLiteral(CallExpression regexFunctionCall) {
77+
RegularArgument patternArgument = TreeUtils.nthArgumentOrKeyword(0, "pattern", regexFunctionCall.arguments());
78+
if (patternArgument == null) {
79+
return Optional.empty();
80+
}
81+
Expression patternArgumentExpression = patternArgument.expression();
82+
if (patternArgumentExpression.is(Tree.Kind.STRING_LITERAL)) {
83+
return Optional.of((StringLiteral) patternArgumentExpression);
84+
}
85+
return Optional.empty();
86+
}
87+
88+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.util.Collections;
23+
import java.util.HashSet;
24+
import java.util.Set;
25+
import org.sonar.check.Rule;
26+
import org.sonar.plugins.python.api.tree.CallExpression;
27+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
28+
import org.sonarsource.analyzer.commons.regex.ast.RegexTree;
29+
import org.sonarsource.analyzer.commons.regex.ast.SequenceTree;
30+
31+
@Rule(key = "S5361")
32+
public class StringReplaceCheck extends AbstractRegexCheck {
33+
34+
private static final String MESSAGE = "Replace this \"re.sub()\" call by a \"str.replace()\" function call.";
35+
36+
@Override
37+
protected Set<String> lookedUpFunctionNames() {
38+
return new HashSet<>(Collections.singletonList("re.sub"));
39+
}
40+
41+
@Override
42+
public void checkRegex(RegexParseResult regexParseResult, CallExpression callExpression) {
43+
RegexTree regex = regexParseResult.getResult();
44+
if (!regexParseResult.hasSyntaxErrors() && isPlainString(regex)) {
45+
regexContext.addIssue(callExpression.callee(), MESSAGE);
46+
}
47+
}
48+
49+
private static boolean isPlainString(RegexTree regex) {
50+
return regex.is(RegexTree.Kind.CHARACTER)
51+
|| (regex.is(RegexTree.Kind.SEQUENCE)
52+
&& !((SequenceTree) regex).getItems().isEmpty()
53+
&& ((SequenceTree) regex).getItems().stream().allMatch(item -> item.is(RegexTree.Kind.CHARACTER)));
54+
}
55+
56+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
@ParametersAreNonnullByDefault
21+
package org.sonar.python.checks.regex;
22+
23+
import javax.annotation.ParametersAreNonnullByDefault;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<p>An <code>re.sub</code> call always performs an evaluation of the first argument as a regular expression, even if no regular expression features
2+
were used. This has a significant performance cost and therefore should be used with care.</p>
3+
<p>When <code>re.sub</code> is used, the first argument should be a real regular expression. If it’s not the case, <code>str.replace</code> does
4+
exactly the same thing as <code>re.sub</code> without the performance drawback of the regex.</p>
5+
<p>This rule raises an issue for each <code>re.sub</code> used with a simple string as first argument which doesn’t contains special regex character
6+
or pattern.</p>
7+
<h2>Noncompliant Code Example</h2>
8+
<pre>
9+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
10+
changed = re.sub("Bob is", "It's", input) # Noncompliant
11+
changed = re.sub("\.\.\.", ";", changed) # Noncompliant
12+
</pre>
13+
<h2>Compliant Solution</h2>
14+
<pre>
15+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
16+
changed = str.replace("Bob is", "It's", input)
17+
changed = str.replace("...", ";", changed)
18+
</pre>
19+
<p>Or, with a regex:</p>
20+
<pre>
21+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
22+
changed = re.sub(r"\w*\sis", "It's", input)
23+
changed = re.sub(r"\.{3}", ";", changed)
24+
</pre>
25+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"type": "CODE_SMELL",
3+
"status": "ready",
4+
"remediation": {
5+
"func": "Constant\/Issue",
6+
"constantCost": "2min"
7+
},
8+
"tags": [
9+
"regex",
10+
"performance"
11+
],
12+
"defaultSeverity": "Critical",
13+
"ruleSpecification": "RSPEC-5361",
14+
"sqKey": "S5361",
15+
"scope": "All",
16+
"quickfix": "unknown",
17+
"title": "`str.replace` should be preferred to `re.sub`"
18+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
"S5122",
9494
"S5247",
9595
"S5332",
96+
"S5361",
9697
"S5443",
9798
"S5445",
9899
"S5527",
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.io.File;
23+
import java.util.ArrayList;
24+
import java.util.Arrays;
25+
import java.util.Collections;
26+
import java.util.List;
27+
import org.junit.Test;
28+
import org.sonar.plugins.python.api.PythonCheck;
29+
import org.sonar.plugins.python.api.PythonSubscriptionCheck;
30+
import org.sonar.plugins.python.api.PythonVisitorContext;
31+
import org.sonar.plugins.python.api.tree.CallExpression;
32+
import org.sonar.python.SubscriptionVisitor;
33+
import org.sonar.python.TestPythonVisitorRunner;
34+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
35+
36+
import static org.assertj.core.api.Assertions.assertThat;
37+
38+
public class AbstractRegexCheckTest {
39+
40+
private static final File FILE = new File("src/test/resources/checks/regex/abstractRegexCheck.py");
41+
42+
private static PythonVisitorContext fileContext(File file) {
43+
return TestPythonVisitorRunner.createContext(file);
44+
}
45+
46+
private static List<PythonCheck.PreciseIssue> scanFileForIssues(PythonVisitorContext fileContext, List<PythonSubscriptionCheck> checks) {
47+
checks.forEach(c -> c.scanFile(fileContext));
48+
SubscriptionVisitor.analyze(checks, fileContext);
49+
return fileContext.getIssues();
50+
}
51+
52+
@Test
53+
public void test_regex_is_visited() {
54+
Check check = new Check();
55+
List<PythonCheck.PreciseIssue> issues = scanFileForIssues(fileContext(FILE), Collections.singletonList(check));
56+
assertThat(check.receivedRegexParseResults).hasSize(1);
57+
assertThat(issues).hasSize(1);
58+
}
59+
60+
@Test
61+
public void test_regex_parse_result_is_retrieved_from_cache_in_context() {
62+
PythonVisitorContext fileContext = fileContext(FILE);
63+
64+
Check checkOne = new Check();
65+
Check checkTwo = new Check();
66+
scanFileForIssues(fileContext, Arrays.asList(checkOne, checkTwo));
67+
68+
assertThat(checkOne.receivedRegexParseResults.get(0)).isSameAs(checkTwo.receivedRegexParseResults.get(0));
69+
}
70+
71+
private static class Check extends AbstractRegexCheck {
72+
private final List<RegexParseResult> receivedRegexParseResults = new ArrayList<>();
73+
74+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
75+
receivedRegexParseResults.add(regexParseResult);
76+
regexContext.addIssue(regexFunctionCall, "MESSAGE");
77+
}
78+
}
79+
80+
}

0 commit comments

Comments
 (0)