Skip to content

Commit 2f05ae1

Browse files
SONARPY-921 Rule S5856 Regular expressions should be syntactically valid (#974)
* SONARPY-921 Rule S5856 Regular expressions should be syntactically valid * Increase code coverage * Add false positive test cases * Update analyzer commons and update test cases and ITs results
1 parent 2b0d850 commit 2f05ae1

File tree

11 files changed

+201
-6
lines changed

11 files changed

+201
-6
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
'project:biopython/Bio/motifs/pfm.py':[
3+
338,
4+
],
5+
'project:buildbot-slave-0.8.6p1/buildslave/runprocess.py':[
6+
300,
7+
],
8+
'project:django-2.2.3/django/utils/translation/trans_real.py':[
9+
36,
10+
],
11+
'project:mypy-0.782/test-data/stdlib-samples/3.2/glob.py':[
12+
76,
13+
77,
14+
],
15+
'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[
16+
53,
17+
],
18+
'project:tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py':[
19+
75,
20+
],
21+
'project:tensorflow/python/keras/callbacks.py':[
22+
1373,
23+
],
24+
'project:tensorflow/tools/docs/py_guide_parser.py':[
25+
65,
26+
],
27+
'project:tornado-2.3/demos/appengine/markdown.py':[
28+
826,
29+
],
30+
'project:tornado-2.3/demos/blog/markdown.py':[
31+
826,
32+
],
33+
'project:twisted-12.1.0/twisted/words/protocols/jabber/sasl.py':[
34+
84,
35+
],
36+
}

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<mockito.version>3.9.0</mockito.version>
9292
<sonar.version>8.9.0.43852</sonar.version>
9393
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
94-
<sonar-analyzer-commons.version>1.21.0.807</sonar-analyzer-commons.version>
94+
<sonar-analyzer-commons.version>1.21.0.809</sonar-analyzer-commons.version>
9595
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
9696
<sslr.version>1.23</sslr.version>
9797
<protobuf.version>3.17.3</protobuf.version>

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
5555
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
5656
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
57+
import org.sonar.python.checks.regex.InvalidRegexCheck;
5758
import org.sonar.python.checks.regex.RegexComplexityCheck;
5859
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
5960
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
@@ -158,6 +159,7 @@ public static Iterable<Class> getChecks() {
158159
InstanceAndClassMethodsAtLeastOnePositionalCheck.class,
159160
InstanceMethodSelfAsFirstCheck.class,
160161
InvalidOpenModeCheck.class,
162+
InvalidRegexCheck.class,
161163
InvariantReturnCheck.class,
162164
ItemOperationsTypeCheck.class,
163165
IterationOnNonIterableCheck.class,
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.util.List;
23+
import java.util.stream.Collectors;
24+
import org.sonar.check.Rule;
25+
import org.sonar.plugins.python.api.tree.CallExpression;
26+
import org.sonarsource.analyzer.commons.regex.RegexIssueLocation;
27+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
28+
import org.sonarsource.analyzer.commons.regex.SyntaxError;
29+
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;
30+
31+
@Rule(key = "S5856")
32+
public class InvalidRegexCheck extends AbstractRegexCheck {
33+
34+
private static final String MESSAGE_FORMAT = "Fix the syntax error%s inside this regex.";
35+
36+
@Override
37+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
38+
List<SyntaxError> syntaxErrors = regexParseResult.getSyntaxErrors();
39+
if (!syntaxErrors.isEmpty()) {
40+
reportSyntaxErrors(syntaxErrors);
41+
}
42+
}
43+
44+
private void reportSyntaxErrors(List<SyntaxError> syntaxErrors) {
45+
// report on the first issue
46+
RegexSyntaxElement tree = syntaxErrors.get(0).getOffendingSyntaxElement();
47+
List<RegexIssueLocation> secondaries = syntaxErrors.stream()
48+
.map(error -> new RegexIssueLocation(error.getOffendingSyntaxElement(), error.getMessage()))
49+
.collect(Collectors.toList());
50+
51+
String msg = String.format(MESSAGE_FORMAT, secondaries.size() > 1 ? "s" : "");
52+
addIssue(tree, msg, null, secondaries);
53+
}
54+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<p>Regular expressions have their own syntax that is understood by regular expression engines. Those engines will throw an exception at runtime if
2+
they are given a regular expression that does not conform to that syntax.</p>
3+
<p>To avoid syntax errors, special characters should be escaped with backslashes when they are intended to be matched literally and references to
4+
capturing groups should use the correctly spelled name or number of the group.</p>
5+
<p>To match a literal string, rather than a regular expression, either all special characters should be escaped or methods that don’t use regular
6+
expressions should be used.</p>
7+
<h2>Noncompliant Code Example</h2>
8+
<pre>
9+
re.compile(r"([")
10+
re.sub(r"([", input, "{")
11+
re.compile(r"(\w+-(\d+)")
12+
</pre>
13+
<h2>Compliant Solution</h2>
14+
<pre>
15+
re.compile(r"\(\[")
16+
input.replace("([", "{")
17+
re.compile(r"(\w+)-(\d+)")
18+
</pre>
19+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Regular expressions should be syntactically valid",
3+
"type": "BUG",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "15min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Critical",
13+
"ruleSpecification": "RSPEC-5856",
14+
"sqKey": "S5856",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
public class InvalidRegexCheckTest {
26+
27+
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/invalidRegexCheck.py", new InvalidRegexCheck());
30+
}
31+
}

python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ def compliant(input):
9090
re.match(r"[[a-z&&b-e]c]", input) # FN because we don't support intersections
9191
re.match(r"(?i)[A-_d-{]", input) # FN because we ignore case insensitivity unless both ends of the ranges are letters
9292
re.match(r"(?i)[A-z_]", input) # FN because A-z gets misinterpreted as A-Za-z due to the way we handle case insensitivity
93-
re.match(r"[\p{Armenian}x]", input) # FN because we don't support \p at the moment
9493
re.match(r"[\abc]", input)
9594
re.match(r'[\s\'"\:\{\}\[\],&\*\#\?]', input)
9695
re.match(r"[0-9\\d]", input) # Compliant
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import re
2+
3+
4+
def non_compliant(input):
5+
re.match(r'(', input) # Noncompliant {{Fix the syntax error inside this regex.}}
6+
# ^
7+
# ^@-1< {{Expected ')', but found the end of the regex}}
8+
re.match(r'x{1,2,3}|(', input) # Noncompliant {{Fix the syntax errors inside this regex.}}
9+
# ^
10+
# ^@-1< {{Expected '}', but found ','}}
11+
# ^@-2< {{Expected ')', but found the end of the regex}}
12+
re.match(r'$[a-z^', input) # Noncompliant {{Fix the syntax error inside this regex.}}
13+
# ^
14+
# ^@-1< {{Expected ']', but found the end of the regex}}
15+
re.match(r'(\w+-(\d+)', input) # Noncompliant {{Fix the syntax error inside this regex.}}
16+
# ^
17+
# ^@-1< {{Expected ')', but found the end of the regex}}
18+
19+
20+
def compliant(input):
21+
re.match(r'\(\[', input)
22+
23+
24+
def unsupported_feature(input):
25+
re.match(r'\p{Lower}', input) # Noncompliant
26+
re.match(r'(?>x)', input) # Noncompliant
27+
re.match(r'x*+', input) # Noncompliant
28+
29+
30+
def false_positives():
31+
re.compile(r"\s*([ACGT])\s*[[]*[|]*\s*([0-9.\s]+)\s*[]]*\s*") # Noncompliant
32+
# Noncompliant@+3
33+
re.compile(r'''
34+
([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*"
35+
(?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8"
36+
(?:\s*,\s*|$) # Multiple accepts per header.
37+
''', re.VERBOSE)
38+
re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') # Noncompliant
39+
re.compile(r'([^,[\]]*)(\[([^\]]+)\])?$') # Noncompliant
40+
re.compile(r'{.*}') # Noncompliant

python-checks/src/test/resources/checks/regex/reluctantQuantifierCheck.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ def non_compliant(input):
1616
re.match(r"\".*?\"", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\"]*+".}}
1717
re.match(r".*?\w", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\W*+".}}
1818
re.match(r".*?\W", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\w*+".}}
19-
re.match(r".*?\p{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\P{L}*+".}}
20-
re.match(r".*?\P{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\p{L}*+".}}
2119
re.match(r"\[.*?\]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\]]*+".}}
2220
re.match(r".+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc]++".}}
2321
re.match(r"(?-U:\s)*?\S", input)

0 commit comments

Comments
 (0)