Skip to content

Commit 1fab039

Browse files
SONARPY-982 Rule S6331: Regular expressions should not contain empty groups (#1107)
1 parent 54e812b commit 1fab039

File tree

8 files changed

+174
-5
lines changed

8 files changed

+174
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
'project:numpy-1.16.4/numpy/f2py/crackfortran.py':[
3+
1393,
4+
],
5+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,17 @@
5151
import org.sonar.python.checks.hotspots.UnverifiedHostnameCheck;
5252
import org.sonar.python.checks.regex.AnchorPrecedenceCheck;
5353
import org.sonar.python.checks.regex.DuplicatesInCharacterClassCheck;
54+
import org.sonar.python.checks.regex.EmptyGroupCheck;
5455
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
55-
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
5656
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
57+
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
5758
import org.sonar.python.checks.regex.InvalidRegexCheck;
58-
import org.sonar.python.checks.regex.RegexComplexityCheck;
59-
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
6059
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
61-
import org.sonar.python.checks.regex.ReluctantQuantifierCheck;
60+
import org.sonar.python.checks.regex.RegexComplexityCheck;
6261
import org.sonar.python.checks.regex.RegexLookaheadCheck;
62+
import org.sonar.python.checks.regex.ReluctantQuantifierCheck;
6363
import org.sonar.python.checks.regex.ReluctantQuantifierWithEmptyContinuationCheck;
64+
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
6465
import org.sonar.python.checks.regex.StringReplaceCheck;
6566

6667
public final class CheckList {
@@ -120,6 +121,7 @@ public static Iterable<Class> getChecks() {
120121
DynamicCodeExecutionCheck.class,
121122
ElseAfterLoopsWithoutBreakCheck.class,
122123
EmailSendingCheck.class,
124+
EmptyGroupCheck.class,
123125
EmptyFunctionCheck.class,
124126
EmptyNestedBlockCheck.class,
125127
EmptyStringRepetitionCheck.class,
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.EmptyGroupFinder;
26+
27+
/**
 * Check registered under rule key S6331.
 *
 * <p>The detection itself is not implemented here: the parsed regular expression is handed to
 * {@link EmptyGroupFinder}, which is given {@code this::addIssue} as its reporting callback.
 */
@Rule(key = "S6331")
28+
public class EmptyGroupCheck extends AbstractRegexCheck {
29+
30+
/**
 * Runs the empty-group finder over one parsed regular expression.
 *
 * @param regexParseResult parse result that the finder visits
 * @param regexFunctionCall call expression supplied alongside the parse result; not used by this check
 */
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
// A new finder is created for each invocation; it reports through addIssue.
new EmptyGroupFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<p>There are several reasons to use a group in a regular expression:</p>
2+
<ul>
3+
<li> to change the precedence (e.g. <code>do(g|or)</code> will match 'dog' and 'door') </li>
4+
<li> to remember the parenthesised part of the match, in the case of a capturing group </li>
5+
<li> to improve readability </li>
6+
</ul>
7+
<p>In any case, having an empty group is most probably a mistake. Either it is a leftover after refactoring and should be removed, or literal
8+
parentheses were intended and were not escaped.</p>
9+
<h2>Noncompliant Code Example</h2>
10+
<pre>
11+
r"foo()" # Noncompliant, will match only 'foo'
12+
</pre>
13+
<h2>Compliant Solution</h2>
14+
<pre>
15+
r"foo\(\)" # Matches 'foo()'
16+
</pre>
17+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Regular expressions should not contain empty groups",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Major",
13+
"ruleSpecification": "RSPEC-6331",
14+
"sqKey": "S6331",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@
151151
"S5996",
152152
"S6002",
153153
"S6019",
154-
"S6035"
154+
"S6035",
155+
"S6331"
155156
]
156157
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
/**
 * Test for {@link EmptyGroupCheck}.
 */
public class EmptyGroupCheckTest {
26+
27+
/**
 * Passes a new {@link EmptyGroupCheck} and the fixture file {@code emptyGroupCheck.py}
 * to {@code PythonCheckVerifier.verify}.
 */
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/emptyGroupCheck.py", new EmptyGroupCheck());
30+
}
31+
32+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import re
2+
3+
4+
def non_compliant(input):
# Fixture: every pattern below contains a group with nothing inside it
# (capturing, non-capturing, lookahead and lookbehind forms), first at the
# top level of the pattern and then nested inside an outer capturing group.
# The trailing comment on each call marks it as an expected issue, and the
# caret line underneath marks the expected highlighted span.
5+
re.match(r"foo()bar", input) # Noncompliant {{Remove this empty group.}}
6+
# ^^
7+
re.match(r"foo(?:)bar", input) # Noncompliant
8+
# ^^^^
9+
re.match(r"foo(?=)bar", input) # Noncompliant
10+
# ^^^^
11+
re.match(r"foo(?!)bar", input) # Noncompliant
12+
# ^^^^
13+
re.match(r"foo(?<=)bar", input) # Noncompliant
14+
# ^^^^^
15+
re.match(r"foo(?<!)bar", input) # Noncompliant
16+
# ^^^^^
17+
18+
re.match(r"(foo()bar)", input) # Noncompliant
19+
# ^^
20+
re.match(r"(foo(?:)bar)", input) # Noncompliant
21+
# ^^^^
22+
re.match(r"(foo(?=)bar)", input) # Noncompliant
23+
# ^^^^
24+
re.match(r"(foo(?!)bar)", input) # Noncompliant
25+
# ^^^^
26+
re.match(r"(foo(?<=)bar)", input) # Noncompliant
27+
# ^^^^^
28+
re.match(r"(foo(?<!)bar)", input) # Noncompliant
29+
# ^^^^^
30+
31+
32+
def compliant(input):
# Fixture: none of the calls below carries an expected-issue marker.
# In order, the patterns cover: "(?-...)" constructs, groups with a
# non-empty body, parentheses that appear inside a character class, and
# groups whose body is a lone "|" alternation.
# NOTE(review): "(?-)" and "(?-x)" do not look like syntax accepted by
# Python's own re module; presumably this fixture is only parsed by the
# analyzer and never executed - confirm.
33+
re.match(r"foo(?-)bar", input)
34+
re.match(r"foo(?-x)bar", input)
35+
re.match(r"(foo(?-)bar)", input)
36+
37+
re.match(r"foo(x)bar", input)
38+
re.match(r"foo(?:x)bar", input)
39+
re.match(r"foo(?>x)bar", input)
40+
re.match(r"foo(?=x)bar", input)
41+
re.match(r"foo(?!x)bar", input)
42+
re.match(r"foo(?<=x)bar", input)
43+
re.match(r"foo(?<!x)bar", input)
44+
45+
re.match(r"[foo()bar]", input)
46+
re.match(r"[foo(?-)bar]", input)
47+
re.match(r"[foo(?:)bar]", input)
48+
re.match(r"[foo(?>)bar]", input)
49+
re.match(r"[foo(?=x)bar]", input)
50+
re.match(r"[foo(?!x)bar]", input)
51+
re.match(r"[foo(?<=x)bar]", input)
52+
re.match(r"[foo(?<!x)bar]", input)
53+
54+
re.match(r"(foo(|)bar)", input)
55+
re.match(r"(foo(?-|)bar)", input)
56+
re.match(r"(foo(?:|)bar)", input)
57+
re.match(r"(foo(?>|)bar)", input)
58+
re.match(r"(foo(?=|)bar)", input)
59+
re.match(r"(foo(?!|)bar)", input)
60+
re.match(r"(foo(?<=|)bar)", input)
61+
re.match(r"(foo(?<!|)bar)", input)

0 commit comments

Comments
 (0)