Skip to content

Commit c457676

Browse files
SONARPY-978 Rule S6397: Character classes in regular expressions should not contain only one character (#1105)
1 parent 98b2d8f commit c457676

File tree

8 files changed

+245
-1
lines changed

8 files changed

+245
-1
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
{
2+
'project:biopython/Bio/motifs/pfm.py':[
3+
338,
4+
],
5+
'project:buildbot-0.8.6p1/buildbot/steps/package/rpm/rpmspec.py':[
6+
31,
7+
32,
8+
],
9+
'project:buildbot-0.8.6p1/buildbot/steps/shell.py':[
10+
382,
11+
],
12+
'project:buildbot-slave-0.8.6p1/buildslave/scripts/runner.py':[
13+
160,
14+
],
15+
'project:mypy-0.782/test-data/stdlib-samples/3.2/base64.py':[
16+
236,
17+
],
18+
'project:numpy-1.16.4/numpy/distutils/conv_template.py':[
19+
144,
20+
],
21+
'project:numpy-1.16.4/numpy/distutils/fcompiler/ibm.py':[
22+
82,
23+
],
24+
'project:numpy-1.16.4/numpy/f2py/capi_maps.py':[
25+
307,
26+
307,
27+
],
28+
'project:numpy-1.16.4/numpy/f2py/crackfortran.py':[
29+
640,
30+
2938,
31+
],
32+
'project:numpy-1.16.4/numpy/f2py/f2py2e.py':[
33+
509,
34+
509,
35+
509,
36+
516,
37+
516,
38+
516,
39+
516,
40+
516,
41+
516,
42+
534,
43+
534,
44+
534,
45+
534,
46+
538,
47+
538,
48+
538,
49+
567,
50+
567,
51+
],
52+
'project:numpy-1.16.4/numpy/random/mtrand/generate_mtrand_c.py':[
53+
27,
54+
],
55+
'project:tornado-2.3/demos/appengine/markdown.py':[
56+
628,
57+
628,
58+
628,
59+
629,
60+
629,
61+
629,
62+
630,
63+
630,
64+
630,
65+
],
66+
'project:tornado-2.3/demos/blog/markdown.py':[
67+
628,
68+
628,
69+
628,
70+
629,
71+
629,
72+
629,
73+
630,
74+
630,
75+
630,
76+
],
77+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import org.sonar.python.checks.regex.RegexLookaheadCheck;
6363
import org.sonar.python.checks.regex.ReluctantQuantifierCheck;
6464
import org.sonar.python.checks.regex.ReluctantQuantifierWithEmptyContinuationCheck;
65+
import org.sonar.python.checks.regex.SingleCharCharacterClassCheck;
6566
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
6667
import org.sonar.python.checks.regex.StringReplaceCheck;
6768
import org.sonar.python.checks.regex.UnquantifiedNonCapturingGroupCheck;
@@ -228,6 +229,7 @@ public static Iterable<Class> getChecks() {
228229
SillyEqualityCheck.class,
229230
SillyIdentityCheck.class,
230231
SingleCharacterAlternationCheck.class,
232+
SingleCharCharacterClassCheck.class,
231233
SpecialMethodParamListCheck.class,
232234
SQLQueriesCheck.class,
233235
StandardInputCheck.class,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.util.Optional;
23+
import java.util.regex.Pattern;
24+
import org.sonar.check.Rule;
25+
import org.sonar.plugins.python.api.tree.CallExpression;
26+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
27+
import org.sonarsource.analyzer.commons.regex.finders.SingleCharCharacterClassFinder;
28+
29+
@Rule(key = "S6397")
30+
public class SingleCharCharacterClassCheck extends AbstractRegexCheck {
31+
32+
@Override
33+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
34+
Optional.ofNullable(regexFunctionCall.calleeSymbol())
35+
.flatMap(symbol -> Optional.ofNullable(symbol.fullyQualifiedName()))
36+
.filter(fqn -> lookedUpFunctions().containsKey(fqn))
37+
.filter(fqn -> !regexParseResult.getResult().activeFlags().contains(Pattern.COMMENTS))
38+
.ifPresent(fqn -> new SingleCharCharacterClassFinder(this::addIssue).visit(regexParseResult));
39+
}
40+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<p>Character classes in regular expressions are a convenient way to match one of several possible characters by listing the allowed characters or
2+
ranges of characters. If a character class contains only one character, the effect is the same as just writing the character without a character
3+
class.</p>
4+
<p>Thus, having only one character in a character class is usually a simple oversight that remained after removing other characters of the class.</p>
5+
<h2>Noncompliant Code Example</h2>
6+
<pre>
7+
r"a[b]c"
8+
</pre>
9+
<h2>Compliant Solution</h2>
10+
<pre>
11+
r"abc"
12+
</pre>
13+
<h2>Exceptions</h2>
14+
<p>This rule does not raise an issue when the character inside the class is a metacharacter. This notation is sometimes used to avoid escaping (e.g.,
15+
<code>[.]{3}</code> to match three dots).</p>
16+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Character classes in regular expressions should not contain only one character",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Major",
13+
"ruleSpecification": "RSPEC-6397",
14+
"sqKey": "S6397",
15+
"scope": "All",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@
155155
"S6331",
156156
"S6035",
157157
"S6395",
158-
"S6396"
158+
"S6396",
159+
"S6397"
159160
]
160161
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
public class SingleCharCharacterClassCheckTest {
26+
27+
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/singleCharCharacterClassCheck.py", new SingleCharCharacterClassCheck());
30+
}
31+
32+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import re
2+
3+
4+
def non_compliant():
5+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
6+
changed = re.match(r"[0]", input) # Noncompliant {{Replace this character class by the character itself.}}
7+
# ^
8+
9+
changed = re.match(r"[B]", input) # Noncompliant {{Replace this character class by the character itself.}}
10+
# ^
11+
changed = re.match(r"[ ]", input) # Noncompliant {{Replace this character class by the character itself.}}
12+
# ^
13+
changed = re.match(r"[:]", input) # Noncompliant {{Replace this character class by the character itself.}}
14+
# ^
15+
changed = re.match(r"([)])", input) # Noncompliant {{Replace this character class by the character itself.}}
16+
# ^
17+
changed = re.match(r"[:]", input) # Noncompliant {{Replace this character class by the character itself.}}
18+
# ^
19+
changed = re.match(r"[]]", input) # Noncompliant {{Replace this character class by the character itself.}}
20+
# ^
21+
changed = re.match(r"([\[])", input) # Noncompliant {{Replace this character class by the character itself.}}
22+
# ^^
23+
changed = re.match(r'[b][c]', input, re.M) # Noncompliant 2
24+
25+
26+
def compliant():
27+
input = "abcdefghijklmnopqa"
28+
changed = re.match(re.escape("test"), input)
29+
changed = re.match(r"[abc]", input)
30+
changed = re.match(r"[a-c]", input)
31+
changed = re.match(r"ab|cd", input)
32+
changed = re.match(r"^|$", input)
33+
changed = re.match(r"|", input)
34+
changed = re.match(r"[\[a\]]", input)
35+
# Special characters do not raise a warning
36+
changed = re.match(r"a[.]a", input)
37+
changed = re.match(r"a[*]a", input)
38+
changed = re.match(r"a[+]a", input)
39+
changed = re.match(r"a[^]a", input)
40+
changed = re.match(r"a[{m}]a", input)
41+
changed = re.match(r"a[\d]a", input)
42+
changed = re.match(r"a[\w]a", input)
43+
changed = re.match(r"a[?]a", input)
44+
changed = re.match(r"a[|]a", input)
45+
changed = re.match(r"a[\W]a", input)
46+
changed = re.match(r"a[\]a", input)
47+
changed = re.match(r"a a", input)
48+
49+
changed = re.compile(r'[ \t # comment]', re.X)
50+
51+
changed = re.compile(r'[ \t ]', re.X)
52+
changed = re.compile(r'[ \t]', re.X)
53+
changed = re.match(r'[ \t]', input, re.X)
54+
55+
# TODO : False Negatives. We deactivated the SingleCharCharacterClassFinder whenever the flag X or VERBOSE is set.
56+
# see https://github.com/SonarSource/sonar-analyzer-commons/issues/217
57+
changed = re.compile(r'[\t]', re.X)
58+
changed = re.compile(r'[a]', re.VERBOSE)
59+
changed = re.match(r'[b][c]', input, re.M | re.X)

0 commit comments

Comments
 (0)