Skip to content

Commit 9746835

Browse files
SONARPY-980 Rule S6353: Regular expression quantifiers and character classes should be used concisely (#1108)
1 parent 65f1b6a commit 9746835

File tree

8 files changed

+320
-1
lines changed

8 files changed

+320
-1
lines changed
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
{
2+
'project:biopython/Bio/Application/__init__.py':[
3+
33,
4+
],
5+
'project:biopython/Bio/FSSP/__init__.py':[
6+
51,
7+
51,
8+
53,
9+
53,
10+
53,
11+
],
12+
'project:biopython/Bio/GenBank/Scanner.py':[
13+
1101,
14+
],
15+
'project:biopython/Bio/Phylo/PAML/_parse_codeml.py':[
16+
23,
17+
80,
18+
],
19+
'project:biopython/Bio/Phylo/PhyloXML.py':[
20+
542,
21+
1053,
22+
1478,
23+
],
24+
'project:biopython/Bio/PopGen/GenePop/Controller.py':[
25+
654,
26+
654,
27+
654,
28+
],
29+
'project:buildbot-0.8.6p1/buildbot/process/mtrlogobserver.py':[
30+
85,
31+
86,
32+
88,
33+
89,
34+
89,
35+
],
36+
'project:buildbot-0.8.6p1/buildbot/status/builder.py':[
37+
243,
38+
244,
39+
],
40+
'project:buildbot-0.8.6p1/buildbot/status/web/change_hook.py':[
41+
101,
42+
],
43+
'project:buildbot-0.8.6p1/buildbot/steps/shell.py':[
44+
382,
45+
382,
46+
],
47+
'project:buildbot-0.8.6p1/buildbot/steps/source/oldsource.py':[
48+
977,
49+
977,
50+
978,
51+
978,
52+
979,
53+
979,
54+
],
55+
'project:buildbot-0.8.6p1/buildbot/steps/vstudio.py':[
56+
41,
57+
42,
58+
],
59+
'project:buildbot-slave-0.8.6p1/buildslave/runprocess.py':[
60+
300,
61+
],
62+
'project:django-2.2.3/django/views/static.py':[
63+
125,
64+
],
65+
'project:docker-compose-1.24.1/compose/config/interpolation.py':[
66+
195,
67+
],
68+
'project:docker-compose-1.24.1/script/release/release/utils.py':[
69+
10,
70+
10,
71+
10,
72+
10,
73+
10,
74+
10,
75+
],
76+
'project:mypy-0.782/misc/upload-pypi.py':[
77+
36,
78+
],
79+
'project:mypy-0.782/mypy/messages.py':[
80+
1954,
81+
],
82+
'project:mypy-0.782/mypy/stubdoc.py':[
83+
21,
84+
],
85+
'project:mypy-0.782/mypy/test/data.py':[
86+
82,
87+
87,
88+
92,
89+
100,
90+
106,
91+
285,
92+
289,
93+
],
94+
'project:mypy-0.782/mypy/test/testfinegrained.py':[
95+
195,
96+
318,
97+
],
98+
'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[
99+
53,
100+
],
101+
'project:numpy-1.16.4/numpy/lib/polynomial.py':[
102+
977,
103+
],
104+
'project:numpy-1.16.4/numpy/testing/_private/parameterized.py':[
105+
490,
106+
],
107+
'project:numpy-1.16.4/numpy/tests/test_numpy_version.py':[
108+
12,
109+
12,
110+
12,
111+
12,
112+
12,
113+
12,
114+
],
115+
'project:numpy-1.16.4/pavement.py':[
116+
234,
117+
],
118+
'project:numpy-1.16.4/tools/c_coverage/c_coverage_report.py':[
119+
124,
120+
124,
121+
125,
122+
125,
123+
],
124+
'project:tensorflow/python/debug/cli/tensor_format.py':[
125+
32,
126+
],
127+
'project:tensorflow/python/module/module.py':[
128+
306,
129+
],
130+
'project:tensorflow/python/ops/structured/structured_tensor.py':[
131+
790,
132+
],
133+
'project:tensorflow/tools/ci_build/update_version.py':[
134+
126,
135+
126,
136+
157,
137+
158,
138+
159,
139+
],
140+
'project:tensorflow/tools/docs/parser.py':[
141+
1132,
142+
],
143+
'project:tornado-2.3/tornado/simple_httpclient.py':[
144+
341,
145+
],
146+
'project:tornado-2.3/tornado/test/web_test.py':[
147+
210,
148+
],
149+
'project:twisted-12.1.0/twisted/persisted/aot.py':[
150+
159,
151+
],
152+
'project:twisted-12.1.0/twisted/web/test/test_static.py':[
153+
1054,
154+
1054,
155+
1054,
156+
],
157+
'project:twisted-12.1.0/twisted/words/xish/xpathparser.py':[
158+
339,
159+
343,
160+
],
161+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
6666
import org.sonar.python.checks.regex.StringReplaceCheck;
6767
import org.sonar.python.checks.regex.UnquantifiedNonCapturingGroupCheck;
68+
import org.sonar.python.checks.regex.VerboseRegexCheck;
6869

6970
public final class CheckList {
7071

@@ -261,6 +262,7 @@ public static Iterable<Class> getChecks() {
261262
UselessParenthesisCheck.class,
262263
UselessStatementCheck.class,
263264
UseOfEmptyReturnValueCheck.class,
265+
VerboseRegexCheck.class,
264266
VerifiedSslTlsCertificateCheck.class,
265267
WeakSSLProtocolCheck.class,
266268
WildcardImportCheck.class,
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.VerboseRegexFinder;
26+
27+
/**
 * Check registered under rule key S6353. For every parsed regular expression it
 * runs a {@link VerboseRegexFinder} over the parse result, handing the finder
 * this check's {@code addIssue} method reference as its reporting callback.
 */
@Rule(key = "S6353")
28+
public class VerboseRegexCheck extends AbstractRegexCheck {
29+
30+
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
// regexFunctionCall is not used here; only the parse result is passed to the finder.
new VerboseRegexFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<p>With regular expressions syntax, it’s possible to express the same thing in many ways. For example, to match a two-digit number, one could write
2+
<code>[0-9]{2,2}</code> or <code>\d{2}</code>. The latter is not only shorter in terms of expression length, but also easier to read and thus to maintain.
3+
This rule recommends replacing some bulky quantifiers and character classes with more concise equivalents:</p>
4+
<ul>
5+
<li> <code>\d</code> for <code>[0-9]</code> and <code>\D</code> for <code>[^0-9]</code> </li>
6+
<li> <code>\w</code> for <code>[A-Za-z0-9_]</code> and <code>\W</code> for <code>[^A-Za-z0-9_]</code> </li>
7+
<li> <code>.</code> for character classes matching everything (e.g. <code>[\w\W]</code>, <code>[\d\D]</code>, or <code>[\s\S]</code> with
8+
<code>s</code> flag) </li>
9+
<li> <code>x?</code> for <code>x{0,1}</code>, <code>x*</code> for <code>x{0,}</code>, <code>x+</code> for <code>x{1,}</code>, <code>x{N}</code> for
10+
<code>x{N,N}</code> </li>
11+
</ul>
12+
<h2>Noncompliant Code Example</h2>
13+
<pre>
14+
r"[0-9]" # Noncompliant - same as r"\d"
15+
r"[^0-9]" # Noncompliant - same as r"\D"
16+
r"[A-Za-z0-9_]" # Noncompliant - same as r"\w"
17+
r"[\w\W]" # Noncompliant - same as r"."
18+
r"a{0,}" # Noncompliant - same as r"a*"
19+
</pre>
20+
<h2>Compliant Solution</h2>
21+
<pre>
22+
r"\d"
23+
r"\D"
24+
r"\w"
25+
r"."
26+
r"a*"
27+
</pre>
28+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Regular expression quantifiers and character classes should be used concisely",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Minor",
13+
"ruleSpecification": "RSPEC-6353",
14+
"sqKey": "S6353",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,9 @@
151151
"S5996",
152152
"S6002",
153153
"S6019",
154-
"S6035",
155154
"S6323",
156155
"S6331",
156+
"S6353",
157157
"S6395"
158158
]
159159
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
/**
 * Runs {@link VerboseRegexCheck} through {@code PythonCheckVerifier} against the
 * {@code verboseRegexCheck.py} fixture under {@code src/test/resources/checks/regex}.
 */
public class VerboseRegexCheckTest {
26+
27+
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/verboseRegexCheck.py", new VerboseRegexCheck());
30+
}
31+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import re
2+
3+
4+
# Fixture cases that are each tagged with a trailing "Noncompliant" marker: every
# pattern uses a verbose character class or a verbose quantifier. Where present,
# the {{...}} text is the expected issue message; the "^" lines presumably mark
# the expected highlighted range of the preceding statement (verify against the
# verifier's conventions).
def non_compliant(input):
5+
re.match(r"[\s\S]", input, re.DOTALL) # Noncompliant {{Use concise character class syntax '.' instead of '[\s\S]'.}}
6+
# ^^^^^^
7+
re.match(r"[\d\D]", input) # Noncompliant {{Use concise character class syntax '.' instead of '[\d\D]'.}}
8+
re.match(r"[\w\W]", input) # Noncompliant {{Use concise character class syntax '.' instead of '[\w\W]'.}}
9+
re.match(r"[0-9]", input) # Noncompliant {{Use concise character class syntax '\d' instead of '[0-9]'.}}
10+
re.match(r"foo[0-9]barr", input) # Noncompliant
11+
# ^^^^^
12+
re.match(r"[^0-9]", input) # Noncompliant {{Use concise character class syntax '\D' instead of '[^0-9]'.}}
13+
re.match(r"[A-Za-z0-9_]",input) # Noncompliant {{Use concise character class syntax '\w' instead of '[A-Za-z0-9_]'.}}
14+
re.match(r"[0-9_A-Za-z]", input) # Noncompliant
15+
re.match(r"[^A-Za-z0-9_]", input) # Noncompliant {{Use concise character class syntax '\W' instead of '[^A-Za-z0-9_]'.}}
16+
re.match(r"[^0-9_A-Za-z]", input) # Noncompliant
17+
re.match(r"x{0,1}", input) # Noncompliant {{Use concise quantifier syntax '?' instead of '{0,1}'.}}
18+
re.match(r"x{0,1}?", input) # Noncompliant
19+
re.match(r"x{0,}", input) # Noncompliant {{Use concise quantifier syntax '*' instead of '{0,}'.}}
20+
re.match(r"x{0,}?", input) # Noncompliant
21+
re.match(r"x{1,}", input) # Noncompliant {{Use concise quantifier syntax '+' instead of '{1,}'.}}
22+
re.match(r"x{1,}?", input) # Noncompliant
23+
re.match(r"x{2,2}", input) # Noncompliant {{Use concise quantifier syntax '{2}' instead of '{2,2}'.}}
24+
re.match(r"x{2,2}?", input) # Noncompliant
25+
26+
27+
# Fixture cases with no issue marker, i.e. no issue is expected on any of them:
# partial ranges and mixed classes, already-concise quantifiers, and class pairs
# that are not a shorthand together with its own negation. [\s\S] is listed here
# without re.DOTALL, whereas the flagged variant in non_compliant passes re.DOTALL.
def compliant(input):
28+
re.match(r"[x]", input)
29+
re.match(r"[12]", input)
30+
re.match(r"[1234]", input)
31+
re.match(r"[1-3]", input)
32+
re.match(r"[1-9abc]", input)
33+
re.match(r"[1-9a-bAB]", input)
34+
re.match(r"[1-9a-bA-Z!]", input)
35+
re.match(r"[1-2[a][b][c]]", input)
36+
re.match(r"[0-9[a][b][c]]", input)
37+
re.match(r"[0-9a-z[b][c]]", input)
38+
re.match(r"[0-9a-zA-Z[c]]", input)
39+
re.match(r"x?", input)
40+
re.match(r"x*", input)
41+
re.match(r"x+", input)
42+
re.match(r"x{2}", input)
43+
re.match(r"[\s\S]", input)
44+
re.match(r"[\w\S]", input)
45+
re.match(r"[\d\S]", input)
46+
re.match(r"[\s\d]", input)

0 commit comments

Comments
 (0)