Skip to content

Commit 2444d65

Browse files
SONARPY-884 Rule S6035 Single-character alternations in regular expressions should be replaced with character classes (#964)
* SONARPY-884 Rule S6035 Single-character alternations in regular expressions should be replaced with character classes * Update ruling
1 parent e26eca1 commit 2444d65

File tree

8 files changed

+158
-2
lines changed

8 files changed

+158
-2
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
'project:buildbot-0.8.6p1/buildbot/__init__.py':[
3+
33,
4+
],
5+
'project:buildbot-slave-0.8.6p1/buildslave/__init__.py':[
6+
33,
7+
],
8+
'project:mypy-0.782/test-data/stdlib-samples/3.2/test/support.py':[
9+
1048,
10+
],
11+
'project:numpy-1.16.4/numpy/distutils/from_template.py':[
12+
56,
13+
58,
14+
],
15+
'project:numpy-1.16.4/numpy/f2py/capi_maps.py':[
16+
307,
17+
307,
18+
],
19+
'project:numpy-1.16.4/numpy/f2py/crackfortran.py':[
20+
350,
21+
350,
22+
],
23+
'project:tensorflow/python/debug/cli/command_parser.py':[
24+
187,
25+
],
26+
'project:tensorflow/python/debug/cli/tensor_format.py':[
27+
32,
28+
],
29+
'project:tornado-2.3/demos/appengine/markdown.py':[
30+
1319,
31+
],
32+
'project:tornado-2.3/demos/blog/markdown.py':[
33+
1319,
34+
],
35+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import org.sonar.python.checks.hotspots.UnsafeHttpMethodsCheck;
5151
import org.sonar.python.checks.hotspots.UnverifiedHostnameCheck;
5252
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
53+
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
5354
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
5455
import org.sonar.python.checks.regex.RegexLookaheadCheck;
5556
import org.sonar.python.checks.regex.ReluctantQuantifierWithEmptyContinuationCheck;
@@ -201,6 +202,7 @@ public static Iterable<Class> getChecks() {
201202
SetDuplicateKeyCheck.class,
202203
SillyEqualityCheck.class,
203204
SillyIdentityCheck.class,
205+
SingleCharacterAlternationCheck.class,
204206
SpecialMethodParamListCheck.class,
205207
SQLQueriesCheck.class,
206208
StandardInputCheck.class,
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.SingleCharacterAlternationFinder;
26+
27+
/**
 * Python check registered under rule key S6035 ("Single-character alternations in
 * regular expressions should be replaced with character classes").
 *
 * <p>The class holds no detection logic of its own: it forwards each parsed regex to
 * {@link SingleCharacterAlternationFinder} and lets that finder report through this
 * check's {@code addIssue}.
 */
@Rule(key = "S6035")
28+
public class SingleCharacterAlternationCheck extends AbstractRegexCheck {
29+
30+
/**
 * Runs a fresh {@link SingleCharacterAlternationFinder} over the parsed regex.
 *
 * @param regexParseResult parse result that the finder visits
 * @param regexFunctionCall the call expression the regex came from; not used by this check
 */
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
// The finder is given this::addIssue as its reporting callback.
new SingleCharacterAlternationFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<p>When an alternation contains multiple alternatives that consist of a single character, it can be rewritten as a character class. This should be
2+
preferred because it is more efficient and can even help prevent stack overflows when used inside a repetition (see rule {rule:python:S5998}).</p>
3+
<h2>Noncompliant Code Example</h2>
4+
<pre>
5+
r"a|b|c" # Noncompliant
6+
</pre>
7+
<h2>Compliant Solution</h2>
8+
<pre>
9+
r"[abc]"
10+
# or
11+
r"[a-c]"
12+
</pre>
13+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Single-character alternations in regular expressions should be replaced with character classes",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "5min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Major",
13+
"ruleSpecification": "RSPEC-6035",
14+
"sqKey": "S6035",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@
138138
"S5864",
139139
"S5886",
140140
"S5890",
141-
"S6002"
142-
"S6019"
141+
"S6002",
142+
"S6019",
143+
"S6035"
143144
]
144145
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
26+
/**
 * Test for {@link SingleCharacterAlternationCheck}: passes the Python fixture file and a
 * new check instance to {@code PythonCheckVerifier.verify}.
 */
public class SingleCharacterAlternationCheckTest {
27+
28+
// Presumably the verifier compares the issues raised on the fixture with the
// expectation comments written in that file; confirm against PythonCheckVerifier.
@Test
29+
public void test() {
30+
PythonCheckVerifier.verify("src/test/resources/checks/regex/singleCharacterAlternationCheck.py", new SingleCharacterAlternationCheck());
31+
}
32+
33+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import re
2+
3+
4+
# Fixture cases the check is expected to report: every call below carries an
# expectation marker in its trailing comment, and the first one also pins the
# issue message and the highlighted range.
# NOTE(review): `\x{12345}` and the `\ud800\udc00` / `\udbff\udfff` surrogate
# pairs look like Java-flavoured escapes; Python's `re` module expects `\xhh`
# or `\UXXXXXXXX`. Confirm these cases are intended for the Python regex flavour.
def non_compliant(input):
5+
re.match(r"a|b|c", input) # Noncompliant {{Replace this alternation with a character class.}}
6+
# ^^^^^
7+
re.match(r"a|(b|c)", input) # Noncompliant
8+
re.match(r"abcd|(e|f)gh", input) # Noncompliant
9+
re.match(r"(a|b|c)*", input) # Noncompliant
10+
re.match(r"\d|x", input) # Noncompliant
11+
re.match(r"\u1234|\x{12345}", input) # Noncompliant
12+
re.match(r"😂|😊", input) # Noncompliant
13+
re.match(r"\ud800\udc00|\udbff\udfff", input) # Noncompliant
14+
15+
16+
# Fixture cases that carry no expectation marker: existing character classes,
# alternatives longer than one character, and alternatives that are anchors
# or empty.
def compliant(input):
17+
re.match(r"[abc]", input)
18+
re.match(r"[a-c]", input)
19+
re.match(r"ab|cd", input)
20+
re.match(r"^|$", input)
21+
re.match(r"|", input)

0 commit comments

Comments
 (0)