Skip to content

Commit 83d078c

Browse files
SONARPY-893 Rule S5843 Regular expressions should not be too complicated (#973)
* SONARPY-893 Rule S5843 Regular expressions should not be too complicated * Update ITs
1 parent c1b1ac7 commit 83d078c

File tree

9 files changed

+228
-0
lines changed

9 files changed

+228
-0
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
'project:buildbot-0.8.6p1/buildbot/steps/shell.py':[
3+
382,
4+
],
5+
'project:django-2.2.3/django/core/management/commands/runserver.py':[
6+
15,
7+
],
8+
'project:django-2.2.3/django/utils/text.py':[
9+
307,
10+
],
11+
'project:django-2.2.3/django/utils/translation/template.py':[
12+
29,
13+
],
14+
'project:numpy-1.16.4/numpy/f2py/crackfortran.py':[
15+
874,
16+
878,
17+
880,
18+
1488,
19+
1492,
20+
1494,
21+
],
22+
'project:numpy-1.16.4/numpy/linalg/lapack_lite/clapack_scrub.py':[
23+
232,
24+
],
25+
'project:tensorflow/python/debug/cli/tensor_format_test.py':[
26+
48,
27+
],
28+
'project:tornado-2.3/demos/appengine/markdown.py':[
29+
717,
30+
],
31+
'project:tornado-2.3/demos/blog/markdown.py':[
32+
717,
33+
],
34+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.sonar.python.checks.regex.AnchorPrecedenceCheck;
5353
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
5454
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
55+
import org.sonar.python.checks.regex.RegexComplexityCheck;
5556
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
5657
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
5758
import org.sonar.python.checks.regex.ReluctantQuantifierCheck;
@@ -224,6 +225,7 @@ public static Iterable<Class> getChecks() {
224225
TrailingCommentCheck.class,
225226
TrailingWhitespaceCheck.class,
226227
ReferencedBeforeAssignmentCheck.class,
228+
RegexComplexityCheck.class,
227229
RegexLookaheadCheck.class,
228230
UndefinedNameAllPropertyCheck.class,
229231
UnreachableExceptCheck.class,
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.check.RuleProperty;
24+
import org.sonar.plugins.python.api.tree.CallExpression;
25+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
26+
import org.sonarsource.analyzer.commons.regex.finders.ComplexRegexFinder;
27+
28+
/**
 * Check registered under rule key S5843 that evaluates the complexity of a parsed regular expression.
 *
 * <p>The evaluation is delegated to {@link ComplexRegexFinder}, which is given the configured
 * threshold {@link #max} and this check's {@code addIssue} method as its reporting callback.
 */
@Rule(key = "S5843")
29+
public class RegexComplexityCheck extends AbstractRegexCheck {
30+
31+
// Threshold used when the "maxComplexity" rule property is not overridden.
private static final int DEFAULT_MAX = 20;
32+
33+
// Threshold handed to ComplexRegexFinder; exposed as the "maxComplexity" rule property.
// The property's default value is derived from DEFAULT_MAX so both stay in sync.
@RuleProperty(
34+
key = "maxComplexity",
35+
description = "The maximum authorized complexity.",
36+
defaultValue = "" + DEFAULT_MAX)
37+
public int max = DEFAULT_MAX;
38+
39+
/**
 * Visits the parsed regex with a new {@link ComplexRegexFinder} built from {@code this::addIssue} and {@link #max}.
 *
 * @param regex the parsed regular expression to visit
 * @param methodInvocation the call expression associated with the regex; not used by this method
 */
@Override
40+
public void checkRegex(RegexParseResult regex, CallExpression methodInvocation) {
41+
new ComplexRegexFinder(this::addIssue, max).visit(regex);
42+
}
43+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<p>Overly complicated regular expressions are hard to read and to maintain and can easily cause hard-to-find bugs. If a regex is too complicated, you
2+
should consider replacing it or parts of it with regular code or splitting it apart into multiple patterns at least.</p>
3+
<p>The complexity of a regular expression is determined as follows:</p>
4+
<p>Each of the following operators increases the complexity by an amount equal to the current nesting level and also increases the current nesting
5+
level by one for its arguments:</p>
6+
<ul>
7+
<li> <code>|</code> - when multiple <code>|</code> operators are used together, the subsequent ones only increase the complexity by 1 </li>
8+
<li> Quantifiers (<code>*</code>, <code>+</code>, <code>?</code>, <code>{n,m}</code>, <code>{n,}</code> or <code>{n}</code>) </li>
9+
<li> Non-capturing groups that set flags (such as <code>(?i:some_pattern)</code> or <code>(?i)some_pattern</code>) </li>
10+
<li> Lookahead and lookbehind assertions </li>
11+
</ul>
12+
<p>Additionally, each use of the following features increases the complexity by 1 regardless of nesting:</p>
13+
<ul>
14+
<li> character classes </li>
15+
<li> back references </li>
16+
</ul>
17+
<h2>Noncompliant Code Example</h2>
18+
<pre>
19+
p = re.compile(r"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[13-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$")
20+
21+
if p.match(dateString):
22+
    handleDate(dateString)
23+
</pre>
24+
<h2>Compliant Solution</h2>
25+
<pre>
26+
p = re.compile(r"^\d{1,2}([-/.])\d{1,2}\1\d{1,4}$")
27+
if p.match(dateString):
28+
    dateParts = re.split(r"[-/.]", dateString)
29+
    day = int(dateParts[0])
30+
    month = int(dateParts[1])
31+
    year = int(dateParts[2])
32+
    # Put logic to validate and process the date based on its integer parts here
33+
</pre>
34+
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"title": "Regular expressions should not be too complicated",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Linear with offset",
7+
"linearDesc": "number of complexity points over the configurable limit",
8+
"linearOffset": "8min",
9+
"linearFactor": "2min"
10+
},
11+
"tags": [
12+
"regex"
13+
],
14+
"defaultSeverity": "Major",
15+
"ruleSpecification": "RSPEC-5843",
16+
"sqKey": "S5843",
17+
"scope": "Main",
18+
"quickfix": "unknown"
19+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@
134134
"S5807",
135135
"S5828",
136136
"S5842",
137+
"S5843",
137138
"S5850",
138139
"S5855",
139140
"S5857",
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
/**
 * Tests for {@link RegexComplexityCheck}, run through {@code PythonCheckVerifier} against Python fixture files.
 */
public class RegexComplexityCheckTest {
26+
27+
// Verifies the check with its default configuration against regexComplexityCheck.py.
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/regexComplexityCheck.py", new RegexComplexityCheck());
30+
}
31+
32+
// Verifies the check with the threshold raised to 21 against regexComplexityCheck-21.py.
@Test
33+
public void test_max_parameter() {
34+
RegexComplexityCheck check = new RegexComplexityCheck();
35+
// Override the public rule property directly instead of going through rule configuration.
check.max = 21;
36+
PythonCheckVerifier.verify("src/test/resources/checks/regex/regexComplexityCheck-21.py", check);
37+
}
38+
39+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import re
2+
3+
4+
def character_classes(input):
5+
re.match(r'[a][b][c][d][e][f][g][h][i][j][k][l][m][n][o][p][q][r][s][t][u]', input)
6+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 22 to the 21 allowed.}}
7+
re.match(r'[a][b][c][d][e][f][g][h][i][j][k][l][m][n][o][p][q][r][s][t][u][v]', input)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import re
2+
3+
4+
def character_classes(input):
5+
re.match(r'[a][b][c][d][e][f][g][h][i][j][k][l][m][n][o][p][q][r][s][t]', input)
6+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
7+
re.match(r'[a][b][c][d][e][f][g][h][i][j][k][l][m][n][o][p][q][r][s][t][u]', input)
8+
9+
10+
def disjunction(input):
11+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
12+
re.match(r'(a|(b|(c|(d|(e|(f|(gh)))))))', input) # 1+2+3+4+5+6=21
13+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
14+
re.match(r'(a|(b|(c|(d|(e|(((f|(gh)))))))))', input) # 1+2+3+4+5+6=21
15+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 23 to the 20 allowed.}}
16+
re.match(r'(a|(b|(c|(d|(e|(f|g|h|i))))))', input) # 1+2+3+4+5+8=23
17+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 28 to the 20 allowed.}}
18+
re.match(r'(a|(b|(c|(d|(e|(f|(g|(hi))))))))', input) # 1+2+3+4+5+6+7=28
19+
20+
21+
def repetition(input):
22+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
23+
re.match(r'(a(b(c(d(ef+)+)+)+)+)+', input) # 6+5+4+3+2+1=21
24+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
25+
re.match(r'(a(b(c(d(ef*)*)*)*)*)*', input) # 6+5+4+3+2+1=21
26+
27+
28+
def non_capturing_group(input):
29+
re.match(r'(?:a(?:b(?:c(?:d(?:e(?:f))))))', input) # 0
30+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
31+
re.match(r'(?i:a(?i:b(?i:c(?i:d(?i:e(?i:f))))))', input) # 1+2+3+4+5+6=21
32+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
33+
re.match(r'(?i:a(?i:b(?i:c(?i:d(?i:e((?i)f))))))', input) # 1+2+3+4+5+6=21
34+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
35+
re.match(r'(?-i:a(?-i:b(?-i:c(?-i:d(?-i:e(?-i:f))))))', input) # 1+2+3+4+5+6=21
36+
37+
38+
def back_reference(input):
39+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
40+
re.match(r'(abc)(a|(b|(c|(d|(e|(f|gh))))))', input) # 1+2+3+4+5+6=21
41+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 22 to the 20 allowed.}}
42+
re.match(r'(abc)(a|(b|(c|(d|(e|(f|\1))))))', input) # 1+2+3+4+5+6+1=22
43+
44+
45+
def look_around(input):
46+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
47+
re.match(r'(a|(b|(c|(d|(e|(f(?!g)))))))', input) # 1+2+3+4+5+6=21
48+
# Noncompliant@+1 {{Simplify this regular expression to reduce its complexity from 21 to the 20 allowed.}}
49+
re.match(r'(a|(b|(c|(d|(e(?!(f|g)))))))', input) # 1+2+3+4+5+6=21

0 commit comments

Comments
 (0)