Skip to content

Commit e1ef98e

Browse files
SONARPY-883 Rule S6019 Reluctant quantifiers in regular expressions should be followed by an expression that can't match the empty string (#963)
* SONARPY-883 Rule S6019 Reluctant quantifiers in regular expressions should be followed by an expression that can't match the empty string * Update ruling
1 parent 91731ea commit e1ef98e

File tree

8 files changed

+128
-1
lines changed

8 files changed

+128
-1
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
'project:biopython/Bio/Phylo/PAML/_parse_yn00.py':[
3+
137,
4+
],
5+
'project:biopython/Bio/SearchIO/ExonerateIO/exonerate_text.py':[
6+
32,
7+
],
8+
}

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.sonar.python.checks.hotspots.UnverifiedHostnameCheck;
5252
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
5353
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
54+
import org.sonar.python.checks.regex.ReluctantQuantifierWithEmptyContinuationCheck;
5455
import org.sonar.python.checks.regex.StringReplaceCheck;
5556

5657
public final class CheckList {
@@ -187,6 +188,7 @@ public static Iterable<Class> getChecks() {
187188
RedundantJumpCheck.class,
188189
RedundantRegexAlternativesCheck.class,
189190
RegexCheck.class,
191+
ReluctantQuantifierWithEmptyContinuationCheck.class,
190192
ReturnAndYieldInOneFunctionCheck.class,
191193
ReturnYieldOutsideFunctionCheck.class,
192194
RobustCipherAlgorithmCheck.class,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.sonar.check.Rule;
23+
import org.sonar.plugins.python.api.tree.CallExpression;
24+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
25+
import org.sonarsource.analyzer.commons.regex.finders.ReluctantQuantifierWithEmptyContinuationFinder;
26+
27+
/**
 * Python check registered under rule key S6019.
 *
 * <p>For each parsed regular expression it creates a
 * {@link ReluctantQuantifierWithEmptyContinuationFinder}, hands it {@code this::addIssue}
 * as the reporting callback, and lets the finder visit the parse result.
 * {@code addIssue} is not declared in this class; presumably it is inherited from
 * {@code AbstractRegexCheck} (confirm there).
 */
@Rule(key = "S6019")
28+
public class ReluctantQuantifierWithEmptyContinuationCheck extends AbstractRegexCheck {
29+
30+
@Override
31+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
32+
// This method contains no detection logic of its own: it only wires the finder to addIssue.
// regexFunctionCall is not used here.
new ReluctantQuantifierWithEmptyContinuationFinder(this::addIssue).visit(regexParseResult);
33+
}
34+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<p>When a reluctant (or lazy) quantifier is followed by a pattern that can match the empty string or directly by the end of the regex, it will always
2+
match zero times for <code>*?</code> or one time for <code>+?</code>. If a reluctant quantifier is followed directly by the end anchor
3+
(<code>$</code>), it behaves indistinguishably from a greedy quantifier while being less efficient.</p>
4+
<p>This is likely a sign that the regex does not work as intended.</p>
5+
<h2>Noncompliant Code Example</h2>
6+
<pre>
7+
re.sub(r"start\w*?(end)?", "x", "start123endstart456") # Noncompliant. In contrast to what one would expect, the result is not "xx"
8+
9+
re.match(r"^\d*?$", "123456789") # Noncompliant. Matches the same as r"^\d*$", but will backtrack in every position.
10+
</pre>
11+
<h2>Compliant Solution</h2>
12+
<pre>
13+
re.sub(r"start\w*?(end|$)", "x", "start123endstart456") # Result is "xx"
14+
15+
re.match(r"^\d*$", "123456789")
16+
</pre>
17+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Reluctant quantifiers in regular expressions should be followed by an expression that can\u0027t match the empty string",
3+
"type": "CODE_SMELL",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "10min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Major",
13+
"ruleSpecification": "RSPEC-6019",
14+
"sqKey": "S6019",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137
"S5855",
138138
"S5864",
139139
"S5886",
140-
"S5890"
140+
"S5890",
141+
"S6019"
141142
]
142143
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2021 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
/**
 * Runs {@link ReluctantQuantifierWithEmptyContinuationCheck} against the Python fixture
 * {@code reluctantQuantifierWithEmptyContinuationCheck.py} through {@code PythonCheckVerifier}.
 */
public class ReluctantQuantifierWithEmptyContinuationCheckTest {
26+
27+
@Test
28+
public void test() throws Exception {
29+
// The fixture path is relative; presumably it resolves against the module's working directory
// and the expected issues come from the marker comments inside the fixture (confirm against
// PythonCheckVerifier).
PythonCheckVerifier.verify("src/test/resources/checks/regex/reluctantQuantifierWithEmptyContinuationCheck.py", new ReluctantQuantifierWithEmptyContinuationCheck());
30+
}
31+
32+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
3+
4+
# Fixture cases that are expected to raise an issue. In each pattern a reluctant
# quantifier (*?, +?, {2,4}?) is followed by something that can match the empty
# string (x? or an empty group) or by the end anchor $. The trailing comment on
# each call carries the expected issue message in double braces.
def non_compliant(input):
5+
re.match(r".*?x?", input) # Noncompliant {{Fix this reluctant quantifier that will only ever match 0 repetitions.}}
6+
# ^^^
7+
re.match(r".+?x?", input) # Noncompliant {{Fix this reluctant quantifier that will only ever match 1 repetition.}}
8+
re.match(r".{2,4}?x?", input) # Noncompliant {{Fix this reluctant quantifier that will only ever match 2 repetitions.}}
9+
re.match(r".*?$", input) # Noncompliant {{Remove the '?' from this unnecessarily reluctant quantifier.}}
10+
re.match(r".*?()$", input) # Noncompliant {{Remove the '?' from this unnecessarily reluctant quantifier.}}
11+
12+
13+
# Fixture cases that carry no expected-issue marker. In each pattern the reluctant
# quantifier is followed by a mandatory literal character or character class,
# so the continuation cannot match the empty string.
def compliant(input):
14+
re.match(r".*?x", input)
15+
re.match(r".*?x$", input)
16+
re.match(r".*?[abc]", input)

0 commit comments

Comments
 (0)