Skip to content

Commit d9ea782

Browse files
SONARPY-981 Rule S6328: Replacement strings should reference existing regular expression groups (#1112)
1 parent ab986be commit d9ea782

File tree

8 files changed

+205
-1
lines changed

8 files changed

+205
-1
lines changed

its/ruling/src/test/java/org/sonar/python/it/RulingHelper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ static List<String> bugRuleKeys() {
9191
"S5996",
9292
"S6002",
9393
"S6323",
94+
"S6328",
9495
"S905",
9596
"S930"
9697
);

python-checks/src/main/java/org/sonar/python/checks/CheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.sonar.python.checks.regex.EmptyAlternativeCheck;
5656
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
5757
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
58+
import org.sonar.python.checks.regex.GroupReplacementCheck;
5859
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
5960
import org.sonar.python.checks.regex.InvalidRegexCheck;
6061
import org.sonar.python.checks.regex.MultipleWhitespaceCheck;
@@ -151,6 +152,7 @@ public static Iterable<Class> getChecks() {
151152
FunctionUsingLoopVariableCheck.class,
152153
GenericExceptionRaisedCheck.class,
153154
GraphemeClustersInClassesCheck.class,
155+
GroupReplacementCheck.class,
154156
HardCodedCredentialsCheck.class,
155157
HardcodedIPCheck.class,
156158
HashingDataCheck.class,
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import java.util.ArrayList;
23+
import java.util.Collections;
24+
import java.util.HashSet;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.Optional;
28+
import java.util.Set;
29+
import java.util.regex.Matcher;
30+
import java.util.regex.Pattern;
31+
import java.util.stream.Collectors;
32+
import org.sonar.check.Rule;
33+
import org.sonar.plugins.python.api.tree.CallExpression;
34+
import org.sonar.plugins.python.api.tree.RegularArgument;
35+
import org.sonar.plugins.python.api.tree.StringLiteral;
36+
import org.sonar.plugins.python.api.tree.Tree;
37+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
38+
import org.sonarsource.analyzer.commons.regex.ast.CapturingGroupTree;
39+
import org.sonarsource.analyzer.commons.regex.ast.RegexBaseVisitor;
40+
41+
import static org.sonar.python.tree.TreeUtils.nthArgumentOrKeyword;
42+
43+
@Rule(key = "S6328")
44+
public class GroupReplacementCheck extends AbstractRegexCheck {
45+
46+
private static final String MESSAGE = "Referencing non-existing group%s: %s.";
47+
private static final Pattern REFERENCE_PATTERN = Pattern.compile("\\\\(\\d+)|\\\\g<(\\d+)>");
48+
49+
@Override
50+
protected Map<String, Integer> lookedUpFunctions() {
51+
return Collections.singletonMap("re.sub", 4);
52+
}
53+
54+
@Override
55+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
56+
GroupFinder groupFinder = new GroupFinder();
57+
groupFinder.visit(regexParseResult);
58+
checkReplacement(regexFunctionCall, groupFinder.groups);
59+
}
60+
61+
private void checkReplacement(CallExpression tree, Set<CapturingGroupTree> groups) {
62+
RegularArgument regArg = nthArgumentOrKeyword(1, "replacement", tree.arguments());
63+
if (regArg == null) return;
64+
65+
if (regArg.expression().is(Tree.Kind.STRING_LITERAL)) {
66+
StringLiteral expression = (StringLiteral) regArg.expression();
67+
List<Integer> references = collectReferences(expression.trimmedQuotesValue());
68+
references.removeIf(reference -> groups.stream().anyMatch(group -> group.getGroupNumber() == reference));
69+
if (!references.isEmpty()) {
70+
List<String> stringReferences = references.stream().map(String::valueOf).collect(Collectors.toList());
71+
regexContext.addIssue(expression, String.format(MESSAGE, references.size() == 1 ? "" : "s", String.join(", ", stringReferences)));
72+
}
73+
}
74+
}
75+
76+
private static List<Integer> collectReferences(String replacement) {
77+
Matcher match = REFERENCE_PATTERN.matcher(replacement);
78+
List<Integer> references = new ArrayList<>();
79+
while (match.find()) {
80+
// extract reference number out of one of the possible 2 groups of the regex
81+
Optional.ofNullable(match.group(1)).map(Integer::valueOf).filter(ref -> ref != 0).ifPresent(references::add);
82+
Optional.ofNullable(match.group(2)).map(Integer::valueOf).filter(ref -> ref != 0).ifPresent(references::add);
83+
}
84+
return references;
85+
}
86+
87+
static class GroupFinder extends RegexBaseVisitor {
88+
89+
private final Set<CapturingGroupTree> groups = new HashSet<>();
90+
91+
@Override
92+
public void visitCapturingGroup(CapturingGroupTree group) {
93+
groups.add(group);
94+
super.visitCapturingGroup(group);
95+
}
96+
}
97+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<p>The regex function <code>re.sub</code> can be used to perform a search and replace based on regular expression matches. The <code>repl</code>
2+
parameter can contain references to capturing groups used in the <code>pattern</code> parameter. This can be achieved with <code>\n</code> to
3+
reference the n’th group.</p>
4+
<p>When referencing a nonexistent group an error will be thrown for Python &lt; 3.5 or replaced by an empty string for Python &gt;= 3.5.</p>
5+
<h2>Noncompliant Code Example</h2>
6+
<pre>
7+
re.sub(r"(a)(b)(c)", r"\1, \9, \3", "abc") # Noncompliant - result is an re.error: invalid group reference
8+
</pre>
9+
<h2>Compliant Solution</h2>
10+
<pre>
11+
re.sub(r"(a)(b)(c)", r"\1, \2, \3", "abc")
12+
</pre>
13+
<h2>See</h2>
14+
<ul>
15+
<li> <a href="https://docs.python.org/3.10/library/re.html#re.sub">re.sub</a> - Python Documentation </li>
16+
</ul>
17+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"title": "Replacement strings should reference existing regular expression groups",
3+
"type": "BUG",
4+
"status": "ready",
5+
"remediation": {
6+
"func": "Constant\/Issue",
7+
"constantCost": "10min"
8+
},
9+
"tags": [
10+
"regex"
11+
],
12+
"defaultSeverity": "Major",
13+
"ruleSpecification": "RSPEC-6328",
14+
"sqKey": "S6328",
15+
"scope": "Main",
16+
"quickfix": "unknown"
17+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,11 @@
151151
"S5996",
152152
"S6002",
153153
"S6019",
154+
"S6035",
154155
"S6323",
155156
"S6326",
157+
"S6328",
156158
"S6331",
157-
"S6035",
158159
"S6395",
159160
"S6396",
160161
"S6397"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) 2011-2022 SonarSource SA
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 3 of the License, or (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public License
17+
* along with this program; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19+
*/
20+
package org.sonar.python.checks.regex;
21+
22+
import org.junit.Test;
23+
import org.sonar.python.checks.utils.PythonCheckVerifier;
24+
25+
public class GroupReplacementCheckTest {
26+
27+
@Test
28+
public void test() {
29+
PythonCheckVerifier.verify("src/test/resources/checks/regex/groupReplacementCheck.py", new GroupReplacementCheck());
30+
}
31+
32+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import re
2+
3+
4+
def non_compliant():
5+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
6+
7+
re.sub(r"(a)(b)(c)", r"\1, \9, \3", "abc") # Noncompliant
8+
# ^^^^^^^^^^^^^
9+
re.sub("(a)", r"\2", input) # Noncompliant {{Referencing non-existing group: 2.}}
10+
# ^^^^^
11+
re.sub("(a)", r'\g<2>', input) # Noncompliant
12+
re.sub("a", r"\g<1>", input) # Noncompliant
13+
re.sub("(?!a)", r"\g<1>", input) # Noncompliant
14+
re.sub("(a)", r'\2', input) # Noncompliant
15+
re.sub("(a)", r"\g<2> \2", input) # Noncompliant
16+
re.sub("(a)", r"\3 \g<2>", input) # Noncompliant {{Referencing non-existing groups: 3, 2.}}
17+
re.sub("(a)", r"\2 \1", input) # Noncompliant
18+
re.sub("(s)", r"\12 \1", input) # Noncompliant
19+
20+
21+
def compliant():
22+
input = "Bob is a Bird... Bob is a Plane... Bob is Superman!"
23+
re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)
24+
25+
re.sub("(a)", r"\0c", input)
26+
re.sub("(a)", r"\g<0>c", input)
27+
re.sub("(a)", r'\1', input)
28+
re.sub("(a)", r"\g<1>ttt", input)
29+
re.sub("(a)(b)", r"\g<1>aaa \2bbb", input)
30+
re.sub("(a(n))", r"\1a \g<2>", input)
31+
32+
# coverage
33+
re.match("up", input)
34+
pattern = r"a"
35+
re.sub("(ab)", pattern, input)
36+
re.sub("(a)", r"[\?]", input)
37+
re.sub("(A)")

0 commit comments

Comments
 (0)