Skip to content

Commit 213aece

Browse files
SONARPY-928 Nested character classes are not supported by Python (#984)
1 parent 43295e2 commit 213aece

File tree

4 files changed

+14
-28
lines changed

4 files changed

+14
-28
lines changed
Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,8 @@
11
{
2-
'project:biopython/Bio/motifs/pfm.py':[
3-
338,
4-
],
5-
'project:mypy-0.782/test-data/stdlib-samples/3.2/glob.py':[
6-
76,
7-
77,
8-
],
9-
'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[
10-
53,
11-
],
12-
'project:tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py':[
13-
75,
14-
],
152
'project:tornado-2.3/demos/appengine/markdown.py':[
16-
826,
3+
822,
174
],
185
'project:tornado-2.3/demos/blog/markdown.py':[
19-
826,
6+
822,
207
],
218
}

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
<mockito.version>3.9.0</mockito.version>
9292
<sonar.version>8.9.0.43852</sonar.version>
9393
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
94-
<sonar-analyzer-commons.version>1.21.0.821</sonar-analyzer-commons.version>
94+
<sonar-analyzer-commons.version>1.21.0.829</sonar-analyzer-commons.version>
9595
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
9696
<sslr.version>1.23</sslr.version>
9797
<protobuf.version>3.17.3</protobuf.version>

python-checks/src/test/resources/checks/regex/duplicatesInCharacterClassCheck.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,6 @@ def non_compliant(input):
3131
re.match(r"[\"\".]", input) # Noncompliant
3232
re.match(r"[\x{F600}-\x{F637}\x{F608}]", input) # Noncompliant
3333
re.match(r"[\Qxx\E]", input) # Noncompliant
34-
re.match(r"[[a][a]]", input) # Noncompliant
35-
re.match(r"[[abc][b]]", input) # Noncompliant
36-
re.match(r"[[^a]b]", input) # Noncompliant
37-
re.match(r"[[^a]z]", input) # Noncompliant
38-
re.match(r"[a[^z]]", input) # Noncompliant
39-
re.match(r"[z[^a]]", input) # Noncompliant
4034
re.match(r"[\s\Sx]", input) # Noncompliant
4135
re.match(r"(?U)[\s\Sx]", input) # Noncompliant
4236
re.match(r"[\w\d]", input) # Noncompliant
@@ -54,14 +48,15 @@ def non_compliant(input):
5448
re.match(r"(?i)[äÄ]", input) # Noncompliant
5549
re.match(r"(?i)[Ä-Üä]", input) # Noncompliant
5650
re.match(r"(?i)[a-Öö]", input) # Noncompliant
51+
re.match(r"[[^\s\S]x]", input) # Noncompliant
52+
re.match(r"(?U)[[^\W]a]", input) # Noncompliant
5753

5854

5955
def compliant(input):
6056
re.match(r"a-z\d", input)
6157
re.match(r"[0-9][0-9]?", input)
6258
re.match(r"[xX]", input)
6359
re.match(r"[\s\S]", input)
64-
re.match(r"[[^\s\S]x]", input)
6560
re.match(r"(?U)[\s\S]", input)
6661
re.match(r"(?U)[\S\u0085\u2028\u2029]", input)
6762
re.match(r"[\d\D]", input)
@@ -85,8 +80,6 @@ def compliant(input):
8580
re.match(r"[z-a9-0]", input) # Illegal character class should not make the check explode
8681
re.match(r"[aa", input) # Check should not run on syntactically invalid regexen
8782
re.match(r"(?U)[\wä]", input) # False negative because we don't support Unicode characters in \w and \W
88-
re.match(r"(?U)[[^\W]a]", input) # False negative because once we negate a character class whose contents we don't
89-
# fully understand, we ignore it to avoid false positives
9083
re.match(r"[[a-z&&b-e]c]", input) # FN because we don't support intersections
9184
re.match(r"(?i)[A-_d-{]", input) # FN because we ignore case insensitivity unless both ends of the ranges are letters
9285
re.match(r"(?i)[A-z_]", input) # FN because A-z gets misinterpreted as A-Za-z due to the way we handle case insensitivity

python-checks/src/test/resources/checks/regex/invalidRegexCheck.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ def unsupported_feature(input):
2525

2626

2727
def false_positives():
28-
re.compile(r"\s*([ACGT])\s*[[]*[|]*\s*([0-9.\s]+)\s*[]]*\s*") # Noncompliant
29-
re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') # Noncompliant
30-
re.compile(r'([^,[\]]*)(\[([^\]]+)\])?$') # Noncompliant
28+
re.compile(r'''
29+
# Match tail of: [text][id]
30+
[ ]? # one optional space
31+
(?:\n[ ]*)? # one optional newline followed by spaces
32+
\[
33+
(?P<id>.*?)
34+
\]
35+
''', re.X | re.S)
36+
# Noncompliant@-5

0 commit comments

Comments
 (0)