Skip to content

Commit 34b054f

Browse files
committed
Python: Add consistency checks
1 parent d658ef1 commit 34b054f

File tree

14 files changed

+535
-1
lines changed

14 files changed

+535
-1
lines changed

python/ql/test/library-tests/regex/Alternation.expected

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
| (?P<name>[\\w]+)\| | 0 | 16 | (?P<name>[\\w]+)\| | 16 | 16 | |
99
| (\\033\|~{) | 1 | 8 | \\033\|~{ | 1 | 5 | \\033 |
1010
| (\\033\|~{) | 1 | 8 | \\033\|~{ | 6 | 8 | ~{ |
11+
| \\+0 | 0 | 3 | \\+0 | 0 | 2 | \\+ |
12+
| \\+0 | 0 | 3 | \\+0 | 0 | 3 | \\+0 |
1113
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 0 | 11 | \\\|\\[\\][123] |
1214
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 12 | 16 | \\{\\} |
1315
| \|x | 0 | 2 | \|x | 0 | 0 | |
@@ -19,4 +21,4 @@
1921
| x\| | 0 | 2 | x\| | 0 | 1 | x |
2022
| x\| | 0 | 2 | x\| | 2 | 2 | |
2123
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 0 | 1 | x |
22-
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 2 | 10 | (?<!\\w)l |
24+
| x\|(?<!\\w)l | 0 | 10 | x\|(?<!\\w)l | 2 | 10 | (?<!\\w)l |

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
| [^A-Z] | 2 | 3 |
5353
| [^A-Z] | 4 | 5 |
5454
| [^]] | 2 | 3 |
55+
| \\+0 | 0 | 2 |
56+
| \\+0 | 0 | 3 |
5557
| \\A[+-]?\\d+ | 0 | 2 |
5658
| \\A[+-]?\\d+ | 3 | 4 |
5759
| \\A[+-]?\\d+ | 4 | 5 |

python/ql/test/library-tests/regex/Consistency.expected

Whitespace-only changes.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/**
2+
* Flags regular expressions that are parsed ambiguously
3+
*/
4+
5+
import python
6+
import semmle.python.regex
7+
8+
from string str, Location loc, int counter
9+
where
10+
counter = strictcount(Regex term | term.getLocation() = loc and term.getText() = str) and
11+
counter > 1
12+
select str, counter, loc

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@
4242
| [^A-Z] | last | 0 | 6 |
4343
| [^]] | first | 0 | 4 |
4444
| [^]] | last | 0 | 4 |
45+
| \\+0 | first | 0 | 2 |
46+
| \\+0 | first | 0 | 3 |
47+
| \\+0 | last | 0 | 2 |
48+
| \\+0 | last | 0 | 3 |
4549
| \\A[+-]?\\d+ | first | 0 | 2 |
4650
| \\A[+-]?\\d+ | last | 7 | 9 |
4751
| \\A[+-]?\\d+ | last | 7 | 10 |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@
113113
| [^]] | char | 2 | 3 |
114114
| [^]] | char-set | 0 | 4 |
115115
| [^]] | sequence | 0 | 4 |
116+
| \\+0 | char | 0 | 2 |
117+
| \\+0 | char | 0 | 3 |
118+
| \\+0 | choice | 0 | 3 |
119+
| \\+0 | sequence | 0 | 2 |
120+
| \\+0 | sequence | 0 | 3 |
116121
| \\A[+-]?\\d+ | char | 0 | 2 |
117122
| \\A[+-]?\\d+ | char | 3 | 4 |
118123
| \\A[+-]?\\d+ | char | 4 | 5 |

python/ql/test/library-tests/regex/test.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,6 @@
7070
# FP reported in https://github.com/github/codeql/issues/3712
7171
# This does not define a regex (but could be used by other code to do so)
7272
escaped = re.escape("https://www.humblebundle.com/home/library")
73+
74+
# Consistency check
75+
baz = re.compile(r'\+0')
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
| \\+0 | 2 | test.py:2:18:2:23 | test.py:2 |
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* Flags regular expressions that are parsed ambiguously
3+
*/
4+
5+
import python
6+
import semmle.python.RegexTreeView
7+
8+
from string str, int counter, Location loc
9+
where
10+
counter =
11+
strictcount(RegExpTerm term |
12+
term.getLocation() = loc and term.isRootTerm() and term.toString() = str
13+
) and
14+
counter > 1
15+
select str, counter, loc
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import re
2+
3+
# linear
4+
# https://github.com/github/codeql-python-CVE-coverage/issues/439
5+
rex_blame = re.compile(r'\s*(\d+)\s*(\S+) (.*)')
6+
7+
# https://github.com/github/codeql-python-CVE-coverage/issues/402
8+
whitespace = br"[\000\011\012\014\015\040]"
9+
whitespace_optional = whitespace + b"*"
10+
newline_only = br"[\r\n]+"
11+
newline = whitespace_optional + newline_only + whitespace_optional
12+
toFlag = re.compile(newline)
13+
14+
# https://github.com/github/codeql-python-CVE-coverage/issues/400
15+
re.compile(r'[+-]?(\d+)*\.\d+%?')
16+
re.compile(r'"""\s+(?:.|\n)*?\s+"""')
17+
re.compile(r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)')
18+
re.compile(r'".*``.*``.*"')
19+
re.compile(r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)')
20+
re.compile(r'(%config)(\s*\(\s*)(\w+)(\s*=\s*)(.*?)(\s*\)\s*)')
21+
re.compile(r'(%new)(\s*)(\()(\s*.*?\s*)(\))')
22+
re.compile(r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?')
23+
re.compile(r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)')
24+
25+
# linear
26+
# https://github.com/github/codeql-python-CVE-coverage/issues/392
27+
simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
28+
29+
# https://github.com/github/codeql-python-CVE-coverage/issues/249
30+
rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
31+
'realm=(["\']?)([^"\']*)\\2', re.I)
32+
33+
# https://github.com/github/codeql-python-CVE-coverage/issues/248
34+
gauntlet = re.compile(
35+
r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
36+
flags=re.U
37+
)
38+
39+
# https://github.com/github/codeql-python-CVE-coverage/issues/227
40+
# from .compat import tobytes
41+
42+
WS = "[ \t]"
43+
OWS = WS + "{0,}?"
44+
45+
# RFC 7230 Section 3.2.6 "Field Value Components":
46+
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
47+
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
48+
# / DIGIT / ALPHA
49+
# obs-text = %x80-FF
50+
TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
51+
OBS_TEXT = r"\x80-\xff"
52+
TOKEN = TCHAR + "{1,}"
53+
# RFC 5234 Appendix B.1 "Core Rules":
54+
# VCHAR = %x21-7E
55+
# ; visible (printing) characters
56+
VCHAR = r"\x21-\x7e"
57+
# header-field = field-name ":" OWS field-value OWS
58+
# field-name = token
59+
# field-value = *( field-content / obs-fold )
60+
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
61+
# field-vchar = VCHAR / obs-text
62+
# Errata from: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
63+
# changes field-content to:
64+
#
65+
# field-content = field-vchar [ 1*( SP / HTAB / field-vchar )
66+
# field-vchar ]
67+
68+
FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
69+
FIELD_CONTENT = FIELD_VCHAR + "([ \t" + VCHAR + OBS_TEXT + "]+" + FIELD_VCHAR + "){,1}"
70+
FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
71+
72+
HEADER_FIELD = re.compile(
73+
# tobytes(
74+
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
75+
# )
76+
)
77+
78+
# https://github.com/github/codeql-python-CVE-coverage/issues/224
79+
pattern = re.compile(
80+
r'^(:?(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|' # domain pt.1
81+
r'([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|' # domain pt.2
82+
r'([a-zA-Z0-9][-_a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.)+' # domain pt.3
83+
r'([a-zA-Z]{2,13}|(xn--[a-zA-Z0-9]{2,30}))$' # TLD
84+
)
85+
86+
# https://github.com/github/codeql-python-CVE-coverage/issues/189
87+
URL_REGEX = (
88+
r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|'
89+
r'[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|'
90+
r'(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|'
91+
r'[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))' # "emacs!
92+
)
93+
94+
url = re.compile(URL_REGEX)

0 commit comments

Comments
 (0)