Skip to content

Commit 52d9191

Browse files
committed
Merge branch 'python-port-ReDoS' of github.com:yoff/codeql into python-port-ReDoS
2 parents 09e71cf + c19522e commit 52d9191

File tree

4 files changed

+65
-21
lines changed

4 files changed

+65
-21
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
lgtm,codescanning
2-
* Ported _Inefficient regular expression_ (`py/redos`) query from javascript.
3-
* Ported _Polynomial regular expression used on uncontrolled data_ [`py/polynomial-redos`] query from javascript.
2+
* Added _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
3+
* Added _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.

python/ql/src/semmle/python/regex.qll

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,36 +130,57 @@ abstract class RegexString extends Expr {
130130
/** result is true for those start chars that actually mark a start of a char set. */
131131
boolean char_set_start(int pos) {
132132
exists(int index |
133-
char_set_delimiter(index, pos) = true and
133+
// is opening bracket
134+
this.char_set_delimiter(index, pos) = true and
134135
(
135-
index = 1 and result = true // if a '[' is first in the string (among brackets), it starts a char set
136+
// if this is the first bracket, `pos` starts a char set
137+
index = 1 and result = true
136138
or
139+
// if the previous char set delimiter was not a closing bracket, `pos` does
140+
// not start a char set. This is needed to handle cases such as `[[]` (a
141+
// char set that matches the `[` char)
137142
index > 1 and
138-
not char_set_delimiter(index - 1, _) = false and
143+
not this.char_set_delimiter(index - 1, _) = false and
139144
result = false
140145
or
141-
exists(int p1 |
142-
char_set_delimiter(index - 1, p1) = false and // if it is preceded by a closing bracket, it starts a char set
146+
// special handling of cases such as `[][]` (the character-set of the characters `]` and `[`).
147+
exists(int prev_closing_bracket_pos |
148+
// previous bracket is a closing bracket
149+
this.char_set_delimiter(index - 1, prev_closing_bracket_pos) = false and
143150
if
144-
exists(int p2 |
145-
p1 = p2 + 1
146-
or
147-
this.getChar(p2 + 1) = "^" and
148-
p1 = p2 + 2
151+
// check if the character that comes before the previous closing bracket
152+
// is an opening bracket (taking `^` into account)
153+
exists(int pos_before_prev_closing_bracket |
154+
if this.getChar(prev_closing_bracket_pos - 1) = "^"
155+
then pos_before_prev_closing_bracket = prev_closing_bracket_pos - 2
156+
else pos_before_prev_closing_bracket = prev_closing_bracket_pos - 1
149157
|
150-
char_set_delimiter(index - 2, p2) = true // but the closing bracket only closes...
158+
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
151159
)
152160
then
153-
exists(int p2 | char_set_delimiter(index - 2, p2) = true |
154-
result = char_set_start(p2).booleanNot() // ...if it is not the first in a char set
161+
// brackets without anything in between is not valid character ranges, so
162+
// the first closing bracket in `[]]` and `[^]]` does not count,
163+
//
164+
// and we should _not_ mark the second opening bracket in `[][]` and `[^][]`
165+
// as starting a new char set. ^ ^
166+
exists(int pos_before_prev_closing_bracket |
167+
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
168+
|
169+
result = this.char_set_start(pos_before_prev_closing_bracket).booleanNot()
155170
)
156-
else result = true
171+
else
172+
// if not, `pos` does in fact mark a real start of a character range
173+
result = true
157174
)
158175
)
159176
)
160177
}
161178

162-
/** result denotes if the index is a left bracket */
179+
/**
180+
* Helper predicate for chars that could be character-set delimiters.
181+
* Holds if the (non-escaped) char at `pos` in the string, is the (one-based) `index` occurrence of a bracket (`[` or `]`) in the string.
182+
* Result if `true` is the char is `[`, and `false` if the char is `]`.
183+
*/
163184
boolean char_set_delimiter(int index, int pos) {
164185
pos = rank[index](int p | this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and
165186
(

python/ql/test/library-tests/regex/charRangeTest.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
11
import re
22

3-
re.compile(r'[]-[]') #$ charRange=1:2-3:4
3+
4+
re.compile(r'[A-Z]') #$ charRange=1:2-3:4
5+
6+
try:
7+
re.compile(r'[]-[]') #$ SPURIOUS: charRange=1:2-3:4
8+
raise Exception("this should not be reached")
9+
except re.error:
10+
pass
11+
412
re.compile(r'[---]') #$ charRange=1:2-3:4
513
re.compile(r'[\---]') #$ charRange=1:3-4:5
614
re.compile(r'[--\-]') #$ charRange=1:2-3:5
715
re.compile(r'[\--\-]') #$ charRange=1:3-4:6
816
re.compile(r'[0-9-A-Z]') #$ charRange=1:2-3:4 charRange=5:6-7:8
917
re.compile(r'[0\-9-A-Z]') #$ charRange=4:5-6:7
10-
re.compile(r'[0--9-A-Z]') #$ charRange=1:2-3:4 charRange=4:5-6:7
18+
19+
try:
20+
re.compile(r'[0--9-A-Z]') #$ SPURIOUS: charRange=1:2-3:4 charRange=4:5-6:7
21+
raise Exception("this should not be reached")
22+
except re.error:
23+
pass
1124

1225
re.compile(r'[^A-Z]') #$ charRange=2:3-4:5
1326

python/ql/test/library-tests/regex/charSetTest.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,18 @@
1010
re.compile("[[]]") #$ charSet=0:3
1111
re.compile("[^]]") #$ charSet=0:4
1212
re.compile("[^-]") #$ charSet=0:4
13-
re.compile("[]-[]") #$ charSet=0:5
14-
re.compile("[^]-[]") #$ charSet=0:6
13+
14+
try:
15+
re.compile("[]-[]") #$ SPURIOUS: charSet=0:5
16+
raise Exception("this should not be reached")
17+
except re.error:
18+
pass
19+
20+
try:
21+
re.compile("[^]-[]") #$ SPURIOUS: charSet=0:6
22+
raise Exception("this should not be reached")
23+
except re.error:
24+
pass
1525

1626
re.compile("]]][[[[]") #$ charSet=3:8
1727

0 commit comments

Comments
 (0)