Skip to content

Commit 5a2ef83

Browse files
authored
Merge pull request #7120 from github/nickrolfe/regexp_g_anchor
Ruby/Python: parse anchors in regexes as special characters
2 parents 4128f56 + 0541576 commit 5a2ef83

File tree

14 files changed

+200
-110
lines changed

14 files changed

+200
-110
lines changed

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,8 @@ private int toHex(string hex) {
539539
/**
540540
* A word boundary, that is, a regular expression term of the form `\b`.
541541
*/
542-
class RegExpWordBoundary extends RegExpEscape {
543-
RegExpWordBoundary() { this.getUnescaped() = "b" }
542+
class RegExpWordBoundary extends RegExpSpecialChar {
543+
RegExpWordBoundary() { this.getChar() = "\\b" }
544544
}
545545

546546
/**
@@ -809,7 +809,7 @@ class RegExpDot extends RegExpSpecialChar {
809809
}
810810

811811
/**
812-
* A dollar assertion `$` matching the end of a line.
812+
* A dollar assertion `$` matching the end of a line, or `\Z` matching the end of the string.
813813
*
814814
* Example:
815815
*
@@ -818,13 +818,13 @@ class RegExpDot extends RegExpSpecialChar {
818818
* ```
819819
*/
820820
class RegExpDollar extends RegExpSpecialChar {
821-
RegExpDollar() { this.getChar() = "$" }
821+
RegExpDollar() { this.getChar() = ["$", "\\Z"] }
822822

823823
override string getPrimaryQLClass() { result = "RegExpDollar" }
824824
}
825825

826826
/**
827-
* A caret assertion `^` matching the beginning of a line.
827+
* A caret assertion `^` matching the beginning of a line, or `\A` matching the beginning of the string.
828828
*
829829
* Example:
830830
*
@@ -833,7 +833,7 @@ class RegExpDollar extends RegExpSpecialChar {
833833
* ```
834834
*/
835835
class RegExpCaret extends RegExpSpecialChar {
836-
RegExpCaret() { this.getChar() = "^" }
836+
RegExpCaret() { this.getChar() = ["^", "\\A"] }
837837

838838
override string getPrimaryQLClass() { result = "RegExpCaret" }
839839
}

python/ql/lib/semmle/python/regex.qll

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -437,11 +437,18 @@ abstract class RegexString extends Expr {
437437
}
438438

439439
predicate specialCharacter(int start, int end, string char) {
440+
not this.inCharSet(start) and
440441
this.character(start, end) and
441-
end = start + 1 and
442-
char = this.getChar(start) and
443-
(char = "$" or char = "^" or char = ".") and
444-
not this.inCharSet(start)
442+
(
443+
end = start + 1 and
444+
char = this.getChar(start) and
445+
(char = "$" or char = "^" or char = ".")
446+
or
447+
end = start + 2 and
448+
this.escapingChar(start) and
449+
char = this.getText().substring(start, end) and
450+
char = ["\\A", "\\Z", "\\b", "\\B"]
451+
)
445452
}
446453

447454
/** Whether the text in the range start,end is a group */
@@ -901,7 +908,8 @@ abstract class RegexString extends Expr {
901908
exists(int x | this.firstPart(x, end) |
902909
this.emptyMatchAtStartGroup(x, start) or
903910
this.qualifiedItem(x, start, true, _) or
904-
this.specialCharacter(x, start, "^")
911+
// ^ and \A match the start of the string
912+
this.specialCharacter(x, start, ["^", "\\A"])
905913
)
906914
or
907915
exists(int y | this.firstPart(start, y) |
@@ -926,9 +934,8 @@ abstract class RegexString extends Expr {
926934
or
927935
this.qualifiedItem(end, y, true, _)
928936
or
929-
this.specialCharacter(end, y, "$")
930-
or
931-
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
937+
// $ and \Z match the end of the string.
938+
this.specialCharacter(end, y, ["$", "\\Z"])
932939
)
933940
or
934941
exists(int x |

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
| \\A[+-]?\\d+ | 3 | 4 |
5959
| \\A[+-]?\\d+ | 4 | 5 |
6060
| \\A[+-]?\\d+ | 7 | 9 |
61+
| \\Afoo\\Z | 0 | 2 |
62+
| \\Afoo\\Z | 2 | 3 |
63+
| \\Afoo\\Z | 3 | 4 |
64+
| \\Afoo\\Z | 4 | 5 |
65+
| \\Afoo\\Z | 5 | 7 |
6166
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 0 | 2 |
6267
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 12 | 13 |
6368
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 16 | 18 |
@@ -71,6 +76,11 @@
7176
| \\\|\\[\\][123]\|\\{\\} | 9 | 10 |
7277
| \\\|\\[\\][123]\|\\{\\} | 12 | 14 |
7378
| \\\|\\[\\][123]\|\\{\\} | 14 | 16 |
79+
| \\bfoo\\B | 0 | 2 |
80+
| \\bfoo\\B | 2 | 3 |
81+
| \\bfoo\\B | 3 | 4 |
82+
| \\bfoo\\B | 4 | 5 |
83+
| \\bfoo\\B | 5 | 7 |
7484
| \|x | 1 | 2 |
7585
| ^(^y\|^z)(u$\|v$)$ | 0 | 1 |
7686
| ^(^y\|^z)(u$\|v$)$ | 2 | 3 |

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,25 @@
4545
| \\+0 | first | 0 | 2 |
4646
| \\+0 | last | 2 | 3 |
4747
| \\A[+-]?\\d+ | first | 0 | 2 |
48+
| \\A[+-]?\\d+ | first | 2 | 6 |
49+
| \\A[+-]?\\d+ | first | 2 | 7 |
50+
| \\A[+-]?\\d+ | first | 7 | 9 |
51+
| \\A[+-]?\\d+ | first | 7 | 10 |
4852
| \\A[+-]?\\d+ | last | 7 | 9 |
4953
| \\A[+-]?\\d+ | last | 7 | 10 |
54+
| \\Afoo\\Z | first | 0 | 2 |
55+
| \\Afoo\\Z | first | 2 | 3 |
56+
| \\Afoo\\Z | last | 4 | 5 |
57+
| \\Afoo\\Z | last | 5 | 7 |
5058
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
5159
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
5260
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 33 |
5361
| \\\|\\[\\][123]\|\\{\\} | first | 0 | 2 |
5462
| \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 |
5563
| \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 |
5664
| \\\|\\[\\][123]\|\\{\\} | last | 14 | 16 |
65+
| \\bfoo\\B | first | 0 | 2 |
66+
| \\bfoo\\B | last | 5 | 7 |
5767
| \|x | first | 1 | 2 |
5868
| \|x | last | 1 | 2 |
5969
| ^(^y\|^z)(u$\|v$)$ | first | 0 | 1 |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,20 @@
116116
| \\+0 | char | 0 | 2 |
117117
| \\+0 | char | 2 | 3 |
118118
| \\+0 | sequence | 0 | 3 |
119-
| \\A[+-]?\\d+ | char | 0 | 2 |
119+
| \\A[+-]?\\d+ | \\A | 0 | 2 |
120120
| \\A[+-]?\\d+ | char | 3 | 4 |
121121
| \\A[+-]?\\d+ | char | 4 | 5 |
122122
| \\A[+-]?\\d+ | char | 7 | 9 |
123123
| \\A[+-]?\\d+ | char-set | 2 | 6 |
124124
| \\A[+-]?\\d+ | qualified | 2 | 7 |
125125
| \\A[+-]?\\d+ | qualified | 7 | 10 |
126126
| \\A[+-]?\\d+ | sequence | 0 | 10 |
127+
| \\Afoo\\Z | \\A | 0 | 2 |
128+
| \\Afoo\\Z | \\Z | 5 | 7 |
129+
| \\Afoo\\Z | char | 2 | 3 |
130+
| \\Afoo\\Z | char | 3 | 4 |
131+
| \\Afoo\\Z | char | 4 | 5 |
132+
| \\Afoo\\Z | sequence | 0 | 7 |
127133
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 0 | 2 |
128134
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 12 | 13 |
129135
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 16 | 18 |
@@ -148,6 +154,12 @@
148154
| \\\|\\[\\][123]\|\\{\\} | choice | 0 | 16 |
149155
| \\\|\\[\\][123]\|\\{\\} | sequence | 0 | 11 |
150156
| \\\|\\[\\][123]\|\\{\\} | sequence | 12 | 16 |
157+
| \\bfoo\\B | \\B | 5 | 7 |
158+
| \\bfoo\\B | \\b | 0 | 2 |
159+
| \\bfoo\\B | char | 2 | 3 |
160+
| \\bfoo\\B | char | 3 | 4 |
161+
| \\bfoo\\B | char | 4 | 5 |
162+
| \\bfoo\\B | sequence | 0 | 7 |
151163
| \|x | char | 1 | 2 |
152164
| \|x | choice | 0 | 2 |
153165
| \|x | sequence | 1 | 2 |

python/ql/test/library-tests/regex/test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,7 @@
7373

7474
# Consistency check
7575
baz = re.compile(r'\+0')
76+
77+
# Anchors
78+
re.compile(r'\Afoo\Z')
79+
re.compile(r'\bfoo\B')

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,5 +100,8 @@
100100
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
101101
| redos.py:380:35:380:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
102102
| redos.py:381:35:381:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
103+
| redos.py:384:26:384:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
104+
| redos.py:385:24:385:30 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
105+
| redos.py:386:26:386:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
103106
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
104107
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

python/ql/test/query-tests/Security/CWE-730-ReDoS/redos.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,4 +378,9 @@
378378

379379
# BAD
380380
bad88 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X')
381-
bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')
381+
bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')
382+
383+
# BAD
384+
bad90 = re.compile(r'\A(\d|0)*x')
385+
bad91 = re.compile(r'(\d|0)*\Z')
386+
bad92 = re.compile(r'\b(\d|0)*x')

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ class RegExp extends AST::RegExpLiteral {
397397
end = start + 2 and
398398
this.escapingChar(start) and
399399
char = this.getText().substring(start, end) and
400-
char = ["\\A", "\\Z", "\\z"]
400+
char = ["\\A", "\\Z", "\\z", "\\G", "\\b", "\\B"]
401401
)
402402
}
403403

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,8 @@ private int toHex(string hex) {
462462
/**
463463
* A word boundary, that is, a regular expression term of the form `\b`.
464464
*/
465-
class RegExpWordBoundary extends RegExpEscape {
466-
RegExpWordBoundary() { this.getUnescaped() = "b" }
465+
class RegExpWordBoundary extends RegExpSpecialChar {
466+
RegExpWordBoundary() { this.getChar() = "\\b" }
467467
}
468468

469469
/**

0 commit comments

Comments
 (0)