Skip to content

Commit ae4c76c

Browse files
authored
Merge pull request github#13975 from yoff/python/parsemodechars-not-chars
2 parents dd27442 + 68cd422 commit ae4c76c

File tree

9 files changed

+42
-17
lines changed

9 files changed

+42
-17
lines changed

python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -683,24 +683,43 @@ class RegExp extends Expr instanceof StrConst {
683683
* Holds if a parse mode starts between `start` and `end`.
684684
*/
685685
private predicate flag_group_start(int start, int end) {
686+
this.flag_group_start_no_modes(start, _) and
687+
end = max(int i | this.mode_character(start, i) | i + 1)
688+
}
689+
690+
/**
691+
* Holds if the initial part of a parse mode, not containing any
692+
* mode characters is between `start` and `end`.
693+
*/
694+
private predicate flag_group_start_no_modes(int start, int end) {
686695
this.isGroupStart(start) and
687696
this.getChar(start + 1) = "?" and
688697
this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and
689698
end = start + 2
690699
}
691700

692701
/**
693-
* Holds if a parse mode group is between `start` and `end`, and includes the
694-
* mode flag `c`. For example the following span, with mode flag `i`:
702+
* Holds if `pos` contains a mo character from the
703+
* flag group starting at `start`.
704+
*/
705+
private predicate mode_character(int start, int pos) {
706+
this.flag_group_start_no_modes(start, pos)
707+
or
708+
this.mode_character(start, pos - 1) and
709+
this.getChar(pos) in ["i", "L", "m", "s", "u", "x"]
710+
}
711+
712+
/**
713+
* Holds if a parse mode group includes the mode flag `c`.
714+
* For example the following parse mode group, with mode flag `i`:
695715
* ```
696716
* (?i)
697717
* ```
698718
*/
699-
private predicate flag_group(int start, int end, string c) {
700-
exists(int inStart, int inEnd |
701-
this.flag_group_start(start, inStart) and
702-
this.groupContents(start, end, inStart, inEnd) and
703-
this.getChar([inStart .. inEnd - 1]) = c
719+
private predicate flag(string c) {
720+
exists(int pos |
721+
this.mode_character(_, pos) and
722+
this.getChar(pos) = c
704723
)
705724
}
706725

@@ -709,7 +728,7 @@ class RegExp extends Expr instanceof StrConst {
709728
* it is defined by a prefix.
710729
*/
711730
string getModeFromPrefix() {
712-
exists(string c | this.flag_group(_, _, c) |
731+
exists(string c | this.flag(c) |
713732
c = "i" and result = "IGNORECASE"
714733
or
715734
c = "L" and result = "LOCALE"

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 22 | 23 |
3737
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 24 | 25 |
3838
| (?P<name>[\\w]+)\| | 10 | 12 |
39-
| (?m)^(?!$) | 2 | 3 |
4039
| (?m)^(?!$) | 4 | 5 |
4140
| (?m)^(?!$) | 8 | 9 |
4241
| (\\033\|~{) | 1 | 5 |

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
| (?P<name>[\\w]+)\| | first | 9 | 14 |
2323
| (?P<name>[\\w]+)\| | last | 9 | 13 |
2424
| (?P<name>[\\w]+)\| | last | 9 | 14 |
25-
| (?m)^(?!$) | first | 2 | 3 |
25+
| (?m)^(?!$) | first | 4 | 5 |
26+
| (?m)^(?!$) | first | 8 | 9 |
2627
| (?m)^(?!$) | last | 4 | 5 |
2728
| (?m)^(?!$) | last | 8 | 9 |
2829
| (\\033\|~{) | first | 1 | 5 |

python/ql/test/library-tests/regex/GroupContents.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 10 | (?:[^%]\|^) | 3 | 9 | [^%]\|^ |
99
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 14 | 19 | (\\w*) | 15 | 18 | \\w* |
1010
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |
11-
| (?m)^(?!$) | 0 | 4 | (?m) | 2 | 3 | m |
1211
| (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ |
1312
| (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ |
1413
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 2 | 16 | (?P<txt>[^[]*) | 10 | 15 | [^[]* |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,9 @@
7777
| (?P<name>[\\w]+)\| | sequence | 0 | 15 |
7878
| (?m)^(?!$) | $ | 8 | 9 |
7979
| (?m)^(?!$) | ^ | 4 | 5 |
80-
| (?m)^(?!$) | char | 2 | 3 |
80+
| (?m)^(?!$) | empty group | 0 | 4 |
8181
| (?m)^(?!$) | empty group | 5 | 10 |
82-
| (?m)^(?!$) | non-empty group | 0 | 4 |
8382
| (?m)^(?!$) | sequence | 0 | 10 |
84-
| (?m)^(?!$) | sequence | 2 | 3 |
8583
| (?m)^(?!$) | sequence | 8 | 9 |
8684
| (\\033\|~{) | char | 1 | 5 |
8785
| (\\033\|~{) | char | 6 | 7 |

python/ql/test/query-tests/Security/CWE-116-BadTagFilter/BadTagFilter.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
| tst.py:4:20:4:43 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
22
| tst.py:5:20:5:43 | <script.*?>.*?<\\/script> | This regular expression does not match script end tags like </script >. |
33
| tst.py:9:20:9:30 | <!--.*--!?> | This regular expression does not match comments containing newlines. |
4+
| tst.py:11:20:11:34 | (?i)<!--.*--!?> | This regular expression does not match comments containing newlines. |
45
| tst.py:12:20:12:53 | <script.*?>(.\|\\s)*?<\\/script[^>]*> | This regular expression matches <script></script>, but not <script \\n></script> |
56
| tst.py:13:20:13:51 | <script[^>]*?>.*?<\\/script[^>]*> | This regular expression matches <script>...</script>, but not <script >...\\n</script> |
67
| tst.py:14:20:14:58 | <script(\\s\|\\w\|=\|")*?>.*?<\\/script[^>]*> | This regular expression does not match script tags where the attribute uses single-quotes. |

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t'' and containing many repetitions of '\\\\\\\\'. |
88
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t(' and containing many repetitions of '\\\\\\\\'. |
99
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '!\|\\n-\|\\n' and containing many repetitions of '\|\|\\n'. |
10-
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
10+
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '\\\\/'. |
1111
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '#' and containing many repetitions of '#'. |
1212
| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
1313
| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
@@ -105,5 +105,6 @@
105105
| redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
106106
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\\u00c6'. |
107107
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
108-
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 's' and containing many repetitions of '\\n'. |
109-
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'is' and containing many repetitions of '\\n'. |
108+
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
109+
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
110+
| unittests.py:13:22:13:30 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of '\\n'. |

python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
re.compile(r'(?i)(?:.|\n)*b') # No ReDoS.
1111
re.compile(r'(?s)(?:.|\n)*b') # Has ReDoS.
1212
re.compile(r'(?is)(?:.|\n)*b') # Has ReDoS.
13+
re.compile(r'(?is)X(?:.|\n)*Y') # Has ReDoS.

shared/regex/codeql/regex/nfa/NfaUtils.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,12 @@ module Make<RegexTreeViewSig TreeImpl> {
760760
or
761761
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
762762
or
763+
exists(RegExpGroup grp | lbl = Epsilon() |
764+
not exists(grp.getAChild()) and
765+
q1 = before(grp) and
766+
q2 = before(grp.getSuccessor())
767+
)
768+
or
763769
exists(EffectivelyStar star | lbl = Epsilon() |
764770
q1 = before(star) and q2 = before(star.getChild(0))
765771
or

0 commit comments

Comments
 (0)