Skip to content

Commit 7ad1a21

Browse files
committed
Python: make mode characters not be characters
They are simply considered part of the group start.
1 parent a834703 commit 7ad1a21

File tree

2 files changed

+29
-8
lines changed

2 files changed

+29
-8
lines changed

python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -683,24 +683,45 @@ class RegExp extends Expr instanceof StrConst {
683683
* Holds if a parse mode starts between `start` and `end`.
684684
*/
685685
private predicate flag_group_start(int start, int end) {
686+
exists(int no_modes_end |
687+
this.flag_group_start_no_modes(start, no_modes_end) and
688+
end = max(int i | this.mode_character(start, i) | i + 1)
689+
)
690+
}
691+
692+
/**
693+
* Holds if the initial part of a parse mode, not containing any
694+
* mode characters, is between `start` and `end`.
695+
*/
696+
private predicate flag_group_start_no_modes(int start, int end) {
686697
this.isGroupStart(start) and
687698
this.getChar(start + 1) = "?" and
688699
this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and
689700
end = start + 2
690701
}
691702

703+
/**
704+
* Holds if `pos` contains a mode character from the
705+
* flag group starting at `start`.
706+
*/
707+
private predicate mode_character(int start, int pos) {
708+
this.flag_group_start_no_modes(start, pos)
709+
or
710+
this.mode_character(start, pos - 1) and
711+
this.getChar(pos) in ["i", "L", "m", "s", "u", "x"]
712+
}
713+
692714
/**
693715
* Holds if a parse mode group is between `start` and `end`, and includes the
694716
* mode flag `c`. For example the following span, with mode flag `i`:
695717
* ```
696718
* (?i)
697719
* ```
698720
*/
699-
private predicate flag_group(int start, int end, string c) {
700-
exists(int inStart, int inEnd |
701-
this.flag_group_start(start, inStart) and
702-
this.groupContents(start, end, inStart, inEnd) and
703-
this.getChar([inStart .. inEnd - 1]) = c
721+
private predicate flag(string c) {
722+
exists(int pos |
723+
this.mode_character(_, pos) and
724+
this.getChar(pos) = c
704725
)
705726
}
706727

@@ -709,7 +730,7 @@ class RegExp extends Expr instanceof StrConst {
709730
* it is defined by a prefix.
710731
*/
711732
string getModeFromPrefix() {
712-
exists(string c | this.flag_group(_, _, c) |
733+
exists(string c | this.flag(c) |
713734
c = "i" and result = "IGNORECASE"
714735
or
715736
c = "L" and result = "LOCALE"

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,5 +105,5 @@
105105
| redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
106106
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\\u00c6'. |
107107
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
108-
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 's' and containing many repetitions of '\\n'. |
109-
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'is' and containing many repetitions of '\\n'. |
108+
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
109+
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

0 commit comments

Comments
 (0)