Skip to content

Commit 21007d2

Browse files
committed
Python: track whether qualifiers allow unbounded
repeats. This is in preparation for ReDoS
1 parent 74ca1d0 commit 21007d2

File tree

4 files changed

+81
-60
lines changed

4 files changed

+81
-60
lines changed

python/ql/src/semmle/python/regex.qll

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ abstract class RegexString extends Expr {
382382
not c = "[" and
383383
not c = ")" and
384384
not c = "|" and
385-
not this.qualifier(start, _, _)
385+
not this.qualifier(start, _, _, _)
386386
)
387387
}
388388

@@ -688,41 +688,51 @@ abstract class RegexString extends Expr {
688688
this.backreference(start, end)
689689
}
690690

691-
private predicate qualifier(int start, int end, boolean maybe_empty) {
692-
this.short_qualifier(start, end, maybe_empty) and not this.getChar(end) = "?"
691+
private predicate qualifier(int start, int end, boolean maybe_empty, boolean may_repeat_forever) {
692+
this.short_qualifier(start, end, maybe_empty, may_repeat_forever) and
693+
not this.getChar(end) = "?"
693694
or
694-
exists(int short_end | this.short_qualifier(start, short_end, maybe_empty) |
695+
exists(int short_end | this.short_qualifier(start, short_end, maybe_empty, may_repeat_forever) |
695696
if this.getChar(short_end) = "?" then end = short_end + 1 else end = short_end
696697
)
697698
}
698699

699-
private predicate short_qualifier(int start, int end, boolean maybe_empty) {
700+
private predicate short_qualifier(
701+
int start, int end, boolean maybe_empty, boolean may_repeat_forever
702+
) {
700703
(
701-
this.getChar(start) = "+" and maybe_empty = false
704+
this.getChar(start) = "+" and maybe_empty = false and may_repeat_forever = true
702705
or
703-
this.getChar(start) = "*" and maybe_empty = true
706+
this.getChar(start) = "*" and maybe_empty = true and may_repeat_forever = true
704707
or
705-
this.getChar(start) = "?" and maybe_empty = true
708+
this.getChar(start) = "?" and maybe_empty = true and may_repeat_forever = false
706709
) and
707710
end = start + 1
708711
or
709-
exists(int endin | end = endin + 1 |
710-
this.getChar(start) = "{" and
711-
this.getChar(endin) = "}" and
712-
end > start and
713-
exists(string multiples | multiples = this.getText().substring(start + 1, endin) |
714-
multiples.regexpMatch("0+") and maybe_empty = true
715-
or
716-
multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
717-
or
718-
multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false
719-
or
720-
multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
721-
) and
722-
not exists(int mid |
723-
this.getChar(mid) = "}" and
724-
mid > start and
725-
mid < endin
712+
exists(string lower, string upper |
713+
this.multiples(start, end, lower, upper) and
714+
(if lower = "" or lower.toInt() = 0 then maybe_empty = true else maybe_empty = false) and
715+
if upper = "" then may_repeat_forever = true else may_repeat_forever = false
716+
)
717+
}
718+
719+
/**
720+
* Holds if a repetition quantifier is found between `start` and `end`,
721+
* with the given lower and upper bounds. If a bound is omitted, the corresponding
722+
* string is empty.
723+
*/
724+
predicate multiples(int start, int end, string lower, string upper) {
725+
this.getChar(start) = "{" and
726+
this.getChar(end - 1) = "}" and
727+
exists(string inner | inner = this.getText().substring(start + 1, end - 1) |
728+
inner.regexpMatch("[0-9]+") and
729+
lower = inner and
730+
upper = lower
731+
or
732+
inner.regexpMatch("[0-9]*,[0-9]*") and
733+
exists(int commaIndex | commaIndex = inner.indexOf(",") |
734+
lower = inner.prefix(commaIndex) and
735+
upper = inner.suffix(commaIndex + 1)
726736
)
727737
)
728738
}
@@ -731,19 +741,29 @@ abstract class RegexString extends Expr {
731741
* Whether the text in the range start,end is a qualified item, where item is a character,
732742
* a character set or a group.
733743
*/
734-
predicate qualifiedItem(int start, int end, boolean maybe_empty) {
735-
this.qualifiedPart(start, _, end, maybe_empty)
744+
predicate qualifiedItem(int start, int end, boolean maybe_empty, boolean may_repeat_forever) {
745+
this.qualifiedPart(start, _, end, maybe_empty, may_repeat_forever)
736746
}
737747

738-
private predicate qualifiedPart(int start, int part_end, int end, boolean maybe_empty) {
748+
/**
749+
* Holds if a qualified part is found between `start` and `part_end` and the qualifier is
750+
* found between `part_end` and `end`.
751+
*
752+
* `maybe_empty` is true if the part is optional.
753+
* `may_repeat_forever` is true if the part may be repeated unboundedly.
754+
*/
755+
predicate qualifiedPart(
756+
int start, int part_end, int end, boolean maybe_empty, boolean may_repeat_forever
757+
) {
739758
this.baseItem(start, part_end) and
740-
this.qualifier(part_end, end, maybe_empty)
759+
this.qualifier(part_end, end, maybe_empty, may_repeat_forever)
741760
}
742761

743-
private predicate item(int start, int end) {
744-
this.qualifiedItem(start, end, _)
762+
/** Holds if the range `start`, `end` contains a character, a quantifier, a character set or a group. */
763+
predicate item(int start, int end) {
764+
this.qualifiedItem(start, end, _, _)
745765
or
746-
this.baseItem(start, end) and not this.qualifier(end, _, _)
766+
this.baseItem(start, end) and not this.qualifier(end, _, _, _)
747767
}
748768

749769
private predicate subsequence(int start, int end) {
@@ -766,7 +786,7 @@ abstract class RegexString extends Expr {
766786
*/
767787
predicate sequence(int start, int end) {
768788
this.sequenceOrQualified(start, end) and
769-
not this.qualifiedItem(start, end, _)
789+
not this.qualifiedItem(start, end, _, _)
770790
}
771791

772792
private predicate sequenceOrQualified(int start, int end) {
@@ -777,7 +797,8 @@ abstract class RegexString extends Expr {
777797
private predicate item_start(int start) {
778798
this.character(start, _) or
779799
this.isGroupStart(start) or
780-
this.charSet(start, _)
800+
this.charSet(start, _) or
801+
this.backreference(start, _)
781802
}
782803

783804
private predicate item_end(int end) {
@@ -787,7 +808,7 @@ abstract class RegexString extends Expr {
787808
or
788809
this.charSet(_, end)
789810
or
790-
this.qualifier(_, end, _)
811+
this.qualifier(_, end, _, _)
791812
}
792813

793814
private predicate top_level(int start, int end) {
@@ -839,14 +860,14 @@ abstract class RegexString extends Expr {
839860
or
840861
exists(int x | this.firstPart(x, end) |
841862
this.emptyMatchAtStartGroup(x, start) or
842-
this.qualifiedItem(x, start, true) or
863+
this.qualifiedItem(x, start, true, _) or
843864
this.specialCharacter(x, start, "^")
844865
)
845866
or
846867
exists(int y | this.firstPart(start, y) |
847868
this.item(start, end)
848869
or
849-
this.qualifiedPart(start, end, y, _)
870+
this.qualifiedPart(start, end, y, _, _)
850871
)
851872
or
852873
exists(int x, int y | this.firstPart(x, y) |
@@ -863,7 +884,7 @@ abstract class RegexString extends Expr {
863884
exists(int y | this.lastPart(start, y) |
864885
this.emptyMatchAtEndGroup(end, y)
865886
or
866-
this.qualifiedItem(end, y, true)
887+
this.qualifiedItem(end, y, true, _)
867888
or
868889
this.specialCharacter(end, y, "$")
869890
or
@@ -875,7 +896,7 @@ abstract class RegexString extends Expr {
875896
this.item(start, end)
876897
)
877898
or
878-
exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _))
899+
exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _, _))
879900
or
880901
exists(int x, int y | this.lastPart(x, y) |
881902
this.groupContents(x, y, start, end)
@@ -892,7 +913,7 @@ abstract class RegexString extends Expr {
892913
(
893914
this.character(start, end)
894915
or
895-
this.qualifiedItem(start, end, _)
916+
this.qualifiedItem(start, end, _, _)
896917
or
897918
this.charSet(start, end)
898919
) and
@@ -907,7 +928,7 @@ abstract class RegexString extends Expr {
907928
(
908929
this.character(start, end)
909930
or
910-
this.qualifiedItem(start, end, _)
931+
this.qualifiedItem(start, end, _, _)
911932
or
912933
this.charSet(start, end)
913934
) and
Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false |
2-
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true |
3-
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true |
4-
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true |
5-
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true |
6-
| (?P<name>[\\w]+)\| | 9 | 14 | false |
7-
| \\A[+-]?\\d+ | 2 | 7 | true |
8-
| \\A[+-]?\\d+ | 7 | 10 | false |
9-
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true |
10-
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true |
11-
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
12-
| ax{01,3} | 1 | 8 | false |
13-
| ax{3,} | 1 | 6 | false |
14-
| ax{3} | 1 | 5 | false |
15-
| ax{,3} | 1 | 6 | true |
1+
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false | true |
2+
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true | false |
3+
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true | true |
4+
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true | false |
5+
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true | true |
6+
| (?P<name>[\\w]+)\| | 9 | 14 | false | true |
7+
| \\A[+-]?\\d+ | 2 | 7 | true | false |
8+
| \\A[+-]?\\d+ | 7 | 10 | false | true |
9+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true | true |
10+
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true | true |
11+
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false | true |
12+
| ax{01,3} | 1 | 8 | false | false |
13+
| ax{3,} | 1 | 6 | false | true |
14+
| ax{3} | 1 | 5 | false | false |
15+
| ax{,3} | 1 | 6 | true | false |
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import python
22
import semmle.python.regex
33

4-
from Regex r, int start, int end, boolean maybe_empty
5-
where r.qualifiedItem(start, end, maybe_empty)
6-
select r.getText(), start, end, maybe_empty
4+
from Regex r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
5+
where r.qualifiedItem(start, end, maybe_empty, may_repeat_forever)
6+
select r.getText(), start, end, maybe_empty, may_repeat_forever

python/ql/test/library-tests/regex/Regex.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ predicate part(Regex r, int start, int end, string kind) {
1616
or
1717
r.group(start, end) and not r.zeroWidthMatch(start, end) and kind = "non-empty group"
1818
or
19-
r.qualifiedItem(start, end, _) and kind = "qualified"
19+
r.qualifiedItem(start, end, _, _) and kind = "qualified"
2020
}
2121

2222
from Regex r, int start, int end, string kind

0 commit comments

Comments
 (0)