2
2
* Provides precicates for reasoning about bad tag filter vulnerabilities.
3
3
*/
4
4
5
- import performance.ReDoSUtil
5
+ import performance.RegexpMatching
6
6
7
7
/**
8
- * A module for determining if a regexp matches a given string,
9
- * and reasoning about which capture groups are filled by a given string .
8
+ * Holds if the regexp `root` should be tested against `str`.
9
+ * Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching` .
10
10
*/
11
- private module RegexpMatching {
12
- /**
13
- * A class to test whether a regular expression matches a string.
14
- * Override this class and extend `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
15
- * The result can afterwards be read from the `matches` predicate.
16
- *
17
- * Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
18
- * The result is available in the `fillCaptureGroup` predicate.
19
- */
20
- abstract class MatchedRegExp extends RegExpTerm {
21
- MatchedRegExp ( ) { this .isRootTerm ( ) }
22
-
23
- /**
24
- * Holds if it should be tested whether this regular expression matches `str`.
25
- *
26
- * If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
27
- * E.g. a regular expression `/foo$/` will match any string that ends with "foo",
28
- * but if `ignorePrefix` is true, it will only match "foo".
29
- */
30
- predicate test ( string str , boolean ignorePrefix ) {
31
- none ( ) // maybe overridden in subclasses
32
- }
33
-
34
- /**
35
- * Same as `test(..)`, but where the `fillsCaptureGroup` afterwards tells which capture groups were filled by the given string.
36
- */
37
- predicate testWithGroups ( string str , boolean ignorePrefix ) {
38
- none ( ) // maybe overridden in subclasses
39
- }
40
-
41
- /**
42
- * Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
43
- */
44
- final predicate matches ( string str ) {
45
- exists ( State state | state = getAState ( this , str .length ( ) - 1 , str , _) |
46
- epsilonSucc * ( state ) = Accept ( _)
47
- )
48
- }
49
-
50
- /**
51
- * Holds if matching `str` may fill capture group number `g`.
52
- * Only holds if `str` is in the `testWithGroups` predicate.
53
- */
54
- final predicate fillsCaptureGroup ( string str , int g ) {
55
- exists ( State s |
56
- s = getAStateThatReachesAccept ( this , _, str , _) and
57
- g = group ( s .getRepr ( ) )
58
- )
59
- }
60
- }
61
-
62
- /**
63
- * Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
64
- * The regular expression is modeled as a non-determistic finite automaton,
65
- * the regular expression can therefore be in multiple states after matching a character.
66
- *
67
- * It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
68
- */
69
- private State getAState ( MatchedRegExp reg , int i , string str , boolean ignorePrefix ) {
70
- // start state, the -1 position before any chars have been matched
71
- i = - 1 and
72
- (
73
- reg .test ( str , ignorePrefix )
74
- or
75
- reg .testWithGroups ( str , ignorePrefix )
76
- ) and
77
- result .getRepr ( ) .getRootTerm ( ) = reg and
78
- isStartState ( result )
79
- or
80
- // recursive case
81
- result = getAStateAfterMatching ( reg , _, str , i , _, ignorePrefix )
82
- }
83
-
84
- /**
85
- * Gets the next state after the `prev` state from `reg`.
86
- * `prev` is the state after matching `fromIndex` chars in `str`,
87
- * and the result is the state after matching `toIndex` chars in `str`.
88
- *
89
- * This predicate is used as a step relation in the forwards search (`getAState`),
90
- * and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
91
- */
92
- private State getAStateAfterMatching (
93
- MatchedRegExp reg , State prev , string str , int toIndex , int fromIndex , boolean ignorePrefix
94
- ) {
95
- // the basic recursive case - outlined into a noopt helper to make performance work out.
96
- result = getAStateAfterMatchingAux ( reg , prev , str , toIndex , fromIndex , ignorePrefix )
97
- or
98
- // we can skip past word boundaries if the next char is a non-word char.
99
- fromIndex = toIndex and
100
- prev .getRepr ( ) instanceof RegExpWordBoundary and
101
- prev = getAState ( reg , toIndex , str , ignorePrefix ) and
102
- after ( prev .getRepr ( ) ) = result and
103
- str .charAt ( toIndex + 1 ) .regexpMatch ( "\\W" ) // \W matches any non-word char.
104
- }
105
-
106
- pragma [ noopt]
107
- private State getAStateAfterMatchingAux (
108
- MatchedRegExp reg , State prev , string str , int toIndex , int fromIndex , boolean ignorePrefix
109
- ) {
110
- prev = getAState ( reg , fromIndex , str , ignorePrefix ) and
111
- fromIndex = toIndex - 1 and
112
- exists ( string char | char = str .charAt ( toIndex ) | specializedDeltaClosed ( prev , char , result ) ) and
113
- not discardedPrefixStep ( prev , result , ignorePrefix )
114
- }
115
-
116
- /** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
117
- private predicate discardedPrefixStep ( State prev , State next , boolean ignorePrefix ) {
118
- prev = mkMatch ( any ( RegExpRoot r ) ) and
119
- ignorePrefix = true and
120
- next = prev
121
- }
122
-
123
- // The `deltaClosed` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
124
- private predicate specializedDeltaClosed ( State prev , string char , State next ) {
125
- deltaClosed ( prev , specializedGetAnInputSymbolMatching ( char ) , next )
126
- }
127
-
128
- // The `getAnInputSymbolMatching` relation specialized to the chars that exists in strings tested by a `MatchedRegExp`.
129
- pragma [ noinline]
130
- private InputSymbol specializedGetAnInputSymbolMatching ( string char ) {
131
- exists ( string s , MatchedRegExp r |
132
- r .test ( s , _)
133
- or
134
- r .testWithGroups ( s , _)
135
- |
136
- char = s .charAt ( _)
137
- ) and
138
- result = getAnInputSymbolMatching ( char )
139
- }
140
-
141
- /**
142
- * Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
143
- * Starts with an accepting state as found by `getAState` and searches backwards
144
- * to the start state through the reachable states (as found by `getAState`).
145
- *
146
- * This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
147
- * and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
148
- * The result state is therefore always on a valid path where `reg` accepts `str`.
149
- *
150
- * This predicate is only used to find which capture groups a regular expression has filled,
151
- * and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
152
- */
153
- private State getAStateThatReachesAccept (
154
- MatchedRegExp reg , int i , string str , boolean ignorePrefix
155
- ) {
156
- // base case, reaches an accepting state from the last state in `getAState(..)`
157
- reg .testWithGroups ( str , ignorePrefix ) and
158
- i = str .length ( ) - 1 and
159
- result = getAState ( reg , i , str , ignorePrefix ) and
160
- epsilonSucc * ( result ) = Accept ( _)
11
+ private predicate isBadTagFilterCandidate (
12
+ RootTerm root , string str , boolean ignorePrefix , boolean testWithGroups
13
+ ) {
14
+ // the regexp must mention "<" and ">" explicitly.
15
+ forall ( string angleBracket | angleBracket = [ "<" , ">" ] |
16
+ any ( RegExpConstant term | term .getValue ( ) .matches ( "%" + angleBracket + "%" ) ) .getRootTerm ( ) =
17
+ root
18
+ ) and
19
+ ignorePrefix = true and
20
+ (
21
+ str = [ "<!-- foo -->" , "<!-- foo --!>" , "<!- foo ->" , "<foo>" , "<script>" ] and
22
+ testWithGroups = true
161
23
or
162
- // recursive case. `next` is the next state to be matched after matching `prev`.
163
- // this predicate is doing a backwards search, so `prev` is the result we are looking for.
164
- exists ( State next , State prev , int fromIndex , int toIndex |
165
- next = getAStateThatReachesAccept ( reg , toIndex , str , ignorePrefix ) and
166
- next = getAStateAfterMatching ( reg , prev , str , toIndex , fromIndex , ignorePrefix ) and
167
- i = fromIndex and
168
- result = prev
169
- )
170
- }
171
-
172
- /** Gets the capture group number that `term` belongs to. */
173
- private int group ( RegExpTerm term ) {
174
- exists ( RegExpGroup grp | grp .getNumber ( ) = result | term .getParent * ( ) = grp )
175
- }
176
- }
177
-
178
- /** A class to test whether a regular expression matches certain HTML tags. */
179
- class HtmlMatchingRegExp extends RegexpMatching:: MatchedRegExp {
180
- HtmlMatchingRegExp ( ) {
181
- // the regexp must mention "<" and ">" explicitly.
182
- forall ( string angleBracket | angleBracket = [ "<" , ">" ] |
183
- any ( RegExpConstant term | term .getValue ( ) .matches ( "%" + angleBracket + "%" ) ) .getRootTerm ( ) =
184
- this
185
- )
186
- }
187
-
188
- override predicate testWithGroups ( string str , boolean ignorePrefix ) {
189
- ignorePrefix = true and
190
- str = [ "<!-- foo -->" , "<!-- foo --!>" , "<!- foo ->" , "<foo>" , "<script>" ]
191
- }
192
-
193
- override predicate test ( string str , boolean ignorePrefix ) {
194
- ignorePrefix = true and
195
24
str =
196
25
[
197
26
"<!-- foo -->" , "<!- foo ->" , "<!-- foo --!>" , "<!-- foo\n -->" , "<script>foo</script>" ,
@@ -200,12 +29,25 @@ class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
200
29
"<script src='foo'></script>" , "<SCRIPT>foo</SCRIPT>" , "<script\tsrc=\"foo\"/>" ,
201
30
"<script\tsrc='foo'></script>" , "<sCrIpT>foo</ScRiPt>" , "<script src=\"foo\">foo</script >" ,
202
31
"<script src=\"foo\">foo</script foo=\"bar\">" , "<script src=\"foo\">foo</script\t\n bar>"
203
- ]
204
- }
32
+ ] and
33
+ testWithGroups = false
34
+ )
205
35
}
206
36
207
- /** DEPRECATED: Alias for HtmlMatchingRegExp */
208
- deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp ;
37
+ /**
38
+ * A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
39
+ */
40
+ class HtmlMatchingRegExp extends RootTerm {
41
+ HtmlMatchingRegExp ( ) { RegexpMatching< isBadTagFilterCandidate / 4 > :: matches ( this , _) }
42
+
43
+ /** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */
44
+ predicate matches ( string str ) { RegexpMatching< isBadTagFilterCandidate / 4 > :: matches ( this , str ) }
45
+
46
+ /** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */
47
+ predicate fillsCaptureGroup ( string str , int g ) {
48
+ RegexpMatching< isBadTagFilterCandidate / 4 > :: fillsCaptureGroup ( this , str , g )
49
+ }
50
+ }
209
51
210
52
/**
211
53
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
0 commit comments