|
14 | 14 | */
|
15 | 15 |
|
16 | 16 | import javascript
|
17 |
| - |
18 |
/**
 * A term of the regular expression of a string-replacement call whose
 * replacement may be the empty string.
 *
 * The replacement is either the raw replacement argument of the call, or a
 * value returned from the callback at index 1. The root term of the regular
 * expression and all of its transitive children are included.
 */
class EmptyReplaceRegExpTerm extends RegExpTerm {
  EmptyReplaceRegExpTerm() {
    exists(StringReplaceCall call, RegExpTerm root |
      root = call.getRegExp().getRoot() and
      this = root.getAChild*() and
      (
        call.getRawReplacement().mayHaveStringValue("")
        or
        call.getCallback(1).getAReturn().mayHaveStringValue("")
      )
    )
  }
}
29 |
| - |
30 |
/**
 * A string prefix that may be dangerous to sanitize explicitly: an upwards
 * path navigation (`/..`, `../`), the start of an HTML comment (`<!--`), or
 * `<` followed by one of a fixed list of tag names.
 *
 * Note that this class exists solely as a (necessary) optimization for this query.
 */
class DangerousPrefix extends string {
  DangerousPrefix() {
    this = ["/..", "../", "<!--"]
    or
    exists(string tag | tag = ["iframe", "script", "cript", "scrip", "style"] | this = "<" + tag)
  }
}
42 |
| - |
43 |
/**
 * A substring of a `DangerousPrefix`, taken between any two offsets in the
 * range `0 .. length` of that prefix.
 */
class DangerousPrefixSubstring extends string {
  DangerousPrefixSubstring() {
    exists(DangerousPrefix prefix, int start, int end |
      start = [0 .. prefix.length()] and
      end = [0 .. prefix.length()] and
      this = prefix.substring(start, end)
    )
  }
}
51 |
| - |
52 |
/**
 * Gets a dangerous prefix that is in the prefix language of `t`.
 *
 * This only has a result if every `EmptyReplaceRegExpTerm` that is a
 * (transitive) predecessor of `t` is nullable.
 */
DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  forall(EmptyReplaceRegExpTerm before | before = t.getPredecessor+() | before.isNullable()) and
  result = getADangerousMatchedPrefixSubstring(t)
}
59 |
| - |
60 |
| -private import semmle.javascript.security.regexp.NfaUtils as NfaUtils |
61 |
| - |
62 |
/**
 * Gets a piece of a dangerous prefix that is matched by `t` on its own.
 *
 * Despite the name, the result is not always a single character: it is the
 * empty string for nullable terms, and it may be a longer string when `t`
 * has a matched string that is a substring of a dangerous prefix.
 */
pragma[noinline]
DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
  // a nullable term may match nothing at all
  result = "" and t.isNullable()
  or
  // a string matched by `t`
  result = t.getAMatchedString()
  or
  // `.` matches any single character
  result.length() = 1 and t instanceof RegExpDot
  or
  // A word character matched by some character class. This is only used to match
  // the "word" part of an HTML tag (e.g. "iframe" in "<iframe").
  exists(NfaUtils::CharacterClass charClass | charClass = NfaUtils::getCanonicalCharClass(t) |
    // Character classes that match ">" (e.g. /<[^<]*>/) are excluded, as these might
    // consume nested HTML tags and thus prevent the dangerous pattern this query
    // is looking for.
    not charClass.matches(">") and
    charClass.matches(result) and
    result.regexpMatch("\\w")
  )
  or
  // optional terms, repetitions, groups and alternatives: look at the children
  exists(EmptyReplaceRegExpTerm child | child = t.getAChild() |
    result = getADangerousMatchedChar(child) and
    (
      t instanceof RegExpAlt or
      t instanceof RegExpGroup or
      t instanceof RegExpPlus or
      t instanceof RegExpStar or
      t instanceof RegExpOpt
    )
  )
}
92 |
| - |
93 |
/**
 * Gets a substring of a dangerous prefix that is in the language starting at
 * `t` (ignoring lookarounds).
 *
 * Not all kinds of `RegExpTerm` are supported, so the language of `t` is
 * slightly restricted.
 */
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
  // what `t` matches on its own
  result = getADangerousMatchedChar(t)
  or
  // what `t` matches, followed by what is matched starting at its successor
  exists(string head, string tail |
    head = getADangerousMatchedChar(t) and
    tail = getADangerousMatchedPrefixSubstring(t.getSuccessor()) and
    result = head + tail
  )
  or
  // loop around for repetitions (only the characters exposed by `RepetitionMatcher` are considered)
  exists(string soFar, string repeated |
    soFar = getADangerousMatchedPrefixSubstring(t) and
    repeated = t.(RepetitionMatcher).getAChar() and
    result = soFar + repeated
  )
}
108 |
| - |
109 |
/**
 * A `*` or `+` repetition, in a regular expression whose matches may be
 * replaced with the empty string, that has a child matching a single word
 * character (`\w`) from a dangerous prefix.
 */
class RepetitionMatcher extends EmptyReplaceRegExpTerm {
  string char;

  pragma[noinline]
  RepetitionMatcher() {
    char.regexpMatch("\\w") and
    char = getADangerousMatchedChar(this.getAChild()) and
    (this instanceof RegExpStar or this instanceof RegExpPlus)
  }

  /** Gets a word character that a child of this repetition may match. */
  pragma[noinline]
  string getAChar() { result = char }
}
122 |
| - |
123 |
/**
 * Holds if `t` may match the dangerous `prefix` and some suffix, indicating
 * intent to prevent a vulnerability of kind `kind`.
 */
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
  // upwards navigation
  kind = "path injection" and
  prefix = getADangerousMatchedPrefix(t) and
  prefix = ["/..", "../"] and
  // explicit path name mentions make this an unlikely sanitizer
  not exists(string mentioned | mentioned = t.getSuccessor*().getAMatchedString() |
    mentioned.regexpMatch("(?is).*[a-z0-9_-].*")
  )
  or
  kind = "HTML element injection" and
  prefix = getADangerousMatchedPrefix(t) and
  (
    // comments
    prefix = "<!--" and
    // explicit comment content mentions make this an unlikely sanitizer
    not exists(string mentioned | mentioned = t.getSuccessor*().getAMatchedString() |
      mentioned.regexpMatch("(?is).*[a-z0-9_].*")
    )
    or
    // specific tags; the `cript|scrip` case has been observed in the wild several times
    exists(string tag | tag = ["iframe", "script", "cript", "scrip", "style"] |
      prefix = "<" + tag
    )
  )
  or
  kind = "HTML attribute injection" and
  prefix =
    [
      // ordinary event handler prefix
      "on",
      // angular prefixes
      "ng-", "ng:", "data-ng-", "x-ng-"
    ] and
  (
    // regexp-based matching: `on[a-z]+`
    exists(EmptyReplaceRegExpTerm start |
      start = t.getAChild() and
      isCommonWordMatcher(start.getSuccessor()) and
      start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix)
    )
    or
    // explicit matching: `onclick` and `ng-bind`
    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
  )
}
164 |
| - |
165 |
/**
 * Holds if `t` is a common pattern for matching words: a quantified term
 * whose first child is `\w`, or is a character class containing `\w` or a
 * letter range such as `a-z`.
 */
predicate isCommonWordMatcher(RegExpTerm t) {
  exists(RegExpTerm body | body = t.(RegExpQuantifier).getChild(0) |
    // \w+
    body.(RegExpCharacterClassEscape).getValue() = "w"
    or
    exists(RegExpTerm member | member = body.(RegExpCharacterClass).getAChild() |
      // [\w]+
      member.(RegExpCharacterClassEscape).getValue() = "w"
      or
      // [a-z]+ and similar
      member.(RegExpCharacterRange).isRange(["a", "A"], ["z", "Z"])
    )
  )
}
| 17 | +private import semmle.javascript.security.IncompleteMultiCharacterSanitization |
183 | 18 |
|
184 | 19 | from
|
185 | 20 | StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,
|
|
0 commit comments