|
2 | 2 | * Provides predicates for reasoning about bad tag filter vulnerabilities.
|
3 | 3 | */
|
4 | 4 |
|
5 |
| -import regexp.RegexpMatching |
6 |
| - |
7 |
| -/** |
8 |
| - * Holds if the regexp `root` should be tested against `str`. |
9 |
| - * Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`. |
10 |
| - * `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored. |
11 |
| - * `testWithGroups` toggles whether it's tested which groups are filled by a given input string. |
12 |
| - */ |
13 |
| -private predicate isBadTagFilterCandidate( |
14 |
| - RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups |
15 |
| -) { |
16 |
| - // the regexp must mention "<" and ">" explicitly. |
17 |
| - forall(string angleBracket | angleBracket = ["<", ">"] | |
18 |
| - any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() = |
19 |
| - root |
20 |
| - ) and |
21 |
| - ignorePrefix = true and |
22 |
| - ( |
23 |
| - str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and |
24 |
| - testWithGroups = true |
25 |
| - or |
26 |
| - str = |
27 |
| - [ |
28 |
| - "<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>", |
29 |
| - "<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>", |
30 |
| - "<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>", |
31 |
| - "<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>", |
32 |
| - "<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >", |
33 |
| - "<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>" |
34 |
| - ] and |
35 |
| - testWithGroups = false |
36 |
| - ) |
37 |
| -} |
38 |
| - |
39 |
| -/** |
40 |
| - * A regexp that matches some string from the `isBadTagFilterCandidate` predicate. |
41 |
| - */ |
42 |
| -class HtmlMatchingRegExp extends RootTerm { |
43 |
| - HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) } |
44 |
| - |
45 |
| - /** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */ |
46 |
| - predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) } |
47 |
| - |
48 |
| - /** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */ |
49 |
| - predicate fillsCaptureGroup(string str, int g) { |
50 |
| - RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g) |
51 |
| - } |
52 |
| -} |
53 |
| - |
54 |
| -/** DEPRECATED: Alias for HtmlMatchingRegExp */ |
55 |
| -deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp; |
56 |
| - |
57 |
| -/** |
58 |
| - * Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match. |
59 |
| - * |
60 |
| - * When adding a new case to this predicate, make sure the test string used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`. |
61 |
| - */ |
62 |
| -predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) { |
63 |
| - // CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups |
64 |
| - regexp.matches("<!-- foo -->") and |
65 |
| - regexp.matches("<!-- foo --!>") and |
66 |
| - exists(int a, int b | a != b | |
67 |
| - regexp.fillsCaptureGroup("<!-- foo -->", a) and |
68 |
| - // <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here. |
69 |
| - regexp.fillsCaptureGroup("<!-- foo --!>", b) and |
70 |
| - not regexp.fillsCaptureGroup("<!-- foo --!>", a) and |
71 |
| - msg = |
72 |
| - "Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group " |
73 |
| - + a + " and comments ending with --!> are matched with capture group " + |
74 |
| - strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") + |
75 |
| - "." |
76 |
| - ) |
77 |
| - or |
78 |
| - // CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>". |
79 |
| - exists(int group, int other | |
80 |
| - group != other and |
81 |
| - regexp.fillsCaptureGroup("<!-- foo -->", group) and |
82 |
| - regexp.fillsCaptureGroup("<foo>", other) and |
83 |
| - not regexp.matches("<!-- foo --!>") and |
84 |
| - not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and |
85 |
| - not regexp.fillsCaptureGroup("<!- foo ->", group) and |
86 |
| - not regexp.fillsCaptureGroup("<foo>", group) and |
87 |
| - not regexp.fillsCaptureGroup("<script>", group) and |
88 |
| - msg = |
89 |
| - "This regular expression only parses --> (capture group " + group + |
90 |
| - ") and not --!> as an HTML comment end tag." |
91 |
| - ) |
92 |
| - or |
93 |
| - regexp.matches("<!-- foo -->") and |
94 |
| - not regexp.matches("<!-- foo\n -->") and |
95 |
| - not regexp.matches("<!- foo ->") and |
96 |
| - not regexp.matches("<foo>") and |
97 |
| - not regexp.matches("<script>") and |
98 |
| - msg = "This regular expression does not match comments containing newlines." |
99 |
| - or |
100 |
| - regexp.matches("<script>foo</script>") and |
101 |
| - regexp.matches("<script src=\"foo\"></script>") and |
102 |
| - not regexp.matches("<foo ></foo>") and |
103 |
| - ( |
104 |
| - not regexp.matches("<script \n>foo</script>") and |
105 |
| - msg = "This regular expression matches <script></script>, but not <script \\n></script>" |
106 |
| - or |
107 |
| - not regexp.matches("<script >foo\n</script>") and |
108 |
| - msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>" |
109 |
| - ) |
110 |
| - or |
111 |
| - regexp.matches("<script>foo</script>") and |
112 |
| - regexp.matches("<script src=\"foo\"></script>") and |
113 |
| - not regexp.matches("<script src='foo'></script>") and |
114 |
| - not regexp.matches("<foo>") and |
115 |
| - msg = "This regular expression does not match script tags where the attribute uses single-quotes." |
116 |
| - or |
117 |
| - regexp.matches("<script>foo</script>") and |
118 |
| - regexp.matches("<script src='foo'></script>") and |
119 |
| - not regexp.matches("<script src=\"foo\"></script>") and |
120 |
| - not regexp.matches("<foo>") and |
121 |
| - msg = "This regular expression does not match script tags where the attribute uses double-quotes." |
122 |
| - or |
123 |
| - regexp.matches("<script>foo</script>") and |
124 |
| - regexp.matches("<script src='foo'></script>") and |
125 |
| - not regexp.matches("<script\tsrc='foo'></script>") and |
126 |
| - not regexp.matches("<foo>") and |
127 |
| - not regexp.matches("<foo src=\"foo\"></foo>") and |
128 |
| - msg = "This regular expression does not match script tags where tabs are used between attributes." |
129 |
| - or |
130 |
| - regexp.matches("<script>foo</script>") and |
131 |
| - not RegExpFlags::isIgnoreCase(regexp) and |
132 |
| - not regexp.matches("<foo>") and |
133 |
| - not regexp.matches("<foo ></foo>") and |
134 |
| - ( |
135 |
| - not regexp.matches("<SCRIPT>foo</SCRIPT>") and |
136 |
| - msg = "This regular expression does not match upper case <SCRIPT> tags." |
137 |
| - or |
138 |
| - not regexp.matches("<sCrIpT>foo</ScRiPt>") and |
139 |
| - regexp.matches("<SCRIPT>foo</SCRIPT>") and |
140 |
| - msg = "This regular expression does not match mixed case <sCrIpT> tags." |
141 |
| - ) |
142 |
| - or |
143 |
| - regexp.matches("<script src=\"foo\"></script>") and |
144 |
| - not regexp.matches("<foo>") and |
145 |
| - not regexp.matches("<foo ></foo>") and |
146 |
| - ( |
147 |
| - not regexp.matches("<script src=\"foo\">foo</script >") and |
148 |
| - msg = "This regular expression does not match script end tags like </script >." |
149 |
| - or |
150 |
| - not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and |
151 |
| - msg = "This regular expression does not match script end tags like </script foo=\"bar\">." |
152 |
| - or |
153 |
| - not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and |
154 |
| - msg = "This regular expression does not match script end tags like </script\\t\\n bar>." |
155 |
| - ) |
156 |
| -} |
| 5 | +private import codeql.ruby.regexp.RegExpTreeView::RegexTreeView as TreeView |
| 6 | +// BadTagFilterQuery should be used directly from the shared pack, and not from this file. |
| 7 | +deprecated import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep |
| 8 | +import Dep |
0 commit comments