Skip to content

Commit b7d9bf4

Browse files
committed
Share IncompleteMultiCharacterSanitization JS/Ruby
Most of the classes and predicates in this query can be shared between the two languages. There's just a few language-specific things that we place in IncompleteMultiCharacterSanitizationSpecific.
1 parent 3179c60 commit b7d9bf4

File tree

8 files changed

+370
-340
lines changed

8 files changed

+370
-340
lines changed

config/identical-files.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,5 +597,9 @@
597597
"Swift patterns test file": [
598598
"swift/ql/test/extractor-tests/patterns/patterns.swift",
599599
"swift/ql/test/library-tests/parent/patterns.swift"
600+
],
601+
"IncompleteMultiCharacterSanitization JS/Ruby": [
602+
"javascript/ql/lib/semmle/javascript/security/IncompleteMultiCharacterSanitization.qll",
603+
"ruby/ql/lib/codeql/ruby/security/IncompleteMultiCharacterSanitization.qll"
600604
]
601605
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
/**
2+
* Provides shared predicates for reasoning about improper multi-character sanitization.
3+
*/
4+
5+
import IncompleteMultiCharacterSanitizationSpecific
6+
7+
/**
8+
* A prefix that may be dangerous to sanitize explicitly.
9+
*
10+
* Note that this class exists solely as a (necessary) optimization for this query.
11+
*/
12+
private class DangerousPrefix extends string {
13+
DangerousPrefix() {
14+
this = ["/..", "../"] or
15+
this = "<!--" or
16+
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
17+
}
18+
}
19+
20+
/**
21+
* A substring of a prefix that may be dangerous to sanitize explicitly.
22+
*/
23+
private class DangerousPrefixSubstring extends string {
24+
DangerousPrefixSubstring() {
25+
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
26+
}
27+
}
28+
29+
/**
30+
* Gets a char from a dangerous prefix that is matched by `t`.
31+
*/
32+
pragma[noinline]
33+
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
34+
t.isNullable() and result = ""
35+
or
36+
result = t.getAMatchedString()
37+
or
38+
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
39+
exists(ReDoSUtil::CharacterClass cc |
40+
cc = ReDoSUtil::getCanonicalCharClass(t) and
41+
cc.matches(result) and
42+
result.regexpMatch("\\w") and
43+
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
44+
not cc.matches(">")
45+
)
46+
or
47+
t instanceof RegExpDot and
48+
result.length() = 1
49+
or
50+
(
51+
t instanceof RegExpOpt or
52+
t instanceof RegExpStar or
53+
t instanceof RegExpPlus or
54+
t instanceof RegExpGroup or
55+
t instanceof RegExpAlt
56+
) and
57+
result = getADangerousMatchedChar(t.getAChild())
58+
}
59+
60+
/**
61+
* Gets a dangerous prefix that is in the prefix language of `t`.
62+
*/
63+
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
64+
result = getADangerousMatchedPrefixSubstring(t) and
65+
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
66+
}
67+
68+
/**
69+
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
70+
*
71+
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
72+
*/
73+
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
74+
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
75+
or
76+
result = getADangerousMatchedChar(t)
77+
or
78+
// loop around for repetitions (only considering alphanumeric characters in the repetition)
79+
exists(RepetitionMatcher repetition | t = repetition |
80+
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
81+
)
82+
}
83+
84+
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
85+
string char;
86+
87+
pragma[noinline]
88+
RepetitionMatcher() {
89+
(this instanceof RegExpPlus or this instanceof RegExpStar) and
90+
char = getADangerousMatchedChar(this.getAChild()) and
91+
char.regexpMatch("\\w")
92+
}
93+
94+
pragma[noinline]
95+
string getAChar() { result = char }
96+
}
97+
98+
/**
99+
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`.
100+
*/
101+
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
102+
prefix = getADangerousMatchedPrefix(t) and
103+
(
104+
kind = "path injection" and
105+
prefix = ["/..", "../"] and
106+
// If the regex is matching explicit path components, it is unlikely that it's being used as a sanitizer.
107+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*")
108+
or
109+
kind = "HTML element injection" and
110+
(
111+
// comments
112+
prefix = "<!--" and
113+
// If the regex is matching explicit textual content of an HTML comment, it is unlikely that it's being used as a sanitizer.
114+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*")
115+
or
116+
// specific tags
117+
// the `cript|scrip` case has been observed in the wild several times
118+
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
119+
)
120+
)
121+
or
122+
kind = "HTML attribute injection" and
123+
prefix =
124+
[
125+
// ordinary event handler prefix
126+
"on",
127+
// angular prefixes
128+
"ng-", "ng:", "data-ng-", "x-ng-"
129+
] and
130+
(
131+
// explicit matching: `onclick` and `ng-bind`
132+
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
133+
or
134+
// regexp-based matching: `on[a-z]+`
135+
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
136+
start.getAMatchedString().regexpMatch("(?i)[^a-z]*" + prefix) and
137+
isCommonWordMatcher(start.getSuccessor())
138+
)
139+
)
140+
}
141+
142+
/**
143+
* Holds if `t` is a common pattern for matching words
144+
*/
145+
private predicate isCommonWordMatcher(RegExpTerm t) {
146+
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
147+
// [a-z]+ and similar
148+
quantified
149+
.(RegExpCharacterClass)
150+
.getAChild()
151+
.(RegExpCharacterRange)
152+
.isRange(["a", "A"], ["z", "Z"])
153+
or
154+
// \w+ or [\w]+
155+
[quantified, quantified.(RegExpCharacterClass).getAChild()]
156+
.(RegExpCharacterClassEscape)
157+
.getValue() = "w"
158+
)
159+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/**
2+
* Provides language-specific predicates for reasoning about improper multi-character sanitization.
3+
*/
4+
5+
import javascript
6+
import semmle.javascript.security.performance.ReDoSUtil as ReDoSUtil
7+
8+
/**
9+
* A regexp term that matches substrings that should be replaced with the empty string.
10+
*/
11+
class EmptyReplaceRegExpTerm extends RegExpTerm {
12+
EmptyReplaceRegExpTerm() {
13+
exists(StringReplaceCall replace |
14+
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
15+
this = replace.getRegExp().getRoot().getAChild*()
16+
)
17+
}
18+
}

javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql

Lines changed: 1 addition & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -14,172 +14,7 @@
1414
*/
1515

1616
import javascript
17-
18-
/**
19-
* A regexp term that matches substrings that should be replaced with the empty string.
20-
*/
21-
class EmptyReplaceRegExpTerm extends RegExpTerm {
22-
EmptyReplaceRegExpTerm() {
23-
exists(StringReplaceCall replace |
24-
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
25-
this = replace.getRegExp().getRoot().getAChild*()
26-
)
27-
}
28-
}
29-
30-
/**
31-
* A prefix that may be dangerous to sanitize explicitly.
32-
*
33-
* Note that this class exists solely as a (necessary) optimization for this query.
34-
*/
35-
class DangerousPrefix extends string {
36-
DangerousPrefix() {
37-
this = ["/..", "../"] or
38-
this = "<!--" or
39-
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
40-
}
41-
}
42-
43-
/**
44-
* A substring of a prefix that may be dangerous to sanitize explicitly.
45-
*/
46-
class DangerousPrefixSubstring extends string {
47-
DangerousPrefixSubstring() {
48-
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
49-
}
50-
}
51-
52-
/**
53-
* Gets a dangerous prefix that is in the prefix language of `t`.
54-
*/
55-
DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
56-
result = getADangerousMatchedPrefixSubstring(t) and
57-
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
58-
}
59-
60-
private import semmle.javascript.security.regexp.NfaUtils as NfaUtils
61-
62-
/**
63-
* Gets a char from a dangerous prefix that is matched by `t`.
64-
*/
65-
pragma[noinline]
66-
DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
67-
t.isNullable() and result = ""
68-
or
69-
t.getAMatchedString() = result
70-
or
71-
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
72-
exists(NfaUtils::CharacterClass cc |
73-
cc = NfaUtils::getCanonicalCharClass(t) and
74-
cc.matches(result) and
75-
result.regexpMatch("\\w") and
76-
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
77-
not cc.matches(">")
78-
)
79-
or
80-
t instanceof RegExpDot and
81-
result.length() = 1
82-
or
83-
(
84-
t instanceof RegExpOpt or
85-
t instanceof RegExpStar or
86-
t instanceof RegExpPlus or
87-
t instanceof RegExpGroup or
88-
t instanceof RegExpAlt
89-
) and
90-
result = getADangerousMatchedChar(t.getAChild())
91-
}
92-
93-
/**
94-
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
95-
*
96-
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
97-
*/
98-
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
99-
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
100-
or
101-
result = getADangerousMatchedChar(t)
102-
or
103-
// loop around for repetitions (only considering alphanumeric characters in the repetition)
104-
exists(RepetitionMatcher repetition | t = repetition |
105-
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
106-
)
107-
}
108-
109-
class RepetitionMatcher extends EmptyReplaceRegExpTerm {
110-
string char;
111-
112-
pragma[noinline]
113-
RepetitionMatcher() {
114-
(this instanceof RegExpPlus or this instanceof RegExpStar) and
115-
char = getADangerousMatchedChar(this.getAChild()) and
116-
char.regexpMatch("\\w")
117-
}
118-
119-
pragma[noinline]
120-
string getAChar() { result = char }
121-
}
122-
123-
/**
124-
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
125-
*/
126-
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
127-
prefix = getADangerousMatchedPrefix(t) and
128-
(
129-
kind = "path injection" and
130-
// upwards navigation
131-
prefix = ["/..", "../"] and
132-
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*") // explicit path name mentions make this an unlikely sanitizer
133-
or
134-
kind = "HTML element injection" and
135-
(
136-
// comments
137-
prefix = "<!--" and
138-
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*") // explicit comment content mentions make this an unlikely sanitizer
139-
or
140-
// specific tags
141-
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"] // the `cript|scrip` case has been observed in the wild several times
142-
)
143-
)
144-
or
145-
kind = "HTML attribute injection" and
146-
prefix =
147-
[
148-
// ordinary event handler prefix
149-
"on",
150-
// angular prefixes
151-
"ng-", "ng:", "data-ng-", "x-ng-"
152-
] and
153-
(
154-
// explicit matching: `onclick` and `ng-bind`
155-
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
156-
or
157-
// regexp-based matching: `on[a-z]+`
158-
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
159-
start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix) and
160-
isCommonWordMatcher(start.getSuccessor())
161-
)
162-
)
163-
}
164-
165-
/**
166-
* Holds if `t` is a common pattern for matching words
167-
*/
168-
predicate isCommonWordMatcher(RegExpTerm t) {
169-
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
170-
// [a-z]+ and similar
171-
quantified
172-
.(RegExpCharacterClass)
173-
.getAChild()
174-
.(RegExpCharacterRange)
175-
.isRange(["a", "A"], ["z", "Z"])
176-
or
177-
// \w+ or [\w]+
178-
[quantified, quantified.(RegExpCharacterClass).getAChild()]
179-
.(RegExpCharacterClassEscape)
180-
.getValue() = "w"
181-
)
182-
}
17+
private import semmle.javascript.security.IncompleteMultiCharacterSanitization
18318

18419
from
18520
StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,

0 commit comments

Comments
 (0)