Skip to content

Commit c234bd9

Browse files
committed
Ruby: IncompleteMultiCharacterSanitization Query
This query is similar to IncompleteSanitization but for multi-character sequences.
1 parent 6e289a9 commit c234bd9

File tree

6 files changed

+419
-0
lines changed

6 files changed

+419
-0
lines changed
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/**
2+
* Provides predicates for reasoning about improper multi-character sanitization.
3+
*/
4+
5+
private import ruby
6+
private import codeql.ruby.regexp.RegExpTreeView as RETV
7+
private import codeql.ruby.security.performance.ReDoSUtil as ReDoSUtil
8+
private import codeql.ruby.DataFlow
9+
private import codeql.ruby.frameworks.core.String
10+
private import codeql.ruby.dataflow.internal.DataFlowDispatch
11+
12+
/**
13+
* A regexp term that matches substrings that should be replaced with the empty string.
14+
*/
15+
class EmptyReplaceRegExpTerm extends RETV::RegExpTerm {
16+
private StringSubstitutionCall call;
17+
18+
EmptyReplaceRegExpTerm() {
19+
call.getReplacementString() = "" and
20+
this = call.getPatternRegExp().getRegExpTerm().getAChild*()
21+
}
22+
23+
/**
24+
* Get the substitution call that uses this regexp term.
25+
*/
26+
StringSubstitutionCall getCall() { result = call }
27+
}
28+
29+
/**
30+
* A prefix that may be dangerous to sanitize explicitly.
31+
*
32+
* Note that this class exists solely as a (necessary) optimization for this query.
33+
*/
34+
private class DangerousPrefix extends string {
35+
DangerousPrefix() {
36+
this = ["/..", "../"] or
37+
this = "<!--" or
38+
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
39+
}
40+
}
41+
42+
/**
43+
* A substring of a prefix that may be dangerous to sanitize explicitly.
44+
*/
45+
private class DangerousPrefixSubstring extends string {
46+
DangerousPrefixSubstring() {
47+
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
48+
}
49+
}
50+
51+
/**
52+
* Gets a dangerous prefix that is in the prefix language of `t`.
53+
*/
54+
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
55+
result = getADangerousMatchedPrefixSubstring(t) and
56+
not exists(EmptyReplaceRegExpTerm pred |
57+
pred = t.getPredecessor+() and not pred.matchesEmptyString()
58+
)
59+
}
60+
61+
pragma[noinline]
62+
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
63+
t.matchesEmptyString() and result = ""
64+
or
65+
result = t.getAMatch()
66+
or
67+
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
68+
exists(ReDoSUtil::CharacterClass cc |
69+
cc = ReDoSUtil::getCanonicalCharClass(t) and
70+
cc.matches(result) and
71+
result.regexpMatch("\\w") and
72+
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
73+
not cc.matches(">")
74+
)
75+
or
76+
t instanceof RETV::RegExpDot and
77+
result.length() = 1
78+
or
79+
(
80+
t instanceof RETV::RegExpOpt or
81+
t instanceof RETV::RegExpStar or
82+
t instanceof RETV::RegExpPlus or
83+
t instanceof RETV::RegExpGroup or
84+
t instanceof RETV::RegExpAlt
85+
) and
86+
result = getADangerousMatchedChar(t.getAChild())
87+
}
88+
89+
/**
90+
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
91+
*
92+
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
93+
*/
94+
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
95+
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
96+
or
97+
result = getADangerousMatchedChar(t)
98+
or
99+
// loop around for repetitions (only considering alphanumeric characters in the repetition)
100+
exists(RepetitionMatcher repetition | t = repetition |
101+
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
102+
)
103+
}
104+
105+
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
106+
string char;
107+
108+
pragma[noinline]
109+
RepetitionMatcher() {
110+
(this instanceof RETV::RegExpPlus or this instanceof RETV::RegExpStar) and
111+
char = getADangerousMatchedChar(this.getAChild()) and
112+
char.regexpMatch("\\w")
113+
}
114+
115+
pragma[noinline]
116+
string getAChar() { result = char }
117+
}
118+
119+
/**
120+
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`.
121+
*/
122+
private predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
123+
prefix = getADangerousMatchedPrefix(t) and
124+
(
125+
kind = "path injection" and
126+
prefix = ["/..", "../"] and
127+
// If the regex is matching explicit path components, it is unlikely that it's being used as a sanitizer.
128+
not t.getSuccessor*().getAMatch().regexpMatch("(?is).*[a-z0-9_-].*")
129+
or
130+
kind = "HTML element injection" and
131+
(
132+
// comments
133+
prefix = "<!--" and
134+
// If the regex is matching explicit textual content of an HTML comment, it is unlikely that it's being used as a sanitizer.
135+
not t.getSuccessor*().getAMatch().regexpMatch("(?is).*[a-z0-9_].*")
136+
or
137+
// specific tags
138+
// the `cript|scrip` case has been observed in the wild several times
139+
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
140+
)
141+
)
142+
or
143+
kind = "HTML attribute injection" and
144+
prefix =
145+
[
146+
// ordinary event handler prefix
147+
"on",
148+
// angular prefixes
149+
"ng-", "ng:", "data-ng-", "x-ng-"
150+
] and
151+
(
152+
// explicit matching: `onclick` and `ng-bind`
153+
t.getAMatch().regexpMatch("(?i)" + prefix + "[a-z]+")
154+
or
155+
// regexp-based matching: `on[a-z]+`
156+
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
157+
start.getAMatch().regexpMatch("(?i)[^a-z]*" + prefix) and
158+
isCommonWordMatcher(start.getSuccessor())
159+
)
160+
)
161+
}
162+
163+
/**
164+
* Holds if `t` is a common pattern for matching words
165+
*/
166+
private predicate isCommonWordMatcher(RETV::RegExpTerm t) {
167+
exists(RETV::RegExpTerm quantified | quantified = t.(RETV::RegExpQuantifier).getChild(0) |
168+
// [a-z]+ and similar
169+
quantified
170+
.(RETV::RegExpCharacterClass)
171+
.getAChild()
172+
.(RETV::RegExpCharacterRange)
173+
.isRange(["a", "A"], ["z", "Z"])
174+
or
175+
// \w+ or [\w]+
176+
[quantified, quantified.(RETV::RegExpCharacterClass).getAChild()]
177+
.(RETV::RegExpCharacterClassEscape)
178+
.getValue() = "w"
179+
)
180+
}
181+
182+
/**
183+
* Holds if `replace` has a pattern argument containing a regular expression
184+
* `dangerous` which matches a dangerous string beginning with `prefix`, in
185+
* attempt to avoid a vulnerability of kind `kind`.
186+
*/
187+
predicate hasResult(
188+
StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
189+
) {
190+
exists(EmptyReplaceRegExpTerm regexp |
191+
replace = regexp.getCall() and
192+
dangerous.getRootTerm() = regexp and
193+
// skip leading optional elements
194+
not dangerous.matchesEmptyString() and
195+
// only warn about the longest match
196+
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
197+
// only warn once per kind
198+
not exists(EmptyReplaceRegExpTerm other |
199+
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
200+
|
201+
matchesDangerousPrefix(other, _, kind) and
202+
not other.matchesEmptyString()
203+
) and
204+
not exists(RETV::RegExpCaret c | regexp = c.getRootTerm()) and
205+
not exists(RETV::RegExpDollar d | regexp = d.getRootTerm()) and
206+
// Don't flag replace operations that are called repeatedly in a loop, as they can actually work correctly.
207+
not replace.flowsTo(replace.getReceiver+())
208+
)
209+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<include src="IncompleteSanitization.qhelp" />
7+
8+
</qhelp>
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* @name Incomplete multi-character sanitization
3+
* @description A sanitizer that removes a sequence of characters may reintroduce the dangerous sequence.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @security-severity 7.8
7+
* @precision high
8+
* @id rb/incomplete-multi-character-sanitization
9+
* @tags correctness
10+
* security
11+
* external/cwe/cwe-020
12+
* external/cwe/cwe-080
13+
* external/cwe/cwe-116
14+
*/
15+
16+
import ruby
17+
import codeql.ruby.frameworks.core.String
18+
import codeql.ruby.DataFlow
19+
import codeql.ruby.security.IncompleteMultiCharacterSanitizationQuery
20+
21+
from StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
22+
where hasResult(replace, dangerous, prefix, kind)
23+
select replace, "This string may still contain $@, which may cause a " + kind + " vulnerability.",
24+
dangerous, prefix

ruby/ql/test/query-tests/security/cwe-116/IncompleteMultiCharacterSanitization/IncompleteMultiCharacterSanitization.expected

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* @kind problem
3+
*/
4+
5+
import ruby
6+
import codeql.ruby.security.IncompleteMultiCharacterSanitizationQuery as Query
7+
import codeql.ruby.frameworks.core.String
8+
import TestUtilities.InlineExpectationsTest
9+
10+
class Test extends InlineExpectationsTest {
11+
Test() { this = "IncompleteMultiCharacterSanitizationTest" }
12+
13+
override string getARelevantTag() { result = "hasResult" }
14+
15+
override predicate hasActualResult(Location location, string element, string tag, string value) {
16+
tag = "hasResult" and
17+
hasResult(location, element, value)
18+
}
19+
}
20+
21+
predicate hasResult(Location location, string element, string value) {
22+
exists(
23+
StringSubstitutionCall replace, Query::EmptyReplaceRegExpTerm dangerous, string prefix,
24+
string kind
25+
|
26+
replace.getLocation() = location and
27+
element = replace.toString() and
28+
value = shortKind(kind)
29+
|
30+
Query::hasResult(replace, dangerous, prefix, kind)
31+
)
32+
}
33+
34+
bindingset[kind]
35+
string shortKind(string kind) {
36+
kind = "HTML element injection" and result = "html"
37+
or
38+
kind = "path injection" and result = "path"
39+
or
40+
kind = "HTML attribute injection" and result = "attr"
41+
}

0 commit comments

Comments
 (0)