Skip to content

Commit e046690

Browse files
authored
Merge pull request github#12992 from Sim4n6/ruby-UBV
[Ruby] Add Unicode Bypass Validation query, test and help file
2 parents baabd2d + 52dd247 commit e046690

File tree

9 files changed

+352
-0
lines changed

9 files changed

+352
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for detecting
3+
* "Unicode transformation"
4+
* vulnerabilities, as well as extension points for adding your own.
5+
*/
6+
7+
private import ruby
8+
9+
/**
10+
* Provides the extension points (`Source`, `Sink` and `Sanitizer`) for detecting
11+
* "Unicode transformation"
12+
* vulnerabilities. Every class here is abstract and empty; extend it to add your own.
13+
*/
14+
module UnicodeBypassValidation {
15+
/**
16+
* A data flow source for "Unicode transformation" vulnerabilities. Extend this class to add sources.
17+
*/
18+
abstract class Source extends DataFlow::Node { }
19+
20+
/**
21+
* A data flow sink for "Unicode transformation" vulnerabilities. Extend this class to add sinks.
22+
*/
23+
abstract class Sink extends DataFlow::Node { }
24+
25+
/**
26+
* A sanitizer for "Unicode transformation" vulnerabilities. Extend this class to add sanitizers.
27+
*/
28+
abstract class Sanitizer extends DataFlow::Node { }
29+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
3+
*/
4+
5+
private import ruby
6+
private import codeql.ruby.dataflow.RemoteFlowSources
7+
private import codeql.ruby.Concepts
8+
private import codeql.ruby.TaintTracking
9+
private import codeql.ruby.ApiGraphs
10+
import UnicodeBypassValidationCustomizations::UnicodeBypassValidation
11+
12+
/** The flow state, identified by the string "PreValidation", signifying that a logical validation has not been performed yet. */
13+
class PreValidation extends DataFlow::FlowState {
14+
PreValidation() { this = "PreValidation" }
15+
}
16+
17+
/** The flow state, identified by the string "PostValidation", signifying that a logical validation has been performed. */
18+
class PostValidation extends DataFlow::FlowState {
19+
PostValidation() { this = "PostValidation" }
20+
}
21+
22+
/**
 * A taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
 *
 * This configuration uses two flow states, `PreValidation` and `PostValidation`,
 * to track the requirement that a logical validation has been performed before the Unicode transformation.
 * Flow starts at a remote flow source in state `PreValidation`, moves to `PostValidation`
 * when it passes through a validation-like step, and is reported when it reaches a
 * Unicode normalization in state `PostValidation`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "UnicodeBypassValidation" }

  /** Holds if `source` is a remote flow source and `state` is `PreValidation`. */
  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
    source instanceof RemoteFlowSource and state instanceof PreValidation
  }

  /**
   * Holds if the step from `nodeFrom` to `nodeTo` counts as a logical validation
   * (escaping, regex execution, or a string manipulation/inspection call), moving the
   * flow state from `PreValidation` to `PostValidation`.
   */
  override predicate isAdditionalTaintStep(
    DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
    DataFlow::FlowState stateTo
  ) {
    (
      exists(Escaping escaping | nodeFrom = escaping.getAnInput() and nodeTo = escaping.getOutput())
      or
      exists(RegexExecution re | nodeFrom = re.getString() and nodeTo = re)
      or
      // String Manipulation Method Calls
      // https://ruby-doc.org/core-2.7.0/String.html
      // The call result carries the (possibly modified) string, so flow continues from
      // the receiver to the call node.
      exists(DataFlow::CallNode cn |
        cn.getMethodName() =
          [
            [
              "ljust", "lstrip", "succ", "next", "rjust", "capitalize", "chomp", "gsub", "chop",
              "downcase", "swapcase", "upcase", "scrub", "slice", "squeeze", "strip", "sub", "tr",
              "tr_s", "reverse"
            ] + ["", "!"], "concat", "dump", "each_line", "replace", "insert", "inspect", "lines",
            "partition", "prepend", "rpartition", "scan", "split", "undump", "unpack" + ["", "1"]
          ] and
        nodeFrom = cn.getReceiver() and
        nodeTo = cn
      )
      or
      // String inspection/check calls: the step stays on the receiver node and only the
      // flow state changes.
      exists(DataFlow::CallNode cn |
        cn.getMethodName() =
          [
            "casecmp" + ["", "?"], "center", "count", "each_char", "index", "rindex", "sum",
            ["delete", "delete_prefix", "delete_suffix"] + ["", "!"],
            ["start_with", "end_with", "eql", "include"] + ["?", "!"], "match" + ["", "?"]
          ] and
        nodeFrom = cn.getReceiver() and
        nodeTo = nodeFrom
      )
      or
      exists(DataFlow::CallNode cn |
        cn = API::getTopLevelMember("CGI").getAMethodCall("escapeHTML") and
        nodeFrom = cn.getArgument(0) and
        nodeTo = cn
      )
    ) and
    stateFrom instanceof PreValidation and
    stateTo instanceof PostValidation
  }

  /**
   * Holds if `sink` is the value passed to a Unicode normalization and `state` is `PostValidation`.
   *
   * For `unicode_normalize`, the form must be given explicitly as one of the symbols
   * `:nfc`, `:nfd`, `:nfkc` or `:nfkd`. Normalization calls from `UnicodeUtils`, `Eprun`,
   * `UNF::Normalizer` and `ActiveSupport::Multibyte::Chars` are also treated as sinks.
   */
  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
    (
      // NOTE(review): a call to `unicode_normalize` without an explicit form argument
      // is not matched by this disjunct.
      exists(DataFlow::CallNode cn |
        cn.getMethodName() = "unicode_normalize" and
        cn.getArgument(0).getConstantValue().getSymbol() = ["nfkc", "nfc", "nfkd", "nfd"] and
        sink = cn.getReceiver()
      )
      or
      // unicode_utils
      exists(API::MethodAccessNode mac |
        mac = API::getTopLevelMember("UnicodeUtils").getMethod(["nfkd", "nfc", "nfd", "nfkc"]) and
        sink = mac.getParameter(0).asSink()
      )
      or
      // eprun
      exists(API::MethodAccessNode mac |
        mac = API::getTopLevelMember("Eprun").getMethod("normalize") and
        sink = mac.getParameter(0).asSink()
      )
      or
      // unf
      exists(API::MethodAccessNode mac |
        mac = API::getTopLevelMember("UNF").getMember("Normalizer").getMethod("normalize") and
        sink = mac.getParameter(0).asSink()
      )
      or
      // ActiveSupport::Multibyte::Chars: the argument of `Chars.new(...)` is a sink when
      // `normalize` is called on the resulting object.
      exists(DataFlow::CallNode cn |
        cn =
          API::getTopLevelMember("ActiveSupport")
              .getMember("Multibyte")
              .getMember("Chars")
              .getMethod("new")
              .getCallNode() and
        exists(cn.getAMethodCall("normalize")) and
        sink = cn.getArgument(0)
      )
    ) and
    state instanceof PostValidation
  }
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
2+
<qhelp>
3+
<overview>
4+
<p>Security checks bypass due to a Unicode transformation</p>
5+
<p>
6+
If a Unicode transformation is performed after some security checks or logical
7+
validation, the
8+
latter could be bypassed due to a potential Unicode character collision.
9+
The validations of concern are any character escaping, any regex validation or any string
10+
verification.
11+
</p>
12+
</overview>
13+
<recommendation>
14+
<p> Perform a Unicode normalization before the logical validation. </p>
15+
</recommendation>
16+
<example>
17+
18+
<p> The following example showcases the bypass of all checks performed by <code>
19+
html_escape()</code> due to a subsequent Unicode normalization.</p>
20+
<p>For instance: the character U+FE64 (<code>﹤</code>) is not filtered-out by the
21+
html_escape() function. But due to the Unicode normalization, the character is
22+
transformed and would become U+003C (<code> &lt; </code> ).</p>
23+
24+
<sample src="./examples/unicode_normalization.rb" />
25+
26+
</example>
27+
<example>
28+
29+
<p> The next example shows how an early deletion of a character may be bypassed due to a
30+
potential Unicode character collision.</p>
31+
<p>The character <code>&lt;</code> was expected to be omitted from the string <code>s</code>.
32+
However, a malicious user may consider using its colliding Unicode character U+FE64 <code>
33+
﹤</code> as an alternative. Due to the late Unicode normalization with the form NFKC,
34+
the resulting string would contain the unintended character <code>&lt;</code> . </p>
35+
36+
<sample src="./examples/unicode_normalization2.rb" />
37+
38+
</example>
39+
<references>
40+
<li> Research study: <a
41+
href="https://gosecure.github.io/presentations/2021-02-unicode-owasp-toronto/philippe_arteau_owasp_unicode_v4.pdf">
42+
Unicode vulnerabilities that could bYte you
43+
</a>
44+
</li>
45+
<li>
46+
<a
47+
href="https://gosecure.github.io/unicode-pentester-cheatsheet/">Unicode pentest
48+
cheatsheet</a>. </li>
49+
</references>
50+
</qhelp>
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* @name Bypass Logical Validation Using Unicode Characters
3+
* @description A Unicode transformation is using remote user-controlled data. The transformation is a Unicode normalization using one of the forms "NFC", "NFD", "NFKC" or "NFKD". In all cases, the security measures implemented or the logical validation performed before the Unicode transformation (escaping injection characters, validating with regex patterns or performing string-based checks) are **bypassable** by special Unicode characters.
4+
* @kind path-problem
5+
* @id rb/unicode-bypass-validation
6+
* @precision high
7+
* @problem.severity error
8+
* @tags security
9+
* experimental
10+
* external/cwe/cwe-176
11+
* external/cwe/cwe-179
12+
* external/cwe/cwe-180
13+
*/
14+
15+
import ruby
16+
import codeql.ruby.experimental.UnicodeBypassValidationQuery
17+
import DataFlow::PathGraph
18+
19+
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where config.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink,
22+
"This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters.",
23+
sink.getNode(), "Unicode transformation (Unicode normalization)", source.getNode(),
24+
"remote user-controlled data"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
require "erb"
2+
3+
# Example controller: the request parameter is HTML-escaped first and Unicode-normalized
# only afterwards, which is the ordering this query reports (see the `$result=BAD` lines).
class UnicodeNormalizationHtMLSafeController < ActionController::Base
4+
def unicodeNormalize
5+
unicode_input = params[:unicode_input]
6+
unicode_html_safe = ERB::Util.html_escape(unicode_input)
7+
normalized_nfkc = unicode_html_safe.unicode_normalize(:nfkc) # $result=BAD
8+
normalized_nfc = unicode_html_safe.unicode_normalize(:nfc) # $result=BAD
9+
end
10+
end
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# The string starts with U+FE64 (a small less-than sign), not the ASCII "<".
s = "﹤xss>"
2+
# delete("<") removes only ASCII "<"; the NFKC normalization runs afterwards, and the
# final include?("<") check prints whether an ASCII "<" is present in the result.
puts s.delete("<").unicode_normalize(:nfkc).include?("<")
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
edges
2+
| unicode_normalization.rb:7:5:7:17 | unicode_input | unicode_normalization.rb:8:23:8:35 | unicode_input |
3+
| unicode_normalization.rb:7:5:7:17 | unicode_input | unicode_normalization.rb:9:22:9:34 | unicode_input |
4+
| unicode_normalization.rb:7:21:7:26 | call to params | unicode_normalization.rb:7:21:7:42 | ...[...] |
5+
| unicode_normalization.rb:7:21:7:42 | ...[...] | unicode_normalization.rb:7:5:7:17 | unicode_input |
6+
| unicode_normalization.rb:15:5:15:17 | unicode_input | unicode_normalization.rb:16:27:16:39 | unicode_input |
7+
| unicode_normalization.rb:15:5:15:17 | unicode_input | unicode_normalization.rb:16:27:16:39 | unicode_input |
8+
| unicode_normalization.rb:15:21:15:26 | call to params | unicode_normalization.rb:15:21:15:42 | ...[...] |
9+
| unicode_normalization.rb:15:21:15:26 | call to params | unicode_normalization.rb:15:21:15:42 | ...[...] |
10+
| unicode_normalization.rb:15:21:15:42 | ...[...] | unicode_normalization.rb:15:5:15:17 | unicode_input |
11+
| unicode_normalization.rb:15:21:15:42 | ...[...] | unicode_normalization.rb:15:5:15:17 | unicode_input |
12+
| unicode_normalization.rb:16:5:16:23 | unicode_input_manip | unicode_normalization.rb:17:23:17:41 | unicode_input_manip |
13+
| unicode_normalization.rb:16:5:16:23 | unicode_input_manip | unicode_normalization.rb:18:22:18:40 | unicode_input_manip |
14+
| unicode_normalization.rb:16:27:16:39 | unicode_input | unicode_normalization.rb:16:27:16:59 | call to sub |
15+
| unicode_normalization.rb:16:27:16:39 | unicode_input | unicode_normalization.rb:16:27:16:59 | call to sub |
16+
| unicode_normalization.rb:16:27:16:59 | call to sub | unicode_normalization.rb:16:5:16:23 | unicode_input_manip |
17+
| unicode_normalization.rb:24:5:24:17 | unicode_input | unicode_normalization.rb:25:37:25:49 | unicode_input |
18+
| unicode_normalization.rb:24:21:24:26 | call to params | unicode_normalization.rb:24:21:24:42 | ...[...] |
19+
| unicode_normalization.rb:24:21:24:42 | ...[...] | unicode_normalization.rb:24:5:24:17 | unicode_input |
20+
| unicode_normalization.rb:25:5:25:21 | unicode_html_safe | unicode_normalization.rb:26:23:26:39 | unicode_html_safe |
21+
| unicode_normalization.rb:25:5:25:21 | unicode_html_safe | unicode_normalization.rb:27:22:27:38 | unicode_html_safe |
22+
| unicode_normalization.rb:25:25:25:50 | call to html_escape | unicode_normalization.rb:25:5:25:21 | unicode_html_safe |
23+
| unicode_normalization.rb:25:37:25:49 | unicode_input | unicode_normalization.rb:25:25:25:50 | call to html_escape |
24+
| unicode_normalization.rb:33:5:33:17 | unicode_input | unicode_normalization.rb:34:40:34:52 | unicode_input |
25+
| unicode_normalization.rb:33:21:33:26 | call to params | unicode_normalization.rb:33:21:33:42 | ...[...] |
26+
| unicode_normalization.rb:33:21:33:42 | ...[...] | unicode_normalization.rb:33:5:33:17 | unicode_input |
27+
| unicode_normalization.rb:34:5:34:21 | unicode_html_safe | unicode_normalization.rb:35:23:35:39 | unicode_html_safe |
28+
| unicode_normalization.rb:34:5:34:21 | unicode_html_safe | unicode_normalization.rb:36:22:36:38 | unicode_html_safe |
29+
| unicode_normalization.rb:34:25:34:53 | call to escapeHTML | unicode_normalization.rb:34:25:34:63 | call to html_safe |
30+
| unicode_normalization.rb:34:25:34:63 | call to html_safe | unicode_normalization.rb:34:5:34:21 | unicode_html_safe |
31+
| unicode_normalization.rb:34:40:34:52 | unicode_input | unicode_normalization.rb:34:25:34:53 | call to escapeHTML |
32+
nodes
33+
| unicode_normalization.rb:7:5:7:17 | unicode_input | semmle.label | unicode_input |
34+
| unicode_normalization.rb:7:21:7:26 | call to params | semmle.label | call to params |
35+
| unicode_normalization.rb:7:21:7:42 | ...[...] | semmle.label | ...[...] |
36+
| unicode_normalization.rb:8:23:8:35 | unicode_input | semmle.label | unicode_input |
37+
| unicode_normalization.rb:9:22:9:34 | unicode_input | semmle.label | unicode_input |
38+
| unicode_normalization.rb:15:5:15:17 | unicode_input | semmle.label | unicode_input |
39+
| unicode_normalization.rb:15:5:15:17 | unicode_input | semmle.label | unicode_input |
40+
| unicode_normalization.rb:15:21:15:26 | call to params | semmle.label | call to params |
41+
| unicode_normalization.rb:15:21:15:42 | ...[...] | semmle.label | ...[...] |
42+
| unicode_normalization.rb:15:21:15:42 | ...[...] | semmle.label | ...[...] |
43+
| unicode_normalization.rb:16:5:16:23 | unicode_input_manip | semmle.label | unicode_input_manip |
44+
| unicode_normalization.rb:16:27:16:39 | unicode_input | semmle.label | unicode_input |
45+
| unicode_normalization.rb:16:27:16:39 | unicode_input | semmle.label | unicode_input |
46+
| unicode_normalization.rb:16:27:16:59 | call to sub | semmle.label | call to sub |
47+
| unicode_normalization.rb:17:23:17:41 | unicode_input_manip | semmle.label | unicode_input_manip |
48+
| unicode_normalization.rb:18:22:18:40 | unicode_input_manip | semmle.label | unicode_input_manip |
49+
| unicode_normalization.rb:24:5:24:17 | unicode_input | semmle.label | unicode_input |
50+
| unicode_normalization.rb:24:21:24:26 | call to params | semmle.label | call to params |
51+
| unicode_normalization.rb:24:21:24:42 | ...[...] | semmle.label | ...[...] |
52+
| unicode_normalization.rb:25:5:25:21 | unicode_html_safe | semmle.label | unicode_html_safe |
53+
| unicode_normalization.rb:25:25:25:50 | call to html_escape | semmle.label | call to html_escape |
54+
| unicode_normalization.rb:25:37:25:49 | unicode_input | semmle.label | unicode_input |
55+
| unicode_normalization.rb:26:23:26:39 | unicode_html_safe | semmle.label | unicode_html_safe |
56+
| unicode_normalization.rb:27:22:27:38 | unicode_html_safe | semmle.label | unicode_html_safe |
57+
| unicode_normalization.rb:33:5:33:17 | unicode_input | semmle.label | unicode_input |
58+
| unicode_normalization.rb:33:21:33:26 | call to params | semmle.label | call to params |
59+
| unicode_normalization.rb:33:21:33:42 | ...[...] | semmle.label | ...[...] |
60+
| unicode_normalization.rb:34:5:34:21 | unicode_html_safe | semmle.label | unicode_html_safe |
61+
| unicode_normalization.rb:34:25:34:53 | call to escapeHTML | semmle.label | call to escapeHTML |
62+
| unicode_normalization.rb:34:25:34:63 | call to html_safe | semmle.label | call to html_safe |
63+
| unicode_normalization.rb:34:40:34:52 | unicode_input | semmle.label | unicode_input |
64+
| unicode_normalization.rb:35:23:35:39 | unicode_html_safe | semmle.label | unicode_html_safe |
65+
| unicode_normalization.rb:36:22:36:38 | unicode_html_safe | semmle.label | unicode_html_safe |
66+
subpaths
67+
#select
68+
| unicode_normalization.rb:8:23:8:35 | unicode_input | unicode_normalization.rb:7:21:7:26 | call to params | unicode_normalization.rb:8:23:8:35 | unicode_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:8:23:8:35 | unicode_input | Unicode transformation (Unicode normalization) | unicode_normalization.rb:7:21:7:26 | call to params | remote user-controlled data |
69+
| unicode_normalization.rb:9:22:9:34 | unicode_input | unicode_normalization.rb:7:21:7:26 | call to params | unicode_normalization.rb:9:22:9:34 | unicode_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:9:22:9:34 | unicode_input | Unicode transformation (Unicode normalization) | unicode_normalization.rb:7:21:7:26 | call to params | remote user-controlled data |
70+
| unicode_normalization.rb:17:23:17:41 | unicode_input_manip | unicode_normalization.rb:15:21:15:26 | call to params | unicode_normalization.rb:17:23:17:41 | unicode_input_manip | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:17:23:17:41 | unicode_input_manip | Unicode transformation (Unicode normalization) | unicode_normalization.rb:15:21:15:26 | call to params | remote user-controlled data |
71+
| unicode_normalization.rb:18:22:18:40 | unicode_input_manip | unicode_normalization.rb:15:21:15:26 | call to params | unicode_normalization.rb:18:22:18:40 | unicode_input_manip | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:18:22:18:40 | unicode_input_manip | Unicode transformation (Unicode normalization) | unicode_normalization.rb:15:21:15:26 | call to params | remote user-controlled data |
72+
| unicode_normalization.rb:26:23:26:39 | unicode_html_safe | unicode_normalization.rb:24:21:24:26 | call to params | unicode_normalization.rb:26:23:26:39 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:26:23:26:39 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:24:21:24:26 | call to params | remote user-controlled data |
73+
| unicode_normalization.rb:27:22:27:38 | unicode_html_safe | unicode_normalization.rb:24:21:24:26 | call to params | unicode_normalization.rb:27:22:27:38 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:27:22:27:38 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:24:21:24:26 | call to params | remote user-controlled data |
74+
| unicode_normalization.rb:35:23:35:39 | unicode_html_safe | unicode_normalization.rb:33:21:33:26 | call to params | unicode_normalization.rb:35:23:35:39 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:35:23:35:39 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:33:21:33:26 | call to params | remote user-controlled data |
75+
| unicode_normalization.rb:36:22:36:38 | unicode_html_safe | unicode_normalization.rb:33:21:33:26 | call to params | unicode_normalization.rb:36:22:36:38 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:36:22:36:38 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:33:21:33:26 | call to params | remote user-controlled data |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/cwe-176/UnicodeBypassValidation.ql

0 commit comments

Comments
 (0)