Skip to content

Commit 59a8b21

Browse files
authored
Merge pull request #10862 from erik-krogh/unsafeCodeConstruction
Rb: Add an `unsafe-code-construction` query
2 parents 7f880a2 + f2658a0 commit 59a8b21

19 files changed

+472
-1
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about
3+
* code constructed from library input vulnerabilities, as
4+
* well as extension points for adding your own.
5+
*/
6+
7+
private import ruby
8+
private import codeql.ruby.ApiGraphs
9+
private import codeql.ruby.frameworks.core.Gem::Gem as Gem
10+
private import codeql.ruby.Concepts as Concepts
11+
12+
/**
13+
* Module containing sources, sinks, and sanitizers for code constructed from library input.
14+
*/
15+
module UnsafeCodeConstruction {
16+
/** A source for code constructed from library input vulnerabilities. */
17+
abstract class Source extends DataFlow::Node { }
18+
19+
/** An input parameter to a gem seen as a source. */
20+
private class LibraryInputAsSource extends Source instanceof DataFlow::ParameterNode {
21+
LibraryInputAsSource() {
22+
this = Gem::getALibraryInput() and
23+
not this.getName() = "code"
24+
}
25+
}
26+
27+
/** A sink for code constructed from library input vulnerabilities. */
28+
abstract class Sink extends DataFlow::Node {
29+
/**
30+
* Gets the node where the unsafe code is executed.
31+
*/
32+
abstract DataFlow::Node getCodeSink();
33+
34+
/**
35+
* Gets the type of sink.
36+
*/
37+
string getSinkType() { result = "code construction" }
38+
}
39+
40+
/** Gets a node that is eventually executed as code at `codeExec`. */
41+
DataFlow::Node getANodeExecutedAsCode(Concepts::CodeExecution codeExec) {
42+
result = getANodeExecutedAsCode(TypeTracker::TypeBackTracker::end(), codeExec)
43+
}
44+
45+
import codeql.ruby.typetracking.TypeTracker as TypeTracker
46+
47+
/** Gets a node that is eventually executed as code at `codeExec`, type-tracked with `t`. */
48+
private DataFlow::LocalSourceNode getANodeExecutedAsCode(
49+
TypeTracker::TypeBackTracker t, Concepts::CodeExecution codeExec
50+
) {
51+
t.start() and
52+
result = codeExec.getCode().getALocalSource() and
53+
codeExec.runsArbitraryCode() // methods like `Object.send` is benign here, because of the string-construction the attacker cannot control the entire method name
54+
or
55+
exists(TypeTracker::TypeBackTracker t2 |
56+
result = getANodeExecutedAsCode(t2, codeExec).backtrack(t2, t)
57+
)
58+
}
59+
60+
/**
61+
* A string constructed using a `.join(...)` call, where the resulting string ends up being executed as code.
62+
*/
63+
class ArrayJoin extends Sink {
64+
Concepts::CodeExecution s;
65+
66+
ArrayJoin() {
67+
exists(DataFlow::CallNode call |
68+
call.getMethodName() = "join" and
69+
call.getNumberOfArguments() = 1 and // any string. E.g. ";" or "\n".
70+
call = getANodeExecutedAsCode(s) and
71+
this = call.getReceiver()
72+
)
73+
}
74+
75+
override DataFlow::Node getCodeSink() { result = s }
76+
77+
override string getSinkType() { result = "array" }
78+
}
79+
80+
/**
81+
* A string constructed from a string-literal (e.g. `"foo #{sink}"`),
82+
* where the resulting string ends up being executed as a code.
83+
*/
84+
class StringInterpolationAsSink extends Sink {
85+
Concepts::CodeExecution s;
86+
87+
StringInterpolationAsSink() {
88+
exists(Ast::StringlikeLiteral lit |
89+
any(DataFlow::Node n | n.asExpr().getExpr() = lit) = getANodeExecutedAsCode(s) and
90+
this.asExpr().getExpr() = lit.getComponent(_)
91+
)
92+
}
93+
94+
override DataFlow::Node getCodeSink() { result = s }
95+
96+
override string getSinkType() { result = "string interpolation" }
97+
}
98+
99+
import codeql.ruby.security.TaintedFormatStringSpecific as TaintedFormat
100+
101+
/**
102+
* A string constructed from a printf-style call,
103+
* where the resulting string ends up being executed as a code.
104+
*/
105+
class TaintedFormatStringAsSink extends Sink {
106+
Concepts::CodeExecution s;
107+
108+
TaintedFormatStringAsSink() {
109+
exists(TaintedFormat::PrintfStyleCall call |
110+
call = getANodeExecutedAsCode(s) and
111+
this = [call.getFormatArgument(_), call.getFormatString()]
112+
)
113+
}
114+
115+
override DataFlow::Node getCodeSink() { result = s }
116+
117+
override string getSinkType() { result = "string format" }
118+
}
119+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/**
2+
* Provides a taint-tracking configuration for reasoning about code
3+
* constructed from library input vulnerabilities.
4+
*
5+
* Note, for performance reasons: only import this file if `Configuration` is needed,
6+
* otherwise `UnsafeCodeConstructionCustomizations` should be imported instead.
7+
*/
8+
9+
import codeql.ruby.DataFlow
10+
import UnsafeCodeConstructionCustomizations::UnsafeCodeConstruction
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.dataflow.BarrierGuards
13+
14+
/**
15+
* A taint-tracking configuration for detecting code constructed from library input vulnerabilities.
16+
*/
17+
class Configuration extends TaintTracking::Configuration {
18+
Configuration() { this = "UnsafeShellCommandConstruction" }
19+
20+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
21+
22+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
23+
24+
override predicate isSanitizer(DataFlow::Node node) {
25+
node instanceof StringConstCompareBarrier or
26+
node instanceof StringConstArrayInclusionCallBarrier
27+
}
28+
29+
// override to require the path doesn't have unmatched return steps
30+
override DataFlow::FlowFeature getAFeature() {
31+
result instanceof DataFlow::FeatureHasSourceCallContext
32+
}
33+
34+
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
35+
// if an array element gets tainted, then we treat the entire array as tainted
36+
exists(DataFlow::CallNode call |
37+
call.getMethodName() = ["<<", "push", "append"] and
38+
call.getReceiver() = succ and
39+
pred = call.getArgument(0) and
40+
call.getNumberOfArguments() = 1
41+
)
42+
or
43+
exists(DataFlow::CallNode call |
44+
call.getMethodName() = "[]" and
45+
succ = call and
46+
pred = call.getArgument(_)
47+
)
48+
}
49+
}

ruby/ql/lib/codeql/ruby/security/UnsafeShellCommandConstructionCustomizations.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ module UnsafeShellCommandConstruction {
6767
*/
6868
class StringInterpolationAsSink extends Sink {
6969
Concepts::SystemCommandExecution s;
70-
Ast::StringLiteral lit;
70+
Ast::StringlikeLiteral lit;
7171

7272
StringInterpolationAsSink() {
7373
isUsedAsShellCommand(any(DataFlow::Node n | n.asExpr().getExpr() = lit), s) and
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/unsafe-code-construction`, to detect libraries that unsafely construct code from their inputs.
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
When a library function dynamically constructs code in a potentially unsafe way,
9+
it's important to document to clients of the library that the function should only be
10+
used with trusted inputs.
11+
12+
If the function is not documented as being potentially unsafe, then a client may
13+
incorrectly use inputs containing unsafe code fragments, and thereby leave the
14+
client vulnerable to code-injection attacks.
15+
</p>
16+
17+
</overview>
18+
19+
<recommendation>
20+
<p>
21+
Properly document library functions that construct code from unsanitized
22+
inputs, or avoid constructing code in the first place.
23+
</p>
24+
</recommendation>
25+
26+
<example>
27+
<p>
28+
The following example shows two methods implemented using <code>eval</code>: a simple
29+
deserialization routine and a getter method.
30+
If untrusted inputs are used with these methods,
31+
then an attacker might be able to execute arbitrary code on the system.
32+
</p>
33+
34+
<sample src="examples/UnsafeCodeConstruction.rb" />
35+
36+
<p>
37+
To avoid this problem, either properly document that the function is potentially
38+
unsafe, or use an alternative solution such as <code>JSON.parse</code> or another library
39+
that does not allow arbitrary code to be executed.
40+
</p>
41+
42+
<sample src="examples/UnsafeCodeConstructionSafe.rb" />
43+
44+
</example>
45+
46+
<example>
47+
<p>
48+
As another example, consider the below code which dynamically constructs
49+
a class that has a getter method with a custom name.
50+
</p>
51+
52+
<sample src="examples/UnsafeCodeConstruction2.rb" />
53+
54+
<p>
55+
The example dynamically constructs a string which is then executed using <code>module_eval</code>.
56+
This code will break if the specified name is not a valid Ruby identifier, and
57+
if the value is controlled by an attacker, then this could lead to code-injection.
58+
</p>
59+
60+
<p>
61+
A more robust implementation, that is also immune to code-injection,
62+
can be made by using <code>module_eval</code> with a block and using <code>define_method</code>
63+
to define the getter method.
64+
</p>
65+
66+
<sample src="examples/UnsafeCodeConstruction2Safe.rb" />
67+
</example>
68+
69+
<example>
70+
<p>
71+
This example dynamically registers a method on another class which
72+
forwards its arguments to a target object. This approach uses
73+
<code>module_eval</code> and string interpolation to construct class variables
74+
and methods.
75+
</p>
76+
77+
<sample src="examples/UnsafeCodeConstruction3.rb" />
78+
79+
<p>
80+
A safer approach is to use <code>class_variable_set</code> and
81+
<code>class_variable_get</code> along with <code>define_method</code>. String
82+
interpolation is still used to construct the class variable name, but this is
83+
safe because <code>class_variable_set</code> is not susceptible to code-injection.
84+
</p>
85+
86+
<p>
87+
<code>send</code> is used to dynamically call the method specified by <code>name</code>.
88+
This is a more robust alternative than the previous example, because it does not allow
89+
arbitrary code to be executed, but it does still allow for any method to be called
90+
on the target object.
91+
</p>
92+
93+
<sample src="examples/UnsafeCodeConstruction3Safe.rb" />
94+
</example>
95+
96+
<references>
97+
<li>
98+
OWASP:
99+
<a href="https://www.owasp.org/index.php/Code_Injection">Code Injection</a>.
100+
</li>
101+
<li>
102+
Wikipedia: <a href="https://en.wikipedia.org/wiki/Code_injection">Code Injection</a>.
103+
</li>
104+
<li>
105+
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-define_method">define_method</a>.
106+
</li>
107+
<li>
108+
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-class_variable_set">class_variable_set</a>.
109+
</li>
110+
</references>
111+
</qhelp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @name Unsafe code constructed from library input
3+
* @description Using externally controlled strings to construct code may allow a malicious
4+
* user to execute arbitrary code.
5+
* @kind path-problem
6+
* @problem.severity warning
7+
* @security-severity 6.1
8+
* @precision medium
9+
* @id rb/unsafe-code-construction
10+
* @tags security
11+
* external/cwe/cwe-094
12+
* external/cwe/cwe-079
13+
* external/cwe/cwe-116
14+
*/
15+
16+
import codeql.ruby.security.UnsafeCodeConstructionQuery
17+
import DataFlow::PathGraph
18+
19+
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
20+
where cfg.hasFlowPath(source, sink) and sinkNode = sink.getNode()
21+
select sink.getNode(), source, sink,
22+
"This " + sinkNode.getSinkType() + " which depends on $@ is later $@.", source.getNode(),
23+
"library input", sinkNode.getCodeSink(), "interpreted as code"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
module MyLib
2+
def unsafeDeserialize(value)
3+
eval("foo = #{value}")
4+
foo
5+
end
6+
7+
def unsafeGetter(obj, path)
8+
eval("obj.#{path}")
9+
end
10+
end
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
require 'json'
2+
3+
module BadMakeGetter
4+
# Makes a class with a method named `getter_name` that returns `val`
5+
def self.define_getter_class(getter_name, val)
6+
new_class = Class.new
7+
new_class.module_eval <<-END
8+
def #{getter_name}
9+
#{JSON.dump(val)}
10+
end
11+
END
12+
new_class
13+
end
14+
end
15+
16+
one = BadMakeGetter.define_getter_class(:one, "foo")
17+
puts "One is #{one.new.one}"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Uses `define_method` instead of constructing a string
2+
module GoodMakeGetter
3+
def self.define_getter_class(getter_name, val)
4+
new_class = Class.new
5+
new_class.module_eval do
6+
define_method(getter_name) { val }
7+
end
8+
new_class
9+
end
10+
end
11+
12+
two = GoodMakeGetter.define_getter_class(:two, "bar")
13+
puts "Two is #{two.new.two}"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
module Invoker
2+
def attach(klass, name, target)
3+
klass.module_eval <<-CODE
4+
@@#{name} = target
5+
6+
def #{name}(*args)
7+
@@#{name}.#{name}(*args)
8+
end
9+
CODE
10+
end
11+
end

0 commit comments

Comments
 (0)