Skip to content

Commit 7d4ea47

Browse files
authored
Merge pull request github#10855 from erik-krogh/formatTaint
Ruby: taint-steps for printf calls - and add a `AdditionalTaintStep` class
2 parents 3ebb7cf + 6bc12e8 commit 7d4ea47

File tree

10 files changed

+200
-71
lines changed

10 files changed

+200
-71
lines changed

ruby/ql/lib/codeql/ruby/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ private import codeql.ruby.frameworks.HttpClients
2323
private import codeql.ruby.frameworks.XmlParsing
2424
private import codeql.ruby.frameworks.ActionDispatch
2525
private import codeql.ruby.frameworks.PosixSpawn
26+
private import codeql.ruby.frameworks.StringFormatters
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/**
2+
* Provides classes representing various flow steps for taint tracking.
3+
*/
4+
5+
private import codeql.ruby.DataFlow
6+
private import internal.DataFlowPrivate as DFPrivate
7+
8+
private class Unit = DFPrivate::Unit;
9+
10+
/**
11+
* A module importing the frameworks that implement additional flow steps,
12+
* ensuring that they are visible to the taint tracking library.
13+
*/
14+
private module Frameworks {
15+
import codeql.ruby.frameworks.StringFormatters
16+
}
17+
18+
/**
19+
* A unit class for adding additional taint steps.
20+
*
21+
* Extend this class to add additional taint steps that should apply to all
22+
* taint configurations.
23+
*/
24+
class AdditionalTaintStep extends Unit {
25+
/**
26+
* Holds if the step from `node1` to `node2` should be considered a taint
27+
* step for all configurations.
28+
*/
29+
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
30+
}

ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ private CfgNodes::ExprNodes::VariableWriteAccessCfgNode variablesInPattern(
6262

6363
cached
6464
private module Cached {
65+
private import codeql.ruby.dataflow.FlowSteps as FlowSteps
66+
6567
cached
6668
predicate forceCachingInSameStage() { any() }
6769

@@ -93,12 +95,10 @@ private module Cached {
9395
)
9496
)
9597
or
96-
// string interpolation of `nodeFrom` into `nodeTo`
97-
nodeFrom.asExpr() =
98-
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
99-
or
10098
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
10199
or
100+
any(FlowSteps::AdditionalTaintStep s).step(nodeFrom, nodeTo)
101+
or
102102
// Although flow through collections is modeled precisely using stores/reads, we still
103103
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
104104
// tracking configurations where the source is a collection.
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/**
2+
* Provides classes for modeling string formatting libraries.
3+
*/
4+
5+
private import codeql.ruby.AST as Ast
6+
private import codeql.ruby.DataFlow
7+
private import codeql.ruby.ApiGraphs
8+
private import codeql.ruby.frameworks.core.IO
9+
10+
/**
11+
* A call to `printf` or `sprintf`.
12+
*/
13+
abstract class PrintfStyleCall extends DataFlow::CallNode {
14+
// We assume that most printf-like calls have the signature f(format_string, args...)
15+
/**
16+
* Gets the format string of this call.
17+
*/
18+
DataFlow::Node getFormatString() { result = this.getArgument(0) }
19+
20+
/**
21+
* Gets then `n`th formatted argument of this call.
22+
*/
23+
DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
24+
25+
/** Holds if this call returns the formatted string. */
26+
predicate returnsFormatted() { any() }
27+
}
28+
29+
/**
30+
* A call to `Kernel.printf`.
31+
*/
32+
class KernelPrintfCall extends PrintfStyleCall {
33+
KernelPrintfCall() {
34+
this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
35+
or
36+
this.asExpr().getExpr() instanceof Ast::UnknownMethodCall and
37+
this.getMethodName() = "printf"
38+
}
39+
40+
// Kernel#printf supports two signatures:
41+
// printf(io, string, ...)
42+
// printf(string, ...)
43+
override DataFlow::Node getFormatString() {
44+
// Because `printf` has two different signatures, we can't be sure which
45+
// argument is the format string, so we use a heuristic:
46+
// If the first argument has a string value, then we assume it is the format string.
47+
// Otherwise we treat both the first and second args as the format string.
48+
if this.getArgument(0).getExprNode().getConstantValue().isString(_)
49+
then result = this.getArgument(0)
50+
else result = this.getArgument([0, 1])
51+
}
52+
53+
override predicate returnsFormatted() { none() }
54+
}
55+
56+
/**
57+
* A call to `Kernel.sprintf`.
58+
*/
59+
class KernelSprintfCall extends PrintfStyleCall {
60+
KernelSprintfCall() {
61+
this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
62+
or
63+
this.asExpr().getExpr() instanceof Ast::UnknownMethodCall and
64+
this.getMethodName() = "sprintf"
65+
}
66+
67+
override predicate returnsFormatted() { any() }
68+
}
69+
70+
/**
71+
* A call to `IO#printf`.
72+
*/
73+
class IOPrintfCall extends PrintfStyleCall {
74+
IOPrintfCall() {
75+
this.getReceiver() instanceof IO::IOInstance and this.getMethodName() = "printf"
76+
}
77+
78+
override predicate returnsFormatted() { none() }
79+
}
80+
81+
/**
82+
* A call to `String#%`.
83+
*/
84+
class StringPercentCall extends PrintfStyleCall {
85+
StringPercentCall() { this.getMethodName() = "%" }
86+
87+
override DataFlow::Node getFormatString() { result = this.getReceiver() }
88+
89+
override DataFlow::Node getFormatArgument(int n) {
90+
exists(CfgNodes::ExprNodes::ArrayLiteralCfgNode arrLit | arrLit = this.getArgument(0).asExpr() |
91+
result.asExpr() = arrLit.getArgument(n)
92+
)
93+
or
94+
exists(CfgNodes::ExprNodes::HashLiteralCfgNode hashLit |
95+
hashLit = this.getArgument(0).asExpr()
96+
|
97+
n = -2 and // -2 is indicates that the index does not make sense in this context
98+
result.asExpr() = hashLit.getAKeyValuePair().getValue()
99+
)
100+
}
101+
102+
override predicate returnsFormatted() { any() }
103+
}
104+
105+
private import codeql.ruby.dataflow.FlowSteps
106+
private import codeql.ruby.CFG
107+
108+
/**
109+
* A step for string interpolation of `pred` into `succ`.
110+
* E.g.
111+
* ```rb
112+
* succ = "foo #{pred} bar"
113+
* ```
114+
*/
115+
private class StringLiteralFormatStep extends AdditionalTaintStep {
116+
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
117+
pred.asExpr() = succ.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
118+
}
119+
}
120+
121+
/**
122+
* A taint propagating data flow edge arising from string formatting.
123+
*/
124+
private class StringFormattingTaintStep extends AdditionalTaintStep {
125+
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
126+
exists(PrintfStyleCall call |
127+
call.returnsFormatted() and
128+
succ = call
129+
|
130+
pred = call.getFormatString()
131+
or
132+
pred = call.getFormatArgument(_)
133+
)
134+
}
135+
}

ruby/ql/lib/codeql/ruby/security/TaintedFormatStringSpecific.qll

Lines changed: 2 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2,73 +2,8 @@
22
* Provides Ruby-specific imports and classes needed for `TaintedFormatStringQuery` and `TaintedFormatStringCustomizations`.
33
*/
44

5-
import codeql.ruby.AST
5+
import codeql.ruby.frameworks.StringFormatters
66
import codeql.ruby.DataFlow
77
import codeql.ruby.dataflow.RemoteFlowSources
8-
import codeql.ruby.ApiGraphs
98
import codeql.ruby.TaintTracking
10-
private import codeql.ruby.frameworks.Files
11-
private import codeql.ruby.frameworks.core.IO
12-
private import codeql.ruby.controlflow.CfgNodes
13-
14-
/**
15-
* A call to `printf` or `sprintf`.
16-
*/
17-
abstract class PrintfStyleCall extends DataFlow::CallNode {
18-
// We assume that most printf-like calls have the signature f(format_string, args...)
19-
/**
20-
* Gets the format string of this call.
21-
*/
22-
DataFlow::Node getFormatString() { result = this.getArgument(0) }
23-
24-
/**
25-
* Gets then `n`th formatted argument of this call.
26-
*/
27-
DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
28-
}
29-
30-
/**
31-
* A call to `Kernel.printf`.
32-
*/
33-
class KernelPrintfCall extends PrintfStyleCall {
34-
KernelPrintfCall() {
35-
this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
36-
or
37-
this.asExpr().getExpr() instanceof UnknownMethodCall and
38-
this.getMethodName() = "printf"
39-
}
40-
41-
// Kernel#printf supports two signatures:
42-
// printf(io, string, ...)
43-
// printf(string, ...)
44-
override DataFlow::Node getFormatString() {
45-
// Because `printf` has two different signatures, we can't be sure which
46-
// argument is the format string, so we use a heuristic:
47-
// If the first argument has a string value, then we assume it is the format string.
48-
// Otherwise we treat both the first and second args as the format string.
49-
if this.getArgument(0).getExprNode().getConstantValue().isString(_)
50-
then result = this.getArgument(0)
51-
else result = this.getArgument([0, 1])
52-
}
53-
}
54-
55-
/**
56-
* A call to `Kernel.sprintf`.
57-
*/
58-
class KernelSprintfCall extends PrintfStyleCall {
59-
KernelSprintfCall() {
60-
this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
61-
or
62-
this.asExpr().getExpr() instanceof UnknownMethodCall and
63-
this.getMethodName() = "sprintf"
64-
}
65-
}
66-
67-
/**
68-
* A call to `IO#printf`.
69-
*/
70-
class IOPrintfCall extends PrintfStyleCall {
71-
IOPrintfCall() {
72-
this.getReceiver() instanceof IO::IOInstance and this.getMethodName() = "printf"
73-
}
74-
}
9+
import codeql.ruby.DataFlow

ruby/ql/test/library-tests/dataflow/string-flow/string-flow.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ edges
1212
| string_flow.rb:10:29:10:29 | b : | string_flow.rb:10:10:10:30 | call to try_convert |
1313
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:15:10:15:17 | ... % ... |
1414
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:15:17:15:17 | a : |
15+
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:16:10:16:29 | ... % ... |
1516
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:16:28:16:28 | a : |
1617
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:17:10:17:10 | a : |
1718
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:17:10:17:18 | ... % ... |

ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ edges
1111
| app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text |
1212
| app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : | app/views/foo/bars/_widget.html.erb:8:9:8:36 | ...[...] |
1313
| app/views/foo/stores/show.html.erb:41:76:41:87 | call to display_text : | app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : |
14+
| app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf |
1415
nodes
1516
| app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read : | semmle.label | call to read : |
1617
| app/controllers/foo/stores_controller.rb:9:22:9:23 | dt : | semmle.label | dt : |
@@ -31,6 +32,8 @@ nodes
3132
| app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | semmle.label | call to raw_name |
3233
| app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | semmle.label | call to display_name |
3334
| app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | semmle.label | @other_user_raw_name |
35+
| app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | semmle.label | call to sprintf |
36+
| app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | semmle.label | call to handle : |
3437
subpaths
3538
#select
3639
| app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read : | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value |
@@ -46,3 +49,4 @@ subpaths
4649
| app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | stored value |
4750
| app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | stored value |
4851
| app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name : | app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | stored value |
52+
| app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle | stored value |

ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,10 @@
8282
<%# BAD: Indirect to a database value without escaping %>
8383
<%= @other_user_raw_name.html_safe %>
8484

85+
<%# BAD: Kernel.sprintf is a taint-step %>
86+
<%=
87+
sprintf("%s", @user.handle).html_safe
88+
%>
89+
8590
<%# GOOD: The `foo.bar.baz` is not recognized as a source %>
8691
<%= @other_user_raw_name.foo.bar.baz.html_safe %>

ruby/ql/test/query-tests/security/cwe-134/TaintedFormatString.expected

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ edges
1212
| tainted_format_string.rb:33:32:33:46 | ...[...] : | tainted_format_string.rb:33:12:33:46 | ... + ... |
1313
| tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:30:36:44 | ...[...] : |
1414
| tainted_format_string.rb:36:30:36:44 | ...[...] : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" |
15+
| tainted_format_string.rb:39:22:39:27 | call to params : | tainted_format_string.rb:39:22:39:36 | ...[...] : |
16+
| tainted_format_string.rb:39:22:39:36 | ...[...] : | tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" |
17+
| tainted_format_string.rb:42:22:42:27 | call to params : | tainted_format_string.rb:42:22:42:36 | ...[...] : |
18+
| tainted_format_string.rb:42:22:42:36 | ...[...] : | tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" |
1519
nodes
1620
| tainted_format_string.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
1721
| tainted_format_string.rb:4:12:4:26 | ...[...] | semmle.label | ...[...] |
@@ -37,6 +41,12 @@ nodes
3741
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
3842
| tainted_format_string.rb:36:30:36:35 | call to params : | semmle.label | call to params : |
3943
| tainted_format_string.rb:36:30:36:44 | ...[...] : | semmle.label | ...[...] : |
44+
| tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | semmle.label | "A log message #{...} %{foo}" |
45+
| tainted_format_string.rb:39:22:39:27 | call to params : | semmle.label | call to params : |
46+
| tainted_format_string.rb:39:22:39:36 | ...[...] : | semmle.label | ...[...] : |
47+
| tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | semmle.label | "A log message #{...} %08x" |
48+
| tainted_format_string.rb:42:22:42:27 | call to params : | semmle.label | call to params : |
49+
| tainted_format_string.rb:42:22:42:36 | ...[...] : | semmle.label | ...[...] : |
4050
subpaths
4151
#select
4252
| tainted_format_string.rb:4:12:4:26 | ...[...] | tainted_format_string.rb:4:12:4:17 | call to params : | tainted_format_string.rb:4:12:4:26 | ...[...] | Format string depends on a $@. | tainted_format_string.rb:4:12:4:17 | call to params | user-provided value |
@@ -50,3 +60,5 @@ subpaths
5060
| tainted_format_string.rb:28:19:28:33 | ...[...] | tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] | Format string depends on a $@. | tainted_format_string.rb:28:19:28:24 | call to params | user-provided value |
5161
| tainted_format_string.rb:33:12:33:46 | ... + ... | tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:12:33:46 | ... + ... | Format string depends on a $@. | tainted_format_string.rb:33:32:33:37 | call to params | user-provided value |
5262
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | Format string depends on a $@. | tainted_format_string.rb:36:30:36:35 | call to params | user-provided value |
63+
| tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | tainted_format_string.rb:39:22:39:27 | call to params : | tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | Format string depends on a $@. | tainted_format_string.rb:39:22:39:27 | call to params | user-provided value |
64+
| tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | tainted_format_string.rb:42:22:42:27 | call to params : | tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | Format string depends on a $@. | tainted_format_string.rb:42:22:42:27 | call to params | user-provided value |

ruby/ql/test/query-tests/security/cwe-134/tainted_format_string.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,11 @@ def show
3434

3535
# Taint via string interpolation
3636
printf("A log message: #{params[:format]}", arg) # BAD
37+
38+
# Using String#
39+
"A log message #{params[:format]} %{foo}" % {foo: "foo"} # BAD
40+
41+
# String# with an array
42+
"A log message #{params[:format]} %08x" % ["foo"] # BAD
3743
end
3844
end

0 commit comments

Comments
 (0)