Skip to content

Commit cb1cd5e

Browse files
authored
Merge pull request github#14560 from alexrford/rb/modgen
Ruby: add a query and script for autogenerating typeModel and summaryModel data extensions entries
2 parents 6062fbb + f6ac63b commit cb1cd5e

File tree

6 files changed

+436
-1
lines changed

6 files changed

+436
-1
lines changed

ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ module ApiGraphShared<ApiGraphSharedSig S> {
185185
bindingset[sink]
186186
pragma[inline_late]
187187
Node getAValueReachingSinkInline(ApiNode sink) {
188-
result = asSinkInline(getAnEpsilonSuccessorInline(sink))
188+
backwardStartNode(result) = getAnEpsilonSuccessorInline(sink)
189189
}
190190

191191
/**
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
private import internal.Types
2+
private import internal.Summaries
3+
4+
/**
 * Holds if the value reached via `(type2, path)` is to be treated as an
 * instance of `type1`.
 *
 * The rows are exactly those of `Types::typeModel`.
 */
query predicate typeModel(string type1, string type2, string path) {
  Types::typeModel(type1, type2, path)
}
8+
9+
/**
 * Holds if the value at `(type, path)` is a flow source of the given `kind`.
 *
 * The kind `remote` denotes a general remote flow source.
 *
 * The body is `none()`, so this predicate never holds and yields no rows.
 */
query predicate sourceModel(string type, string path, string kind) {
  none()
}
16+
17+
/**
 * Holds if the value at `(type, path)` is a sink of the given `kind`.
 *
 * The body is `none()`, so this predicate never holds and yields no rows.
 */
query predicate sinkModel(string type, string path, string kind) {
  none()
}
22+
23+
/**
 * Holds if, in calls to `(type, path)`, the value referred to by `input`
 * can flow to the value referred to by `output`.
 *
 * `kind` is either `value` (value-preserving step) or `taint`
 * (taint-preserving step).
 *
 * The rows are exactly those of `Summaries::summaryModel`.
 */
query predicate summaryModel(string type, string path, string input, string output, string kind) {
  Summaries::summaryModel(type, path, input, output, kind)
}
31+
32+
/**
 * Holds if `path` can be substituted for a token `TypeVar[name]`.
 *
 * The body is `none()`, so this predicate never holds and yields no rows.
 */
query predicate typeVariableModel(string name, string path) {
  none()
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* Contains predicates for generating `summaryModel`s to summarize flow through methods.
3+
*/
4+
5+
private import ruby
6+
private import codeql.ruby.ApiGraphs
7+
private import codeql.ruby.TaintTracking
8+
private import Util as Util
9+
10+
/**
11+
* Contains predicates for generating `summaryModel`s to summarize flow through methods.
12+
*/
13+
module Summaries {
  /**
   * Flow configuration shared by the value-flow and taint-flow analyses:
   * sources are parameters of public methods, sinks are method return nodes.
   */
  private module Config implements DataFlow::ConfigSig {
    /** Holds if `source` is any parameter (see `Util::getAnyParameter`) of a public method. */
    predicate isSource(DataFlow::Node source) {
      source = Util::getAnyParameter(any(DataFlow::MethodNode m | m.isPublic()))
    }

    /** Holds if `sink` is a return node of some method. */
    predicate isSink(DataFlow::Node sink) {
      exists(DataFlow::MethodNode m | sink = m.getAReturnNode())
    }

    /** Requests the `FeatureEqualSourceSinkCallContext` flow feature. */
    DataFlow::FlowFeature getAFeature() {
      result instanceof DataFlow::FeatureEqualSourceSinkCallContext
    }
  }

  /** Value-preserving flow from parameters to return nodes. */
  private module ValueFlow {
    import DataFlow::Global<Config>

    /**
     * Holds if a parameter described by `input` flows, value-preserved, to a
     * return node of the method identified by `(type, path)`; `output` is
     * always `ReturnValue`.
     */
    predicate summaryModel(string type, string path, string input, string output) {
      output = "ReturnValue" and
      exists(DataFlow::MethodNode method, DataFlow::ParameterNode param |
        flow(param, method.getAReturnNode()) and
        method.getLocation().getFile() instanceof Util::RelevantFile and
        Util::pathToMethod(method, type, path) and
        input = Util::getArgumentPath(param)
      )
    }
  }

  /** Taint-preserving flow from parameters to return nodes. */
  private module TaintFlow {
    import TaintTracking::Global<Config>

    /**
     * Holds if a parameter described by `input` flows, with taint, to a
     * return node of the method identified by `(type, path)`, and the same
     * tuple is not already reported by `ValueFlow::summaryModel`; `output`
     * is always `ReturnValue`.
     */
    predicate summaryModel(string type, string path, string input, string output) {
      output = "ReturnValue" and
      exists(DataFlow::MethodNode method, DataFlow::ParameterNode param |
        flow(param, method.getAReturnNode()) and
        method.getLocation().getFile() instanceof Util::RelevantFile and
        Util::pathToMethod(method, type, path) and
        input = Util::getArgumentPath(param)
      ) and
      // Value flow takes precedence: do not repeat a tuple as a taint step.
      not ValueFlow::summaryModel(type, path, input, output)
    }
  }

  /**
   * Holds if, in calls to `(type, path)`, the value referred to by `input`
   * can flow to the value referred to by `output`.
   *
   * `kind` is `value` for tuples found by value-preserving flow and `taint`
   * for tuples found only by taint-preserving flow.
   */
  predicate summaryModel(string type, string path, string input, string output, string kind) {
    kind = "value" and
    ValueFlow::summaryModel(type, path, input, output)
    or
    kind = "taint" and
    TaintFlow::summaryModel(type, path, input, output)
  }
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
2+
* Contains predicates for generating `typeModel`s that contain typing
3+
* information for API nodes.
4+
*/
5+
6+
private import ruby
7+
private import codeql.ruby.ApiGraphs
8+
private import Util as Util
9+
10+
/**
11+
* Contains predicates for generating `typeModel`s that contain typing
12+
* information for API nodes.
13+
*/
14+
module Types {
  /**
   * Holds if `node` is a call to `new` on an immediate reference to `mod`,
   * or an own-instance `self` of `mod`.
   */
  private predicate isInstanceValue(DataFlow::Node node, DataFlow::ModuleNode mod) {
    node = mod.getAnImmediateReference().getAMethodCall("new")
    or
    node = mod.getAnOwnInstanceSelf()
  }

  /**
   * Holds if `node` is an immediate reference to `mod`, or an own-module
   * `self` of `mod`.
   */
  private predicate isModuleValue(DataFlow::Node node, DataFlow::ModuleNode mod) {
    node = mod.getAnImmediateReference()
    or
    node = mod.getAnOwnModuleSelf()
  }

  /**
   * Holds if `node`, located in a relevant file, should be seen as having
   * the given `type`: the qualified module name for instance values, and the
   * qualified module name followed by `!` for values denoting the module
   * itself.
   */
  private predicate valueHasTypeName(DataFlow::LocalSourceNode node, string type) {
    node.getLocation().getFile() instanceof Util::RelevantFile and
    exists(DataFlow::ModuleNode mod, string qualifiedName |
      qualifiedName = mod.getQualifiedName()
    |
      isInstanceValue(node, mod) and
      type = qualifiedName
      or
      isModuleValue(node, mod) and
      type = qualifiedName + "!"
    )
  }

  /**
   * Holds if `(type2, path)` should be seen as an instance of `type1`.
   *
   * Only API nodes on an output path (`isOutput = true` in
   * `Util::pathToNode`) are considered.
   */
  predicate typeModel(string type1, string type2, string path) {
    exists(API::Node apiNode, DataFlow::LocalSourceNode value |
      Util::pathToNode(apiNode, type2, path, true) and
      value = apiNode.getAValueReachingSink() and
      valueHasTypeName(value, type1)
    )
  }
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/**
2+
* Contains utility methods and classes to assist with generating data extensions models.
3+
*/
4+
5+
private import ruby
6+
private import codeql.ruby.ApiGraphs
7+
8+
/**
9+
* A file that is relevant in the context of library modeling.
10+
*
11+
* In practice, this means a file that is not part of test code.
12+
*/
13+
class RelevantFile extends File {
  RelevantFile() {
    // A file is excluded when some directory on its relative path is named
    // exactly `test`, `tests`, `testcase` or `testcases`.
    //
    // `regexpMatch` must match the whole string, so the directory name is
    // anchored at a path-component boundary with `(.*/)?`. The previous
    // pattern `.*/?test(case)?s?/.*` made the separator optional and therefore
    // also excluded directories that merely end in "test", such as
    // `contest/` or `latest/`.
    not this.getRelativePath().regexpMatch("(.*/)?test(case)?s?/.*")
  }
}
16+
17+
/**
18+
* Gets an access path of an argument corresponding to the given `paramNode`.
19+
*/
20+
string getArgumentPath(DataFlow::ParameterNode paramNode) {
  paramNode.getLocation().getFile() instanceof RelevantFile and
  result = "Argument[" + getArgumentSpecifier(paramNode) + "]"
}

/**
 * Gets a specifier for `paramNode` as used inside an `Argument[...]` token:
 * the parameter's position, `<name>:` for a keyword parameter, `block` for a
 * block parameter, or `self` for a self parameter node.
 */
private string getArgumentSpecifier(DataFlow::ParameterNode paramNode) {
  exists(Ast::Parameter astParam | astParam = paramNode.asParameter() |
    result = astParam.getPosition().toString()
    or
    result = astParam.(Ast::KeywordParameter).getName() + ":"
    or
    astParam instanceof Ast::BlockParameter and
    result = "block"
  )
  or
  paramNode instanceof DataFlow::SelfParameterNode and
  result = "self"
}
40+
41+
/**
42+
* Holds if `(type,path)` evaluates to the given method, when evaluated from a client of the current library.
43+
*/
44+
predicate pathToMethod(DataFlow::MethodNode method, string type, string path) {
  method.getLocation().getFile() instanceof RelevantFile and
  exists(DataFlow::ModuleNode mod, string name, string qualifiedName |
    qualifiedName = mod.getQualifiedName()
  |
    // Own instance methods: `initialize` is exposed as `new` on the module
    // object (type suffixed with `!`); every other method is reached on an
    // instance under its own name.
    method = mod.getOwnInstanceMethod(name) and
    (
      name = "initialize" and
      type = qualifiedName + "!" and
      path = "Method[new]"
      or
      name != "initialize" and
      type = qualifiedName and
      path = "Method[" + name + "]"
    )
    or
    // Own singleton methods are reached on the module object itself.
    method = mod.getOwnSingletonMethod(name) and
    type = qualifiedName + "!" and
    path = "Method[" + name + "]"
  )
}
62+
63+
/**
64+
* Gets any parameter to `methodNode`. This may be a positional, keyword,
65+
* block, or self parameter.
66+
*/
67+
DataFlow::ParameterNode getAnyParameter(DataFlow::MethodNode methodNode) {
  // positional parameter at any index
  result = methodNode.getParameter(_)
  or
  // keyword parameter with any name
  result = methodNode.getKeywordParameter(_)
  or
  result = methodNode.getBlockParameter()
  or
  result = methodNode.getSelfParameter()
}
74+
75+
/**
 * Holds if `node` is reached directly from a method identified by
 * `(type, methodPath)` (see `pathToMethod`), where `path` is `methodPath`
 * extended by a single token:
 *
 * - `.ReturnValue` with `isOutput = true`, for nodes obtained by backtracking
 *   from a return node of the method (skipped for `initialize`), or
 * - `.Argument[...]` with `isOutput = false`, for nodes obtained by tracking
 *   one of the method's parameters.
 */
private predicate pathToNodeBase(API::Node node, string type, string path, boolean isOutput) {
  exists(DataFlow::MethodNode method, string methodPath |
    pathToMethod(method, type, methodPath) and
    (
      // The return value of `initialize` is ignored.
      not method.getMethodName() = "initialize" and
      node = method.getAReturnNode().backtrack() and
      path = methodPath + ".ReturnValue" and
      isOutput = true
      or
      exists(DataFlow::ParameterNode param | param = getAnyParameter(method) |
        node = param.track() and
        path = methodPath + "." + getArgumentPath(param) and
        isOutput = false
      )
    )
  )
}
91+
92+
/**
 * Holds if `node` is reached by `(type, path)`, where `path` consists of
 * `pathLength` steps after the method token, starting from a node found by
 * `pathToNodeBase` (`pathLength = 1`).
 *
 * Each recursive step appends one token to the path of a predecessor node:
 * `Element` and `ReturnValue` keep the predecessor's `isOutput` value, while
 * `Parameter[...]` steps are only taken from a predecessor with
 * `isOutput = false` and yield `isOutput = true`.
 *
 * `pathLength` is required to be below 8, which bounds the recursion.
 */
private predicate pathToNodeRec(
  API::Node node, string type, string path, boolean isOutput, int pathLength
) {
  pathLength < 8 and
  (
    pathLength = 1 and
    pathToNodeBase(node, type, path, isOutput)
    or
    exists(API::Node pred, string predPath, boolean predIsOutput, int predLength, string token |
      pathToNodeRec(pred, type, predPath, predIsOutput, predLength) and
      pathLength = predLength + 1 and
      path = predPath + "." + token
    |
      // Steps that keep the direction of the predecessor.
      isOutput = predIsOutput and
      (
        node = pred.getAnElement() and
        token = "Element"
        or
        node = pred.getReturn() and
        token = "ReturnValue"
      )
      or
      // Parameter steps: from an input node to an output node.
      predIsOutput = false and
      isOutput = true and
      (
        exists(int index |
          node = pred.getParameter(index) and
          token = "Parameter[" + index + "]"
        )
        or
        exists(string keyword |
          node = pred.getKeywordParameter(keyword) and
          token = "Parameter[" + keyword + ":]"
        )
        or
        node = pred.getBlock() and
        token = "Parameter[block]"
      )
    )
  )
}
131+
132+
/**
133+
* Holds if `(type,path)` evaluates to a value corresponding to `node`, when evaluated from a client of the current library.
134+
*/
135+
predicate pathToNode(API::Node node, string type, string path, boolean isOutput) {
  // The path length is only used to bound the recursion; any length will do.
  exists(int pathLength | pathToNodeRec(node, type, path, isOutput, pathLength))
}

0 commit comments

Comments
 (0)