Skip to content

Commit 6365e5e

Browse files
committed
Java: Initial implementation of content based model generation.
1 parent 4bdf21b commit 6365e5e

File tree

4 files changed

+183
-9
lines changed

4 files changed

+183
-9
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* @name Capture field based summary models.
3+
* @description Finds applicable field based summary models to be used by other queries.
4+
* @kind diagnostic
5+
* @id java/utils/modelgenerator/fieldbased-summary-models
6+
* @tags modelgenerator
7+
*/
8+
9+
import internal.CaptureModels
10+
11+
from DataFlowSummaryTargetApi api, string flow
12+
where flow = captureContentFlow(api)
13+
select flow order by flow

java/ql/src/utils/modelgenerator/internal/CaptureModels.qll

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,35 @@ private class ReturnNodeExt extends DataFlow::Node {
1818
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
1919
}
2020

21-
string getOutput() {
22-
kind instanceof DataFlowImplCommon::ValueReturnKind and
21+
/**
22+
* Gets the kind of the return node.
23+
*/
24+
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
25+
}
26+
27+
bindingset[c]
28+
private signature string printCallableParamSig(Callable c, ParameterPosition p);
29+
30+
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
31+
string getOutput(ReturnNodeExt node) {
32+
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
2333
result = "ReturnValue"
2434
or
2535
exists(ParameterPosition pos |
26-
pos = kind.(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
27-
result = paramReturnNodeAsOutput(returnNodeEnclosingCallable(this), pos)
36+
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
37+
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
2838
)
2939
}
3040
}
3141

42+
string getOutput(ReturnNodeExt node) {
43+
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
44+
}
45+
46+
string getContentOutput(ReturnNodeExt node) {
47+
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
48+
}
49+
3250
class DataFlowSummaryTargetApi extends SummaryTargetApi {
3351
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
3452
}
@@ -71,7 +89,8 @@ private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2
7189
* Holds if content `c` is either a field, a synthetic field or language-specific
7290
* content of a relevant type or a container-like content.
7391
*/
74-
private predicate isRelevantContent(DataFlow::ContentSet c) {
92+
pragma[nomagic]
93+
private predicate isRelevantContent0(DataFlow::ContentSet c) {
7594
isRelevantTypeInContent(c) or
7695
containerContent(c)
7796
}
@@ -85,6 +104,16 @@ string parameterNodeAsInput(DataFlow::ParameterNode p) {
85104
result = qualifierString() and p instanceof InstanceParameterNode
86105
}
87106

107+
/**
108+
* Gets the MaD string representation of the parameter `p`
109+
* when used in content flow.
110+
*/
111+
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
112+
result = parameterContentAccess(p.asParameter())
113+
or
114+
result = qualifierString() and p instanceof InstanceParameterNode
115+
}
116+
88117
/**
89118
* Gets the MaD input string representation of `source`.
90119
*/
@@ -170,7 +199,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
170199
) {
171200
exists(DataFlow::ContentSet c |
172201
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
173-
isRelevantContent(c) and
202+
isRelevantContent0(c) and
174203
(
175204
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
176205
or
@@ -180,7 +209,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
180209
or
181210
exists(DataFlow::ContentSet c |
182211
DataFlowPrivate::readStep(node1, c, node2) and
183-
isRelevantContent(c) and
212+
isRelevantContent0(c) and
184213
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
185214
)
186215
}
@@ -196,14 +225,17 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
196225

197226
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
198227

228+
/**
229+
* Gets the taint summary model of `api` for flow from the parameter `p` to `returnNodeExt` (the return value or an updated parameter), provided that the resulting input and output differ.
230+
*/
199231
string captureThroughFlow0(
200232
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
201233
) {
202234
exists(string input, string output |
203235
p.getEnclosingCallable() = api and
204236
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
205237
input = parameterNodeAsInput(p) and
206-
output = returnNodeExt.getOutput() and
238+
output = getOutput(returnNodeExt) and
207239
input != output and
208240
result = Printing::asTaintModel(api, input, output)
209241
)
@@ -219,6 +251,69 @@ string captureThroughFlow(DataFlowSummaryTargetApi api) {
219251
)
220252
}
221253

254+
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
255+
predicate isSource(DataFlow::Node source) {
256+
source instanceof DataFlow::ParameterNode and
257+
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
258+
}
259+
260+
predicate isSink(DataFlow::Node sink) {
261+
sink instanceof ReturnNodeExt and
262+
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
263+
}
264+
265+
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
266+
267+
predicate isBarrier(DataFlow::Node n) {
268+
exists(Type t | t = n.getType() and not isRelevantType(t))
269+
}
270+
271+
int accessPathLimit() { result = 2 }
272+
273+
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
274+
275+
DataFlow::FlowFeature getAFeature() {
276+
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
277+
}
278+
}
279+
280+
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
281+
282+
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
283+
not exists(ap.getHead()) and result = ""
284+
or
285+
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
286+
head = ap.getHead() and
287+
tail = ap.getTail() and
288+
result = "." + printContent(head) + printStoreAccessPath(tail)
289+
)
290+
}
291+
292+
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
293+
not exists(ap.getHead()) and result = ""
294+
or
295+
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
296+
head = ap.getHead() and
297+
tail = ap.getTail() and
298+
result = printReadAccessPath(tail) + "." + printContent(head)
299+
)
300+
}
301+
302+
string captureContentFlow(DataFlowSummaryTargetApi api) {
303+
exists(
304+
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
305+
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
306+
boolean preservesValue
307+
|
308+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
309+
returnNodeExt.getEnclosingCallable() = api and
310+
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
311+
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
312+
input != output and
313+
result = Printing::asModel(api, input, output, preservesValue)
314+
)
315+
}
316+
222317
/**
223318
* A dataflow configuration used for finding new sources.
224319
* The sources are the already known existing sources and the sinks are the API return nodes.
@@ -261,7 +356,7 @@ string captureSource(DataFlowSourceTargetApi api) {
261356
ExternalFlow::sourceNode(source, kind) and
262357
api = sink.getEnclosingCallable() and
263358
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
264-
result = Printing::asSourceModel(api, sink.getOutput(), kind)
359+
result = Printing::asSourceModel(api, getOutput(sink), kind)
265360
)
266361
}
267362

java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44

55
private import java as J
66
private import semmle.code.java.dataflow.internal.DataFlowPrivate
7+
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
78
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
89
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
910
private import semmle.code.java.dataflow.internal.ModelExclusions
1011
private import semmle.code.java.dataflow.DataFlow as Df
12+
private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf
1113
private import semmle.code.java.dataflow.SSA as Ssa
1214
private import semmle.code.java.dataflow.TaintTracking as Tt
1315
import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
@@ -17,6 +19,8 @@ import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch
1719

1820
module DataFlow = Df::DataFlow;
1921

22+
module ContentDataFlow = Cdf::ContentDataFlow;
23+
2024
module TaintTracking = Tt::TaintTracking;
2125

2226
class Type = J::Type;
@@ -25,6 +29,8 @@ class Unit = J::Unit;
2529

2630
class Callable = J::Callable;
2731

32+
class ContentSet = DataFlowUtil::ContentSet;
33+
2834
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
2935
cu.getPackage().getName().matches("javax.swing%") or
3036
cu.getPackage().getName().matches("java.awt%")
@@ -217,6 +223,12 @@ string parameterAccess(J::Parameter p) {
217223
else result = "Argument[" + p.getPosition() + "]"
218224
}
219225

226+
/**
227+
* Gets the MaD string representation of the parameter `p`
228+
* when used in content flow.
229+
*/
230+
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
231+
220232
class InstanceParameterNode = DataFlow::InstanceParameterNode;
221233

222234
class ParameterPosition = DataFlowDispatch::ParameterPosition;
@@ -232,6 +244,17 @@ string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
232244
result = qualifierString() and pos = -1
233245
}
234246

247+
/**
248+
* Gets the MaD string representation of return through parameter at position
249+
* `pos` of callable `c` for content flow.
250+
*/
251+
bindingset[c]
252+
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
253+
result = parameterContentAccess(c.getParameter(pos))
254+
or
255+
result = qualifierString() and pos = -1
256+
}
257+
235258
/**
236259
* Gets the enclosing callable of `ret`.
237260
*/
@@ -305,3 +328,34 @@ bindingset[kind]
305328
predicate isRelevantSourceKind(string kind) { any() }
306329

307330
predicate containerContent = DataFlowPrivate::containerContent/1;
331+
332+
/**
333+
* Holds if there is an additional taint step from `node1` to `node2` in content flow, that is, a default additional taint step that is not a read of container content.
334+
*/
335+
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
336+
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
337+
not exists(DataFlow::Content f |
338+
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
339+
)
340+
}
341+
342+
/**
343+
* Gets the MaD string representation of the content set `c`.
344+
*/
345+
string printContent(ContentSet c) {
346+
exists(Field f, string name |
347+
f = c.(DataFlowUtil::FieldContent).getField() and name = f.getQualifiedName()
348+
|
349+
if f.isPublic() then result = "Field[" + name + "]" else result = "SyntheticField[" + name + "]"
350+
)
351+
or
352+
result = "SyntheticField[" + c.(DataFlowUtil::SyntheticFieldContent).getField() + "]"
353+
or
354+
c instanceof DataFlowUtil::CollectionContent and result = "Element"
355+
or
356+
c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
357+
or
358+
c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
359+
or
360+
c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
361+
}

shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,18 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {
9797
result = asSummaryModel(api, input, output, "taint")
9898
}
9999

100+
/**
101+
* Gets the summary model for `api` with `input` and `output`: a value model if `preservesValue` is `true` and a taint model if it is `false`.
102+
*/
103+
bindingset[input, output, preservesValue]
104+
string asModel(Printing::SummaryApi api, string input, string output, boolean preservesValue) {
105+
preservesValue = true and
106+
result = asValueModel(api, input, output)
107+
or
108+
preservesValue = false and
109+
result = asTaintModel(api, input, output)
110+
}
111+
100112
/**
101113
* Gets the sink model for `api` with `input` and `kind`.
102114
*/

0 commit comments

Comments
 (0)