Skip to content

Commit a5b4622

Browse files
authored
Merge pull request github#17330 from michaelnebel/java/modelgenfieldbased
Java/C#: Field based model generator (Experimental).
2 parents b73b868 + e1048cf commit a5b4622

27 files changed

+575
-33
lines changed

csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,35 @@ private class ReturnNodeExt extends DataFlow::Node {
1818
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
1919
}
2020

21-
string getOutput() {
22-
kind instanceof DataFlowImplCommon::ValueReturnKind and
21+
/**
22+
* Gets the kind of the return node.
23+
*/
24+
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
25+
}
26+
27+
bindingset[c]
28+
private signature string printCallableParamSig(Callable c, ParameterPosition p);
29+
30+
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
31+
string getOutput(ReturnNodeExt node) {
32+
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
2333
result = "ReturnValue"
2434
or
2535
exists(ParameterPosition pos |
26-
pos = kind.(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
27-
result = paramReturnNodeAsOutput(returnNodeEnclosingCallable(this), pos)
36+
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
37+
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
2838
)
2939
}
3040
}
3141

42+
string getOutput(ReturnNodeExt node) {
43+
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
44+
}
45+
46+
string getContentOutput(ReturnNodeExt node) {
47+
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
48+
}
49+
3250
class DataFlowSummaryTargetApi extends SummaryTargetApi {
3351
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
3452
}
@@ -71,7 +89,8 @@ private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2
7189
* Holds if content `c` is either a field, a synthetic field or language-specific
7290
* content of a relevant type or a container-like content.
7391
*/
74-
private predicate isRelevantContent(DataFlow::ContentSet c) {
92+
pragma[nomagic]
93+
private predicate isRelevantContent0(DataFlow::ContentSet c) {
7594
isRelevantTypeInContent(c) or
7695
containerContent(c)
7796
}
@@ -85,6 +104,16 @@ string parameterNodeAsInput(DataFlow::ParameterNode p) {
85104
result = qualifierString() and p instanceof InstanceParameterNode
86105
}
87106

107+
/**
108+
* Gets the MaD string representation of the parameter `p`
109+
* when used in content flow.
110+
*/
111+
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
112+
result = parameterContentAccess(p.asParameter())
113+
or
114+
result = qualifierString() and p instanceof InstanceParameterNode
115+
}
116+
88117
/**
89118
* Gets the MaD input string representation of `source`.
90119
*/
@@ -170,7 +199,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
170199
) {
171200
exists(DataFlow::ContentSet c |
172201
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
173-
isRelevantContent(c) and
202+
isRelevantContent0(c) and
174203
(
175204
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
176205
or
@@ -180,7 +209,7 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
180209
or
181210
exists(DataFlow::ContentSet c |
182211
DataFlowPrivate::readStep(node1, c, node2) and
183-
isRelevantContent(c) and
212+
isRelevantContent0(c) and
184213
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
185214
)
186215
}
@@ -196,14 +225,17 @@ module PropagateFlowConfig implements DataFlow::StateConfigSig {
196225

197226
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
198227

228+
/**
229+
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
230+
*/
199231
string captureThroughFlow0(
200232
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
201233
) {
202234
exists(string input, string output |
203235
p.getEnclosingCallable() = api and
204236
returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
205237
input = parameterNodeAsInput(p) and
206-
output = returnNodeExt.getOutput() and
238+
output = getOutput(returnNodeExt) and
207239
input != output and
208240
result = Printing::asTaintModel(api, input, output)
209241
)
@@ -219,6 +251,69 @@ string captureThroughFlow(DataFlowSummaryTargetApi api) {
219251
)
220252
}
221253

254+
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
255+
predicate isSource(DataFlow::Node source) {
256+
source instanceof DataFlow::ParameterNode and
257+
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
258+
}
259+
260+
predicate isSink(DataFlow::Node sink) {
261+
sink instanceof ReturnNodeExt and
262+
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
263+
}
264+
265+
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
266+
267+
predicate isBarrier(DataFlow::Node n) {
268+
exists(Type t | t = n.getType() and not isRelevantType(t))
269+
}
270+
271+
int accessPathLimit() { result = 2 }
272+
273+
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
274+
275+
DataFlow::FlowFeature getAFeature() {
276+
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
277+
}
278+
}
279+
280+
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
281+
282+
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
283+
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
284+
head = ap.getHead() and
285+
tail = ap.getTail()
286+
|
287+
i = 0 and
288+
result = "." + printContent(head)
289+
or
290+
i > 0 and result = getContent(tail, i - 1)
291+
)
292+
}
293+
294+
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
295+
result = concat(int i | | getContent(ap, i), "" order by i)
296+
}
297+
298+
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
299+
result = concat(int i | | getContent(ap, i), "" order by i desc)
300+
}
301+
302+
string captureContentFlow(DataFlowSummaryTargetApi api) {
303+
exists(
304+
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
305+
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
306+
boolean preservesValue
307+
|
308+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
309+
returnNodeExt.getEnclosingCallable() = api and
310+
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
311+
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
312+
input != output and
313+
result = Printing::asModel(api, input, output, preservesValue)
314+
)
315+
}
316+
222317
/**
223318
* A dataflow configuration used for finding new sources.
224319
* The sources are the already known existing sources and the sinks are the API return nodes.
@@ -261,7 +356,7 @@ string captureSource(DataFlowSourceTargetApi api) {
261356
ExternalFlow::sourceNode(source, kind) and
262357
api = sink.getEnclosingCallable() and
263358
not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and
264-
result = Printing::asSourceModel(api, sink.getOutput(), kind)
359+
result = Printing::asSourceModel(api, getOutput(sink), kind)
265360
)
266361
}
267362

csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
private import csharp as CS
66
private import semmle.code.csharp.commons.Util as Util
77
private import semmle.code.csharp.commons.Collections as Collections
8+
private import semmle.code.csharp.commons.QualifiedName as QualifiedName
89
private import semmle.code.csharp.dataflow.internal.DataFlowDispatch
910
private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
1011
private import semmle.code.csharp.frameworks.system.linq.Expressions
1112
private import semmle.code.csharp.frameworks.System
13+
private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
1214
import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow
15+
import semmle.code.csharp.dataflow.internal.ContentDataFlow as ContentDataFlow
1316
import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
1417
import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate
1518
import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch
@@ -22,6 +25,8 @@ class Type = CS::Type;
2225

2326
class Callable = CS::Callable;
2427

28+
class ContentSet = DataFlow::ContentSet;
29+
2530
/**
2631
* Holds if any of the parameters of `api` are `System.Func<>`.
2732
*/
@@ -241,20 +246,40 @@ string parameterAccess(CS::Parameter p) {
241246
else result = "Argument[" + p.getPosition() + "]"
242247
}
243248

249+
/**
250+
* Gets the MaD string representation of the parameter `p`
251+
* when used in content flow.
252+
*/
253+
string parameterContentAccess(CS::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
254+
244255
class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode;
245256

246257
class ParameterPosition = DataFlowDispatch::ParameterPosition;
247258

259+
private signature string parameterAccessSig(Parameter p);
260+
261+
module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> {
262+
bindingset[c]
263+
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
264+
result = getParamAccess(c.getParameter(pos.getPosition()))
265+
or
266+
pos.isThisParameter() and
267+
result = qualifierString()
268+
}
269+
}
270+
248271
/**
249272
* Gets the MaD string representation of return through parameter at position
250273
* `pos` of callable `c`.
251274
*/
252275
bindingset[c]
253276
string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) {
254-
result = parameterAccess(c.getParameter(pos.getPosition()))
255-
or
256-
pos.isThisParameter() and
257-
result = qualifierString()
277+
result = ParamReturnNodeAsOutput<parameterAccess/1>::paramReturnNodeAsOutput(c, pos)
278+
}
279+
280+
bindingset[c]
281+
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
282+
result = ParamReturnNodeAsOutput<parameterContentAccess/1>::paramReturnNodeAsOutput(c, pos)
258283
}
259284

260285
/**
@@ -344,3 +369,44 @@ predicate isRelevantSourceKind(string kind) { any() }
344369
* Holds if the content `c` is a container.
345370
*/
346371
predicate containerContent(DataFlow::ContentSet c) { c.isElement() }
372+
373+
/**
374+
* Holds if there is a taint step from `nodeFrom` to `nodeTo` in content flow.
375+
*/
376+
predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
377+
TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and
378+
not nodeTo.asExpr() instanceof CS::ElementAccess and
379+
not exists(DataFlow::ContentSet c |
380+
DataFlowPrivate::readStep(nodeFrom, c, nodeTo) and containerContent(c)
381+
)
382+
}
383+
384+
bindingset[d]
385+
private string getFullyQualifiedName(Declaration d) {
386+
exists(string qualifier, string name |
387+
d.hasFullyQualifiedName(qualifier, name) and
388+
result = QualifiedName::getQualifiedName(qualifier, name)
389+
)
390+
}
391+
392+
/**
393+
* Gets the MaD string representation of the content set `c`.
394+
*/
395+
string printContent(DataFlow::ContentSet c) {
396+
exists(CS::Field f, string name | name = getFullyQualifiedName(f) |
397+
c.isField(f) and
398+
if f.isEffectivelyPublic()
399+
then result = "Field[" + name + "]"
400+
else result = "SyntheticField[" + name + "]"
401+
)
402+
or
403+
exists(CS::Property p, string name | name = getFullyQualifiedName(p) |
404+
c.isProperty(p) and
405+
if p.isEffectivelyPublic()
406+
then result = "Property[" + name + "]"
407+
else result = "SyntheticField[" + name + "]"
408+
)
409+
or
410+
c.isElement() and
411+
result = "Element"
412+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
unexpectedModel
2+
expectedModel
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/csharp-all
4+
extensible: summaryModel
5+
data:
6+
- [ "Models", "ManuallyModelled", False, "HasSummary", "(System.Object)", "", "Argument[0]", "ReturnValue", "value", "manual"]
7+
8+
- addsTo:
9+
pack: codeql/csharp-all
10+
extensible: neutralModel
11+
data:
12+
- [ "Models", "ManuallyModelled", "HasNeutralSummary", "(System.Object)", "summary", "manual"]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import csharp
2+
import utils.modelgenerator.internal.CaptureModels
3+
import TestUtilities.InlineMadTest
4+
5+
module InlineMadTestConfig implements InlineMadTestConfigSig {
6+
string getCapturedModel(Callable c) { result = captureContentFlow(c) }
7+
8+
string getKind() { result = "contentbased-summary" }
9+
}
10+
11+
import InlineMadTest<InlineMadTestConfig>

0 commit comments

Comments
 (0)