Skip to content

Commit d2c98c8

Browse files
committed
Java: Improve content based model generation.
1 parent 7c0101a commit d2c98c8

File tree

4 files changed

+294
-29
lines changed

4 files changed

+294
-29
lines changed

java/ql/src/utils/modelgenerator/internal/CaptureModels.qll

Lines changed: 240 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127127
api = returnNodeEnclosingCallable(ret) and
128128
isOwnInstanceAccessNode(ret)
129129
) and
130-
result = Printing::asValueModel(api, qualifierString(), "ReturnValue")
130+
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
131131
}
132132

133133
private int accessPathLimit0() { result = 2 }
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237237
input = parameterNodeAsInput(p) and
238238
output = getOutput(returnNodeExt) and
239239
input != output and
240-
result = Printing::asTaintModel(api, input, output)
240+
result = Printing::asLiftedTaintModel(api, input, output)
241241
)
242242
}
243243

@@ -291,26 +291,259 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291291
)
292292
}
293293

294+
/**
295+
* Gets the MaD string representation of a store step access path.
296+
*/
294297
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
295298
result = concat(int i | | getContent(ap, i), "" order by i)
296299
}
297300

301+
/**
302+
* Gets the MaD string representation of a read step access path.
303+
*/
298304
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
299305
result = concat(int i | | getContent(ap, i), "" order by i desc)
300306
}
301307

302-
string captureContentFlow(DataFlowSummaryTargetApi api) {
308+
/**
309+
* Holds if the access path `ap` contains a field or synthetic field access.
310+
*/
311+
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
312+
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
313+
head = ap.getHead() and
314+
tail = ap.getTail() and
315+
(mentionsField(tail) or isField(head))
316+
)
317+
}
318+
319+
private predicate apiFlow(
320+
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
321+
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
322+
) {
323+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
324+
returnNodeExt.getEnclosingCallable() = api and
325+
p.getEnclosingCallable() = api
326+
}
327+
328+
/**
329+
* A class of APIs relevant for modeling using content flow.
330+
* The following heuristic is applied:
331+
* Content flow is only relevant for an API, if
332+
* #content flow <= 2 * #parameters + 3
333+
* If an API produces more content flow, it is likely that
334+
* 1. Types are not sufficiently constrained leading to a combinatorial
335+
* explosion in dispatch and thus in the generated summaries.
336+
* 2. It is a reasonable approximation to use the non-content based flow
337+
* detection instead, as reads and stores would use a significant
338+
* part of an object's internal state.
339+
*/
340+
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
341+
ContentDataFlowSummaryTargetApi() {
342+
count(string input, string output |
343+
exists(
344+
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
345+
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
346+
|
347+
apiFlow(this, p, reads, returnNodeExt, stores, _) and
348+
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
349+
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
350+
)
351+
) <= 2 * this.getNumberOfParameters() + 3
352+
}
353+
}
354+
355+
pragma[nomagic]
356+
private predicate apiContentFlow(
357+
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
358+
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
359+
PropagateContentFlow::AccessPath stores, boolean preservesValue
360+
) {
361+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
362+
returnNodeExt.getEnclosingCallable() = api and
363+
p.getEnclosingCallable() = api
364+
}
365+
366+
/**
367+
* Holds if any of the content sets in `path` translates into a synthetic field.
368+
*/
369+
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
370+
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
371+
head = path.getHead() and
372+
tail = path.getTail() and
373+
(
374+
exists(getSyntheticName(head)) or
375+
hasSyntheticContent(tail)
376+
)
377+
)
378+
}
379+
380+
/**
381+
* A module containing predicates for validating access paths containing content sets
382+
* that translate into synthetic fields, when used for generated summary models.
383+
*/
384+
private module AccessPathSyntheticValidation {
385+
/**
386+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
387+
* to `store` (on type `t2`).
388+
*/
389+
private predicate step(
390+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
391+
) {
392+
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
393+
p.getType() = t1 and
394+
returnNodeExt.getType() = t2 and
395+
apiContentFlow(_, p, read, returnNodeExt, store, _)
396+
)
397+
}
398+
399+
/**
400+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
401+
* to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
402+
*
403+
* Step A -> Synth.
404+
*/
405+
private predicate synthPathEntry(
406+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
407+
) {
408+
not hasSyntheticContent(read) and
409+
hasSyntheticContent(store) and
410+
step(t1, read, t2, store)
411+
}
412+
413+
/**
414+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
415+
* to `store` (on type `t2`), where `read` has synthetic content
416+
* and `store` does not.
417+
*
418+
* Step Synth -> A.
419+
*/
420+
private predicate synthPathExit(
421+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
422+
) {
423+
hasSyntheticContent(read) and
424+
not hasSyntheticContent(store) and
425+
step(t1, read, t2, store)
426+
}
427+
428+
/**
429+
* Takes one or more synthetic steps.
430+
* Synth ->+ Synth
431+
*/
432+
private predicate synthPathStepRec(
433+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
434+
) {
435+
hasSyntheticContent(read) and
436+
hasSyntheticContent(store) and
437+
(
438+
step(t1, read, t2, store)
439+
or
440+
exists(PropagateContentFlow::AccessPath mid, Type midType |
441+
step(t1, read, midType, mid) and synthPathStepRec(midType, mid.reverse(), t2, store)
442+
)
443+
)
444+
}
445+
446+
/**
447+
* Holds if there exists a path of steps from `read` to an exit.
448+
*
449+
* read ->* Synth -> A
450+
*/
451+
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
452+
synthPathExit(t, read, _, _)
453+
or
454+
exists(PropagateContentFlow::AccessPath mid, Type midType |
455+
synthPathStepRec(t, read, midType, mid) and synthPathExit(midType, mid.reverse(), _, _)
456+
)
457+
}
458+
459+
/**
460+
* Holds if there exists a path of steps from an entry to `store`.
461+
*
462+
* A -> Synth ->* store
463+
*/
464+
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
465+
synthPathEntry(_, _, t, store)
466+
or
467+
exists(PropagateContentFlow::AccessPath mid, Type midType |
468+
synthPathEntry(_, _, midType, mid) and synthPathStepRec(midType, mid.reverse(), t, store)
469+
)
470+
}
471+
472+
/**
473+
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
474+
* contains content that will be translated into a synthetic field, when being used in
475+
* a MaD summary model, and if there is a range of APIs, such that
476+
* when chaining their flow access paths, there exist access paths `A` and `B` where
477+
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
478+
* be translated into a synthetic field.
479+
*
480+
* This is needed because we don't want to include summaries that read from or
481+
* store into a "dead" synthetic field.
482+
*
483+
* Example:
484+
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
485+
* `setX`, which gets and sets a private field `X` on `t`.
486+
* This would lead to the following content flows
487+
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
488+
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
489+
* As the reads and stores are on synthetic fields we should only make summaries
490+
* if both of these methods exist.
491+
*/
492+
pragma[nomagic]
493+
predicate acceptReadStore(
494+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
495+
) {
496+
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
497+
or
498+
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
499+
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
500+
or
501+
synthEntryReaches(t1, store0) and
502+
step(t1, read, t2, store) and
503+
reachesSynthExit(t2, store.reverse())
504+
)
505+
}
506+
}
507+
508+
/**
509+
* Holds if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
510+
* Flow is considered relevant,
511+
* 1. If neither `read` nor `store` contains a content set that translates into a synthetic field.
512+
* 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
513+
* the synthetic content is "live" on the relevant declaring type.
514+
*/
515+
private predicate apiRelevantContentFlow(
516+
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
517+
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
518+
PropagateContentFlow::AccessPath store, boolean preservesValue
519+
) {
520+
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
521+
(
522+
not hasSyntheticContent(read) and not hasSyntheticContent(store)
523+
or
524+
AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
525+
)
526+
}
527+
528+
/**
529+
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
530+
* the return value or a parameter).
531+
*
532+
* Models are lifted to the best type in case the read and store access paths do not
533+
* contain a field or synthetic field access.
534+
*/
535+
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
303536
exists(
304537
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
305538
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
306-
boolean preservesValue
539+
boolean preservesValue, boolean lift
307540
|
308-
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
309-
returnNodeExt.getEnclosingCallable() = api and
541+
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
310542
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
311543
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
312544
input != output and
313-
result = Printing::asModel(api, input, output, preservesValue)
545+
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and
546+
result = Printing::asModel(api, input, output, preservesValue, lift)
314547
)
315548
}
316549

java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -340,16 +340,35 @@ predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2
340340
}
341341

342342
/**
343-
* Gets the MaD string representation of the contentset `c`.
343+
* Holds if the content set `c` is a field or a synthetic field.
344+
*/
345+
predicate isField(ContentSet c) {
346+
c instanceof DataFlowUtil::FieldContent or
347+
c instanceof DataFlowUtil::SyntheticFieldContent
348+
}
349+
350+
/**
351+
* Gets the MaD synthetic name string representation for the content set `c`, if any.
352+
*/
353+
string getSyntheticName(DataFlow::ContentSet c) {
354+
exists(Field f |
355+
not f.isPublic() and
356+
f = c.(DataFlowUtil::FieldContent).getField() and
357+
result = f.getQualifiedName()
358+
)
359+
or
360+
result = c.(DataFlowUtil::SyntheticFieldContent).getField()
361+
}
362+
363+
/**
364+
* Gets the MaD string representation of the content set `c`.
344365
*/
345366
string printContent(ContentSet c) {
346-
exists(Field f, string name |
347-
f = c.(DataFlowUtil::FieldContent).getField() and name = f.getQualifiedName()
348-
|
349-
if f.isPublic() then result = "Field[" + name + "]" else result = "SyntheticField[" + name + "]"
367+
exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
368+
result = "Field[" + f.getQualifiedName() + "]"
350369
)
351370
or
352-
result = "SyntheticField[" + c.(DataFlowUtil::SyntheticFieldContent).getField() + "]"
371+
result = "SyntheticField[" + getSyntheticName(c) + "]"
353372
or
354373
c instanceof DataFlowUtil::CollectionContent and result = "Element"
355374
or

java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
329329
output(this, tv, output) and
330330
input != output
331331
|
332-
result = Printing::asValueModel(this, input, output)
332+
result = Printing::asLiftedValueModel(this, input, output)
333333
)
334334
}
335335
}

0 commit comments

Comments
 (0)