Skip to content

Commit 4a9e3ee

Browse files
authored
Merge pull request #17363 from michaelnebel/modelgen/fieldbasedimprovements
C#/Java: Content based model generation improvements.
2 parents cfa4cb4 + 68165bb commit 4a9e3ee

File tree

17 files changed

+875
-75
lines changed

17 files changed

+875
-75
lines changed

csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,12 @@ class ContentSet extends TContentSet {
299299
*/
300300
predicate isProperty(Property p) { this = TPropertyContentSet(p) }
301301

302-
/** Holds if this content set represent the field `f`. */
302+
/** Holds if this content set represents the field `f`. */
303303
predicate isField(Field f) { this.isSingleton(TFieldContent(f)) }
304304

305+
/** Holds if this content set represents the synthetic field `s`. */
306+
predicate isSyntheticField(string s) { this.isSingleton(TSyntheticFieldContent(s)) }
307+
305308
/** Holds if this content set represents an element in a collection. */
306309
predicate isElement() { this.isSingleton(TElementContent()) }
307310

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* @name Capture content based summary models.
3+
* @description Finds applicable content based summary models to be used by other queries.
4+
* @kind diagnostic
5+
* @id cs/utils/modelgenerator/contentbased-summary-models
6+
* @tags modelgenerator
7+
*/
8+
9+
import internal.CaptureModels
10+
11+
from DataFlowSummaryTargetApi api, string flow
12+
where flow = captureContentFlow(api)
13+
select flow order by flow

csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll

Lines changed: 240 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127127
api = returnNodeEnclosingCallable(ret) and
128128
isOwnInstanceAccessNode(ret)
129129
) and
130-
result = Printing::asValueModel(api, qualifierString(), "ReturnValue")
130+
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
131131
}
132132

133133
private int accessPathLimit0() { result = 2 }
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237237
input = parameterNodeAsInput(p) and
238238
output = getOutput(returnNodeExt) and
239239
input != output and
240-
result = Printing::asTaintModel(api, input, output)
240+
result = Printing::asLiftedTaintModel(api, input, output)
241241
)
242242
}
243243

@@ -291,26 +291,257 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291291
)
292292
}
293293

294+
/**
295+
* Gets the MaD string representation of a store step access path.
296+
*/
294297
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
295298
result = concat(int i | | getContent(ap, i), "" order by i)
296299
}
297300

301+
/**
302+
* Gets the MaD string representation of a read step access path.
303+
*/
298304
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
299305
result = concat(int i | | getContent(ap, i), "" order by i desc)
300306
}
301307

302-
string captureContentFlow(DataFlowSummaryTargetApi api) {
308+
/**
309+
* Holds if the access path `ap` contains a field or synthetic field access.
310+
*/
311+
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
312+
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
313+
head = ap.getHead() and
314+
tail = ap.getTail()
315+
|
316+
mentionsField(tail) or isField(head)
317+
)
318+
}
319+
320+
private predicate apiFlow(
321+
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
322+
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
323+
) {
324+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
325+
returnNodeExt.getEnclosingCallable() = api and
326+
p.getEnclosingCallable() = api
327+
}
328+
329+
/**
330+
* A class of APIs relevant for modeling using content flow.
331+
* The following heuristic is applied:
332+
* Content flow is only relevant for an API, if
333+
* #content flow <= 2 * #parameters + 3
334+
* If an API produces more content flow, it is likely that
335+
* 1. Types are not sufficiently constrained leading to a combinatorial
336+
* explosion in dispatch and thus in the generated summaries.
337+
* 2. It is a reasonable approximation to use the non-content based flow
338+
* detection instead, as reads and stores would use a significant
339+
* part of an objects internal state.
340+
*/
341+
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
342+
ContentDataFlowSummaryTargetApi() {
343+
count(string input, string output |
344+
exists(
345+
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
346+
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
347+
|
348+
apiFlow(this, p, reads, returnNodeExt, stores, _) and
349+
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
350+
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
351+
)
352+
) <= 2 * this.getNumberOfParameters() + 3
353+
}
354+
}
355+
356+
pragma[nomagic]
357+
private predicate apiContentFlow(
358+
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
359+
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
360+
PropagateContentFlow::AccessPath stores, boolean preservesValue
361+
) {
362+
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
363+
returnNodeExt.getEnclosingCallable() = api and
364+
p.getEnclosingCallable() = api
365+
}
366+
367+
/**
368+
* Holds if any of the content sets in `path` translates into a synthetic field.
369+
*/
370+
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
371+
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
372+
head = path.getHead() and
373+
tail = path.getTail()
374+
|
375+
exists(getSyntheticName(head)) or
376+
hasSyntheticContent(tail)
377+
)
378+
}
379+
380+
/**
381+
* A module containing predicates for validating access paths containing content sets
382+
* that translates into synthetic fields, when used for generated summary models.
383+
*/
384+
private module AccessPathSyntheticValidation {
385+
/**
386+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
387+
* to `store` (on type `t2`).
388+
*/
389+
private predicate step(
390+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
391+
) {
392+
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
393+
p.getType() = t1 and
394+
returnNodeExt.getType() = t2 and
395+
apiContentFlow(_, p, read, returnNodeExt, store, _)
396+
)
397+
}
398+
399+
/**
400+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
401+
* to `store` (on type `t2`), where `read` does not have synthetic content and `store` does.
402+
*
403+
* Step A -> Synth.
404+
*/
405+
private predicate synthPathEntry(
406+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
407+
) {
408+
not hasSyntheticContent(read) and
409+
hasSyntheticContent(store) and
410+
step(t1, read, t2, store)
411+
}
412+
413+
/**
414+
* Holds if there exists an API that has content flow from `read` (on type `t1`)
415+
* to `store` (on type `t2`), where `read` has synthetic content
416+
* and `store` does not.
417+
*
418+
* Step Synth -> A.
419+
*/
420+
private predicate synthPathExit(
421+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
422+
) {
423+
hasSyntheticContent(read) and
424+
not hasSyntheticContent(store) and
425+
step(t1, read, t2, store)
426+
}
427+
428+
/**
429+
* Holds if there exists a path of steps from `read` to an exit.
430+
*
431+
* read ->* Synth -> A
432+
*/
433+
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
434+
synthPathExit(t, read, _, _)
435+
or
436+
hasSyntheticContent(read) and
437+
exists(PropagateContentFlow::AccessPath mid, Type midType |
438+
hasSyntheticContent(mid) and
439+
step(t, read, midType, mid) and
440+
reachesSynthExit(midType, mid.reverse())
441+
)
442+
}
443+
444+
/**
445+
* Holds if there exists a path of steps from an entry to `store`.
446+
*
447+
* A -> Synth ->* store
448+
*/
449+
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
450+
synthPathEntry(_, _, t, store)
451+
or
452+
hasSyntheticContent(store) and
453+
exists(PropagateContentFlow::AccessPath mid, Type midType |
454+
hasSyntheticContent(mid) and
455+
step(midType, mid, t, store) and
456+
synthEntryReaches(midType, mid.reverse())
457+
)
458+
}
459+
460+
/**
461+
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
462+
* contain content that will be translated into a synthetic field, when being used in
463+
* a MaD summary model, and if there is a range of APIs, such that
464+
* when chaining their flow access paths, there exists access paths `A` and `B` where
465+
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
466+
* be translated into a synthetic field.
467+
*
468+
* This is needed because we don't want to include summaries that reads from or
469+
* stores into a "dead" synthetic field.
470+
*
471+
* Example:
472+
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
473+
* `setX`, which gets and sets a private field `X` on `t`.
474+
* This would lead to the following content flows
475+
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
476+
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
477+
* As the reads and stores are on synthetic fields we should only make summaries
478+
* if both of these methods exist.
479+
*/
480+
pragma[nomagic]
481+
predicate acceptReadStore(
482+
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
483+
) {
484+
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
485+
or
486+
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
487+
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
488+
or
489+
synthEntryReaches(t1, store0) and
490+
step(t1, read, t2, store) and
491+
reachesSynthExit(t2, store.reverse())
492+
)
493+
}
494+
}
495+
496+
/**
497+
* Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
498+
* Flow is considered relevant,
499+
* 1. If `read` or `store` do not contain a content set that translates into a synthetic field.
500+
* 2. If `read` or `store` contain a content set that translates into a synthetic field, and if
501+
* the synthetic content is "live" on the relevant declaring type.
502+
*/
503+
private predicate apiRelevantContentFlow(
504+
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
505+
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
506+
PropagateContentFlow::AccessPath store, boolean preservesValue
507+
) {
508+
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
509+
(
510+
not hasSyntheticContent(read) and not hasSyntheticContent(store)
511+
or
512+
AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
513+
)
514+
}
515+
516+
pragma[nomagic]
517+
private predicate captureContentFlow0(
518+
ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
519+
boolean lift
520+
) {
303521
exists(
304-
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
305-
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
306-
boolean preservesValue
522+
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads,
523+
PropagateContentFlow::AccessPath stores
307524
|
308-
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
309-
returnNodeExt.getEnclosingCallable() = api and
525+
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
310526
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
311527
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
312528
input != output and
313-
result = Printing::asModel(api, input, output, preservesValue)
529+
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
530+
)
531+
}
532+
533+
/**
534+
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
535+
* the return value or a parameter).
536+
*
537+
* Models are lifted to the best type in case the read and store access paths do not
538+
* contain a field or synthetic field access.
539+
*/
540+
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
541+
exists(string input, string output, boolean lift, boolean preservesValue |
542+
captureContentFlow0(api, input, output, _, lift) and
543+
preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and
544+
result = Printing::asModel(api, input, output, preservesValue, lift)
314545
)
315546
}
316547

csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -390,23 +390,47 @@ private string getFullyQualifiedName(Declaration d) {
390390
}
391391

392392
/**
393-
* Gets the MaD string representation of the contentset `c`.
393+
* Holds if the content set `c` is a field, property or synthetic field.
394+
*/
395+
predicate isField(ContentSet c) { c.isField(_) or c.isSyntheticField(_) or c.isProperty(_) }
396+
397+
/**
398+
* Gets the MaD synthetic name string representation for the content set `c`, if any.
399+
*/
400+
string getSyntheticName(DataFlow::ContentSet c) {
401+
exists(CS::Field f |
402+
not f.isEffectivelyPublic() and
403+
c.isField(f) and
404+
result = getFullyQualifiedName(f)
405+
)
406+
or
407+
exists(CS::Property p |
408+
not p.isEffectivelyPublic() and
409+
c.isProperty(p) and
410+
result = getFullyQualifiedName(p)
411+
)
412+
or
413+
c.isSyntheticField(result)
414+
}
415+
416+
/**
417+
* Gets the MaD string representation of the content set `c`.
394418
*/
395419
string printContent(DataFlow::ContentSet c) {
396420
exists(CS::Field f, string name | name = getFullyQualifiedName(f) |
397421
c.isField(f) and
398-
if f.isEffectivelyPublic()
399-
then result = "Field[" + name + "]"
400-
else result = "SyntheticField[" + name + "]"
422+
f.isEffectivelyPublic() and
423+
result = "Field[" + name + "]"
401424
)
402425
or
403426
exists(CS::Property p, string name | name = getFullyQualifiedName(p) |
404427
c.isProperty(p) and
405-
if p.isEffectivelyPublic()
406-
then result = "Property[" + name + "]"
407-
else result = "SyntheticField[" + name + "]"
428+
p.isEffectivelyPublic() and
429+
result = "Property[" + name + "]"
408430
)
409431
or
432+
result = "SyntheticField[" + getSyntheticName(c) + "]"
433+
or
410434
c.isElement() and
411435
result = "Element"
412436
}

csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
223223
output(this, tp, output) and
224224
input != output
225225
|
226-
result = Printing::asValueModel(this, input, output)
226+
result = Printing::asLiftedValueModel(this, input, output)
227227
)
228228
}
229229
}

0 commit comments

Comments
 (0)