@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127
127
api = returnNodeEnclosingCallable ( ret ) and
128
128
isOwnInstanceAccessNode ( ret )
129
129
) and
130
- result = Printing:: asValueModel ( api , qualifierString ( ) , "ReturnValue" )
130
+ result = Printing:: asLiftedValueModel ( api , qualifierString ( ) , "ReturnValue" )
131
131
}
132
132
133
133
private int accessPathLimit0 ( ) { result = 2 }
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237
237
input = parameterNodeAsInput ( p ) and
238
238
output = getOutput ( returnNodeExt ) and
239
239
input != output and
240
- result = Printing:: asTaintModel ( api , input , output )
240
+ result = Printing:: asLiftedTaintModel ( api , input , output )
241
241
)
242
242
}
243
243
@@ -291,26 +291,257 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291
291
)
292
292
}
293
293
294
/**
 * Gets the MaD string representation of the store step access path `ap`.
 *
 * The per-index content strings from `getContent` are joined without a
 * separator, in ascending index order.
 */
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int pos | | getContent(ap, pos), "" order by pos)
}
297
300
301
/**
 * Gets the MaD string representation of the read step access path `ap`.
 *
 * The per-index content strings from `getContent` are joined without a
 * separator, in descending index order (the reverse of the order used
 * for store access paths).
 */
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int pos | | getContent(ap, pos), "" order by pos desc)
}
301
307
302
- string captureContentFlow ( DataFlowSummaryTargetApi api ) {
308
/**
 * Holds if the access path `ap` contains a field or synthetic field access.
 *
 * The path is inspected recursively: either its head satisfies `isField`,
 * or its tail mentions a field. Both a head and a tail must exist for the
 * predicate to hold at a given level.
 */
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
  isField(ap.getHead()) and exists(ap.getTail())
  or
  exists(ap.getHead()) and mentionsField(ap.getTail())
}
319
+
320
/**
 * Holds if `PropagateContentFlow::flow` relates the parameter node `p` (with read
 * access path `reads`) to the return node `returnNodeExt` (with store access path
 * `stores`), and both nodes have `api` as their enclosing callable.
 *
 * `preservesValue` is taken unchanged from the underlying flow relation.
 */
private predicate apiFlow(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
  ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  api = p.getEnclosingCallable() and
  api = returnNodeExt.getEnclosingCallable() and
  PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue)
}
328
+
329
/**
 * A class of APIs relevant for modeling using content flow.
 *
 * The following heuristic is applied: content flow is only relevant for an API if
 *   #content flow <= 2 * #parameters + 3
 * where "#content flow" is the number of distinct (input, output) string pairs
 * produced by the API's content flow.
 *
 * If an API produces more content flow than that, it is likely that
 * 1. Types are not sufficiently constrained, leading to a combinatorial
 *    explosion in dispatch and thus in the generated summaries.
 * 2. It is a reasonable approximation to use the non-content based flow
 *    detection instead, as reads and stores would use a significant
 *    part of an object's internal state.
 */
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
  ContentDataFlowSummaryTargetApi() {
    2 * this.getNumberOfParameters() + 3 >=
      count(string inputSpec, string outputSpec |
        exists(
          DataFlow::ParameterNode param, PropagateContentFlow::AccessPath readPath,
          ReturnNodeExt ret, PropagateContentFlow::AccessPath storePath
        |
          apiFlow(this, param, readPath, ret, storePath, _) and
          inputSpec = parameterNodeAsContentInput(param) + printReadAccessPath(readPath) and
          outputSpec = getContentOutput(ret) + printStoreAccessPath(storePath)
        )
      )
  }
}
355
+
356
/**
 * Holds if the content-flow-relevant API `api` has flow from parameter node `p`
 * (read access path `reads`) to return node `returnNodeExt` (store access path
 * `stores`).
 *
 * This is the same relation as `apiFlow`, restricted to APIs that are
 * `ContentDataFlowSummaryTargetApi`s.
 */
pragma[nomagic]
private predicate apiContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  apiFlow(api, p, reads, returnNodeExt, stores, preservesValue)
}
366
+
367
/**
 * Holds if any of the content sets in `path` translates into a synthetic field,
 * i.e. if `getSyntheticName` has a result for the head of `path` or for a
 * content set further down its tail.
 *
 * Both a head and a tail must exist for the predicate to hold at a given level.
 */
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
  exists(getSyntheticName(path.getHead())) and exists(path.getTail())
  or
  exists(path.getHead()) and hasSyntheticContent(path.getTail())
}
379
+
380
/**
 * A module containing predicates for validating access paths that contain content
 * sets translating into synthetic fields, when such paths are used for generated
 * summary models.
 */
private module AccessPathSyntheticValidation {
  /**
   * Holds if some API has content flow from `read` (on a parameter of type `t1`)
   * to `store` (on a return node of type `t2`).
   */
  private predicate step(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    exists(DataFlow::ParameterNode param, ReturnNodeExt ret |
      apiContentFlow(_, param, read, ret, store, _) and
      t1 = param.getType() and
      t2 = ret.getType()
    )
  }

  /**
   * Holds if `step(t1, read, t2, store)` holds, `read` has no synthetic content
   * and `store` has synthetic content.
   *
   * Step A -> Synth.
   */
  private predicate synthPathEntry(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(store) and
    not hasSyntheticContent(read)
  }

  /**
   * Holds if `step(t1, read, t2, store)` holds, `read` has synthetic content
   * and `store` has no synthetic content.
   *
   * Step Synth -> A.
   */
  private predicate synthPathExit(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(read) and
    not hasSyntheticContent(store)
  }

  /**
   * Holds if there exists a path of steps from `read` (on type `t`) to an exit.
   *
   * read ->* Synth -> A
   *
   * Intermediate steps must go between access paths with synthetic content; the
   * store path of one step is reversed to become the read path of the next.
   */
  private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
    synthPathExit(t, read, _, _)
    or
    exists(PropagateContentFlow::AccessPath next, Type nextType |
      hasSyntheticContent(read) and
      step(t, read, nextType, next) and
      hasSyntheticContent(next) and
      reachesSynthExit(nextType, next.reverse())
    )
  }

  /**
   * Holds if there exists a path of steps from an entry to `store` (on type `t`).
   *
   * A -> Synth ->* store
   *
   * Intermediate steps must go between access paths with synthetic content; the
   * read path of a step is matched against the reverse of the previous store path.
   */
  private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
    synthPathEntry(_, _, t, store)
    or
    exists(PropagateContentFlow::AccessPath prev, Type prevType |
      hasSyntheticContent(store) and
      step(prevType, prev, t, store) and
      hasSyntheticContent(prev) and
      synthEntryReaches(prevType, prev.reverse())
    )
  }

  /**
   * Holds if at least one of the access paths `read` (on type `t1`) and `store`
   * (on type `t2`) contains content that will be translated into a synthetic field
   * when used in a MaD summary model, and if there is a range of APIs such that,
   * when chaining their flow access paths, there exist access paths `A` and `B`
   * where A ->* read -> store ->* B and where `A` and `B` do not contain content
   * that will be translated into a synthetic field.
   *
   * This is needed because we don't want to include summaries that read from or
   * store into a "dead" synthetic field.
   *
   * Example:
   * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
   * `setX`, which get and set a private field `X` on `t`.
   * This would lead to the following content flows
   *   getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
   *   setX : Argument[0] -> Argument[this].SyntheticField[t.X]
   * As the reads and stores are on synthetic fields, we should only make summaries
   * if both of these methods exist.
   */
  pragma[nomagic]
  predicate acceptReadStore(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    // `read -> store` is itself an entry step, and `store` leads on to an exit.
    synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
    or
    // `read` is reachable from an entry (via the store path it is the reverse of) ...
    exists(PropagateContentFlow::AccessPath prevStore |
      prevStore.reverse() = read and
      synthEntryReaches(t1, prevStore)
    |
      // ... and `read -> store` is either itself an exit step,
      synthPathExit(t1, read, t2, store)
      or
      // or an intermediate step from which an exit is reachable.
      step(t1, read, t2, store) and
      reachesSynthExit(t2, store.reverse())
    )
  }
}
495
+
496
/**
 * Holds if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
 * Flow is considered relevant if either
 * 1. neither `read` nor `store` contains a content set that translates into a
 *    synthetic field, or
 * 2. the pair is accepted by `AccessPathSyntheticValidation::acceptReadStore` for the
 *    types of `p` and `returnNodeExt`, i.e. the synthetic content is "live".
 */
private predicate apiRelevantContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath store, boolean preservesValue
) {
  apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
  (
    AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
    or
    not (hasSyntheticContent(read) or hasSyntheticContent(store))
  )
}
515
+
516
/**
 * Holds if `api` has relevant content flow whose MaD input specification is `input`
 * and whose MaD output specification is `output`, with `input` different from `output`.
 *
 * `preservesValue` is taken from the underlying flow relation. `lift` is `false` if
 * the read or the store access path mentions a field, and `true` otherwise.
 */
pragma[nomagic]
private predicate captureContentFlow0(
  ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
  boolean lift
) {
  exists(
    DataFlow::ParameterNode param, ReturnNodeExt ret, PropagateContentFlow::AccessPath readPath,
    PropagateContentFlow::AccessPath storePath
  |
    apiRelevantContentFlow(api, param, readPath, ret, storePath, preservesValue) and
    input = parameterNodeAsContentInput(param) + printReadAccessPath(readPath) and
    output = getContentOutput(ret) + printStoreAccessPath(storePath) and
    input != output and
    (
      (mentionsField(readPath) or mentionsField(storePath)) and lift = false
      or
      not mentionsField(readPath) and not mentionsField(storePath) and lift = true
    )
  )
}
532
+
533
/**
 * Gets the content based summary model(s) of the API `api` (if there is flow from a
 * parameter to the return value or a parameter).
 *
 * Models are lifted to the best type in case the read and store access paths do not
 * contain a field or synthetic field access.
 *
 * For each (input, output, lift) combination only one model is produced, using the
 * maximum `preservesValue` found for that combination (presumably `true` wins over
 * `false`, so a value-preserving model is preferred when both exist).
 */
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
  exists(string inputSpec, string outputSpec, boolean isLifted, boolean bestValue |
    captureContentFlow0(api, inputSpec, outputSpec, _, isLifted) and
    bestValue = max(boolean b | captureContentFlow0(api, inputSpec, outputSpec, b, isLifted)) and
    result = Printing::asModel(api, inputSpec, outputSpec, bestValue, isLifted)
  )
}
316
547
0 commit comments